<?php
# Use the Russian CP1251 locale for every locale category.
setlocale(LC_ALL, "ru_RU.CP1251");
# Report all PHP errors, warnings and notices.
error_reporting(E_ALL);
/**
* RiSearch PHP
*
* web search engine, version 0.2
* (c) Sergej Tarasov, 2000-2004
*
* Homepage: http://risearch.org/
* email: risearch@risearch.org
*/
#===================================================================
#
# Configuration - edit the values below
#
#===================================================================

# Directory where your HTML files are located.
# In most cases a path relative to the location of the script works;
# an absolute path may be used as well.
# Use "./" for the current directory.
$base_dir = '../';

# Base URL of your site.
$base_url = 'http://sobranie.org/';

# Site size (selects the hash table size, $HASHSIZE, at the end of this section):
#   1 - Tiny   ~1Mb
#   2 - Medium ~10Mb
#   3 - Big    ~50Mb
#   4 - Large  >100Mb
$site_size = 1;

# Paths to the index database files.
$HASH      = 'db/0_hash';
$HASHWORDS = 'db/0_hashwords';
$FINFO     = 'db/0_finfo';
$SITEWORDS = 'db/0_sitewords';
$WORD_IND  = 'db/0_word_ind';

#===================================================================
#
# Settings used by the spider
#
#===================================================================

# Starting URL(s) for the spider.
$start_url = array('http://sobranie.org/');

# The spider indexes only files from these servers.
$allow_url = array('http://sobranie.org');

#===================================================================
#
# All remaining settings are optional; the script should work fine
# with the defaults. They control the indexing process.
#
#===================================================================

# File extensions to index.
# Add "NONE" if files without an extension should be indexed too.
$file_ext = 'shtml';

# Directories that must not be indexed (space-separated).
$no_index_dir = 'images cgi-bin ssi search en forum mail';

# Files that must not be indexed (space-separated).
$no_index_files = 'robots.txt links_ad.shtml';

# Minimum word length to index.
$min_length = 3;

# Whether to index numbers (set $numbers = "" to skip them).
# Other non-letter characters that should be indexed may be added here.
$numbers = '0-9';

# Parts of documents that should not be indexed: each entry maps a
# start marker to its end marker.
# NOTE(review): both markers are empty strings here, and the two entries
# share the same key, so the array holds a single '' => '' pair. The marker
# text appears to be missing - confirm the intended start/end strings
# before relying on selective indexing.
$use_selective_indexing = 'YES';
$no_index_strings = array(
    '' => '',
    '' => '',
);

# Cut default filenames from URLs ("YES" or "NO").
$cut_default_filenames = 'YES';
$default_filenames = 'index.htm index.shtml default.htm';

# Convert URLs to lower case ("YES" or "NO").
$url_to_lower_case = 'NO';

# Indexing scheme:
#   1 - whole word
#   2 - beginning of the word
#   3 - every substring
$INDEXING_SCHEME = 2;

# Translate HTML escapes (like &Egrave; or &#255;) ("YES" or "NO").
$use_esc = 'NO';

# Index META tags ("YES" or "NO").
$use_META = 'YES';

# Use the stop-word list ("YES" or "NO").
$use_stop_words = 'NO';
$stop_words = "and any are but can had has have her here him his
how its not our out per she some than that the their them then there
these they was were what you";

#===================================================================
#
# Settings that control the script output
#
#===================================================================

# Number of results per page.
$res_num = 10;

# Length of the page description in the output, and whether it comes
# from the META description ("YES") or from the first characters of
# the page ("NO").
$descr_size = 256;
$use_META_descr = 'YES';

#===================================================================
#
# --- end of configuration ---
#
# Please do not edit below this line unless you know what you are doing
#
#===================================================================

# Hash table size for the chosen site size; any value other than
# 1, 3 or 4 (including 2) falls back to the medium table.
switch ($site_size) {
    case 1:
        $HASHSIZE = 20001;
        break;
    case 3:
        $HASHSIZE = 100001;
        break;
    case 4:
        $HASHSIZE = 300001;
        break;
    default:
        $HASHSIZE = 50001;
        break;
}
#===================================================================
# Turn a whitespace-separated list (e.g. " index.htm  default.htm ") into a
# parenthesised regex alternation (e.g. "(index\.htm|default\.htm)").
# Dots are escaped so that they match literally; no other character is
# escaped. An empty or all-whitespace list yields "()".
function prepare_string($str) {
    $items = preg_split("/\s+/", $str, -1, PREG_SPLIT_NO_EMPTY);
    $alternation = str_replace(".", "\\.", implode("|", $items));
    return "(" . $alternation . ")";
}
# Turn the extension list into a regex fragment. When the list contains
# "NONE", that token is removed and an extra "/[^.]+" alternative (a slash
# followed by dot-free characters) is added to the fragment.
if (preg_match("/NONE/", $file_ext)) {
    $file_ext = '(\.' . prepare_string(preg_replace("/NONE/", "", $file_ext)) . '|/[^.]+|/)($|\?)';
} else {
    $file_ext = '(\.' . prepare_string($file_ext) . '|/)($|\?)';
}

# Excluded directories and files become plain alternation groups.
$no_index_dir = prepare_string($no_index_dir);
$no_index_files = prepare_string($no_index_files);

# Default filenames are anchored: a leading "/" and the end of the string.
$default_filenames = '/' . prepare_string($default_filenames) . '$';
#===================================================================
# Build the stop-word lookup table: $stop_words_array maps word => 1.
# $stop_words itself is still normalised to single-space separators.
$stop_words = preg_replace("/\s+/s", " ", $stop_words);

# Split on whitespace and drop empty tokens, so leading/trailing blanks
# in the configured list do not create an empty-string entry. The table
# is assigned in one step and no scratch variables are left behind in
# the global scope.
$stop_words_array = array_fill_keys(
    preg_split("/\s+/", $stop_words, -1, PREG_SPLIT_NO_EMPTY),
    1
);
#===================================================================
# Named HTML entities => replacement byte, looked up by esc2char().
# The keys must be the entity text itself ("&Agrave;"), because esc2char()
# indexes this table with the matched "&name;" string. The letters map to
# byte values 192..255 in entity order; the last three entries are
# replaced by a plain space.
$html_esc = array(
    "&Agrave;" => chr(192),
    "&Aacute;" => chr(193),
    "&Acirc;"  => chr(194),
    "&Atilde;" => chr(195),
    "&Auml;"   => chr(196),
    "&Aring;"  => chr(197),
    "&AElig;"  => chr(198),
    "&Ccedil;" => chr(199),
    "&Egrave;" => chr(200),
    "&Eacute;" => chr(201),
    "&Ecirc;"  => chr(202),   # was misspelled "&Eirc;" and could never match
    "&Euml;"   => chr(203),
    "&Igrave;" => chr(204),
    "&Iacute;" => chr(205),
    "&Icirc;"  => chr(206),
    "&Iuml;"   => chr(207),
    "&ETH;"    => chr(208),
    "&Ntilde;" => chr(209),
    "&Ograve;" => chr(210),
    "&Oacute;" => chr(211),
    "&Ocirc;"  => chr(212),
    "&Otilde;" => chr(213),
    "&Ouml;"   => chr(214),
    "&times;"  => chr(215),
    "&Oslash;" => chr(216),
    "&Ugrave;" => chr(217),
    "&Uacute;" => chr(218),
    "&Ucirc;"  => chr(219),
    "&Uuml;"   => chr(220),
    "&Yacute;" => chr(221),
    "&THORN;"  => chr(222),
    "&szlig;"  => chr(223),
    "&agrave;" => chr(224),
    "&aacute;" => chr(225),
    "&acirc;"  => chr(226),
    "&atilde;" => chr(227),
    "&auml;"   => chr(228),
    "&aring;"  => chr(229),
    "&aelig;"  => chr(230),
    "&ccedil;" => chr(231),
    "&egrave;" => chr(232),
    "&eacute;" => chr(233),
    "&ecirc;"  => chr(234),
    "&euml;"   => chr(235),
    "&igrave;" => chr(236),
    "&iacute;" => chr(237),
    "&icirc;"  => chr(238),
    "&iuml;"   => chr(239),
    "&eth;"    => chr(240),
    "&ntilde;" => chr(241),
    "&ograve;" => chr(242),
    "&oacute;" => chr(243),
    "&ocirc;"  => chr(244),
    "&otilde;" => chr(245),
    "&ouml;"   => chr(246),
    "&divide;" => chr(247),
    "&oslash;" => chr(248),
    "&ugrave;" => chr(249),
    "&uacute;" => chr(250),
    "&ucirc;"  => chr(251),
    "&uuml;"   => chr(252),
    "&yacute;" => chr(253),
    "&thorn;"  => chr(254),
    "&yuml;"   => chr(255),
    "&nbsp;"   => " ",
    "&amp;"    => " ",
    "&quot;"   => " ",
);
#=====================================================================
#
# Function esc2char($str)
# Last modified: 16.04.2004 18:22
#
#=====================================================================
# Callback for preg_replace_callback(): converts one HTML escape into a
# single replacement character.
#
#   $str     match array; $str[0] is the complete escape, e.g. "&Agrave;",
#            "&#200;" or "&#xC8;"
#   returns  - decimal/hexadecimal reference: chr(code) for codes 0..255,
#              a space for anything larger (it does not fit in one byte)
#            - anything else: the $html_esc entry for that exact text, or
#              a space when there is none
function esc2char($str) {
    global $html_esc;
    $esc = $str[0];

    # The patterns are anchored and require at least one digit. Otherwise
    # "digits followed by ';'" would match every escape, making the
    # hexadecimal branch unreachable and decoding "&#x41;" as chr(41).
    if (preg_match('/^&#([0-9]+);$/', $esc, $matches)) {
        $code = (int) $matches[1];
    } elseif (preg_match('/^&#[xX]([0-9a-fA-F]+);$/', $esc, $matches)) {
        $code = hexdec($matches[1]);
    } else {
        return isset($html_esc[$esc]) ? $html_esc[$esc] : " ";
    }

    # chr() silently wraps values modulo 256, so out-of-range code points
    # are replaced by a space instead of an unrelated byte.
    if ($code < 0 || $code > 255) {
        return " ";
    }
    return chr((int) $code);
}
#=====================================================================
?>
<?php
# Use the Russian CP1251 locale for every locale category.
setlocale(LC_ALL, "ru_RU.CP1251");
# Report all PHP errors, warnings and notices.
error_reporting(E_ALL);
/**
* RiSearch PHP
*
* web search engine, version 0.2
* (c) Sergej Tarasov, 2000-2004
*
* Homepage: http://risearch.org/
* email: risearch@risearch.org
*/
#=====================================================================
#
# Function hash1($key)
# Last modified: 16.04.2004 17:54
#
#=====================================================================
# Compute an integer hash of $key (byte string); the caller reduces it
# modulo the hash table size.
#
# NOTE(review): the original loop body was lost (the text between "<" and
# ">" is missing, leaving invalid PHP). The surviving fragments ("... >> 24",
# "$h &= ~$g", "return $h") match the classic ELF/PJW string hash, which is
# what is implemented here, limited to 32 bits. Verify against the upstream
# RiSearch source before reading an index built with another implementation.
#
#   $key     string to hash
#   returns  non-negative integer; 0 for the empty string
function hash1($key) {
    $h = 0;
    $length = strlen($key);
    for ($i = 0; $i < $length; $i++) {
        # Shift in the next byte, keeping the value within 32 bits.
        $h = (($h << 4) + ord($key[$i])) & 0xFFFFFFFF;
        # Fold the top four bits back into the low byte, then clear them.
        $g = $h & 0xF0000000;
        if ($g) {
            $h ^= $g >> 24;
        }
        $h &= ~$g;
    }
    return $h;
}
#=====================================================================
#
# Function getmicrotime()
# Last modified: 16.04.2004 17:54
#
#=====================================================================
# Return the current Unix time as a float, including the fractional part.
# microtime() without arguments returns the string "fraction seconds";
# both parts are converted to float and added together.
function getmicrotime(){
    $parts = explode(" ", microtime());
    $fraction = (float)$parts[0];
    $seconds = (float)$parts[1];
    return ($fraction + $seconds);
}
#=====================================================================
#
# Function get_META_info($html)
# Last modified: 05.04.2005 16:34
#
#=====================================================================
# Extract the META keywords and META description from an HTML document.
#
# Only tags written as <meta name="..." content="..."> are recognised:
# "name" must come before "content"; tag and attribute names are matched
# case-insensitively; the double quotes are optional.
#
#   $html    HTML source
#   returns  array(0 => keywords, 1 => description); a missing tag yields
#            an empty string (not null) for that element
function get_META_info($html) {
    $res = array();
    foreach (array("keywords", "description") as $index => $name) {
        $pattern = "/<\s*meta\s*name=\"?" . $name . "\"?\s*content=\"?([^\"]*)\"?\s*>/is";
        if (preg_match($pattern, $html, $matches)) {
            $res[$index] = $matches[1];
        } else {
            $res[$index] = "";
        }
    }
    return $res;
}
#=====================================================================
#
# Function index_file($html_text,$url)
# Last modified: 15.07.2004 11:35
#
#=====================================================================
# Index one document: append its record to the FINFO file and register
# every word of the document in the global $words table.
#
#   $html_text  raw HTML of the document (CP1251 bytes are assumed for
#               Cyrillic text, in line with the locale set at the top of
#               this file)
#   $url        URL stored in the record
#
# Side effects:
#   - increments $cfn, adds the document size in kb to $kbcount and prints
#     a progress line
#   - writes "url::size::title::description\n" to $fp_FINFO
#   - appends the packed (big-endian, 32-bit) offset of that record to
#     $words[word] for every distinct word that is at least $min_length
#     bytes long and is not in $stop_words_array
function index_file($html_text,$url) {
    global $cfn, $kbcount, $descr_size, $min_length, $stop_words_array, $use_esc;
    global $use_selective_indexing, $no_index_strings;
    global $use_META, $use_META_descr;
    global $fp_FINFO;
    global $words;
    global $numbers;

    $cfn++;
    $size = strlen($html_text);
    $kbcount += intval($size/1024);
    print "$cfn -> $url; totalsize -> $kbcount kb \n";

    # Delete parts of the document which should not be indexed.
    if ($use_selective_indexing == "YES") {
        foreach ($no_index_strings as $k => $v) {
            $start_marker = (string)$k;
            $end_marker = (string)$v;
            # An empty marker would turn the pattern into "/.*?/s", which
            # matches everywhere and blanks the whole document.
            if ($start_marker === "" || $end_marker === "") { continue; }
            # Markers are literal text: quote them so that "/" or regex
            # metacharacters inside a marker cannot break the pattern.
            $pattern = "/" . preg_quote($start_marker, "/") . ".*?" . preg_quote($end_marker, "/") . "/s";
            $html_text = preg_replace($pattern, " ", $html_text);
        }
    }

    # Title: the text between <title ...> and </title>.
    $title_pattern = "/<title\b[^>]*>\s*(.*?)\s*<\/title>/is";
    $title = "";
    if (preg_match($title_pattern, $html_text, $matches)) {
        $title = $matches[1];
    }
    $title = preg_replace("/\s+/"," ",$title);

    $keywords = "";
    $description = "";
    if ($use_META == "YES") {
        $res = get_META_info($html_text);
        $keywords = (string)$res[0];
        $description = (string)$res[1];
    }

    # Strip the title element, HTML comments, scripts, styles and all
    # remaining tags.
    $html_text = preg_replace($title_pattern, " ", $html_text);
    $html_text = preg_replace("/<!--.*?-->/s"," ",$html_text);
    $html_text = preg_replace("/<[Ss][Cc][Rr][Ii][Pp][Tt].*?<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/s"," ",$html_text);
    $html_text = preg_replace("/<[Ss][Tt][Yy][Ll][Ee].*?<\/[Ss][Tt][Yy][Ll][Ee]>/s"," ",$html_text);
    $html_text = preg_replace("/<[^>]*>/s"," ",$html_text);
    if ($use_esc == "YES") { $html_text = preg_replace_callback("/&[a-zA-Z0-9#]*?;/", 'esc2char', $html_text); }

    # Description: META description if requested and present, otherwise
    # the first $descr_size bytes of the page text.
    if (($use_META_descr == "YES") && ($description != "")) {
        # Collapse whitespace so a multi-line META description cannot
        # split the newline-terminated record written below.
        $descript = substr(preg_replace("/\s+/s"," ",$description),0,$descr_size);
    } else {
        $html_text = preg_replace("/\s+/s"," ",$html_text);
        $descript = substr($html_text,0,$descr_size);
    }

    # Text to index: page text plus keywords, description and title.
    $html_text = $html_text." ".$keywords." ".$description." ".$title;
    # Keep Latin letters, CP1251 Cyrillic letters (bytes 0xC0-0xFF), the
    # configured extra characters, spaces and hyphens. The Cyrillic range is
    # written as byte escapes so it does not depend on this file's encoding.
    $html_text = preg_replace("/[^a-zA-Z\xC0-\xFF$numbers -]/"," ",$html_text);
    $html_text = preg_replace("/\s+/s"," ",$html_text);
    # strtolower() handles ASCII; since PHP 8.2 it ignores the locale, so
    # CP1251 upper-case Cyrillic (0xC0-0xDF) is mapped to lower case
    # (0xE0-0xFF) explicitly. Where strtolower() already did this, the
    # mapping is a no-op.
    $cyr_upper = implode("", array_map("chr", range(0xC0, 0xDF)));
    $cyr_lower = implode("", array_map("chr", range(0xE0, 0xFF)));
    $html_text = strtr(strtolower($html_text), $cyr_upper, $cyr_lower);

    # "::" separates the record fields, so runs of colons are collapsed.
    $title = preg_replace("/:+/",":",$title);
    $descript = preg_replace("/:+/",":",$descript);
    if ($title == "") { $title = "No title"; }

    # Remember where this record starts, then write it.
    $pos = ftell($fp_FINFO);
    $pos = pack("N",$pos);
    fwrite($fp_FINFO, "$url::$size::$title::$descript\x0A");

    # Register each distinct word once, in order of first appearance.
    foreach (array_unique(explode(" ", $html_text)) as $word) {
        if (strlen($word) < $min_length) { continue; }
        if (array_key_exists($word,$stop_words_array)) { continue; }
        if (isset($words[$word])) {
            $words[$word] .= $pos;
        } else {
            $words[$word] = $pos;
        }
    }
}
#=====================================================================
#
# Function build_hash()
# Last modified: 16.04.2004 17:54
#
#=====================================================================
# Build the hash index from the global $words table and write it to the
# $HASH and $HASHWORDS files.
#
# Every word is hashed (hash1() modulo $HASHSIZE) - as a whole for
# $INDEXING_SCHEME 1, by its first 4 bytes for scheme 2, or by every
# 4-byte substring for scheme 3 - and the word's value is appended to the
# matching bucket. $HASH then receives one packed 32-bit big-endian entry
# per bucket: 0 for an empty bucket, otherwise the offset in $HASHWORDS
# where a packed count followed by the bucket data is stored.
function build_hash() {
    global $words;
    global $HASHSIZE, $INDEXING_SCHEME, $HASH, $HASHWORDS;

    # Start with every bucket empty.
    for ($slot = 0; $slot < $HASHSIZE; $slot++) {
        $hash_array[$slot] = "";
    }

    foreach ($words as $word => $value) {
        $word_length = strlen($word);

        # Number of substring start positions to hash.
        if ($word_length == 3 || $INDEXING_SCHEME != 3) {
            $start_count = 1;
        } else {
            $start_count = $word_length - 3;
        }

        # Length of each hashed substring.
        $piece_length = ($INDEXING_SCHEME == 1) ? $word_length : 4;

        for ($start = 0; $start < $start_count; $start++) {
            $piece = substr($word, $start, $piece_length);
            $bucket = abs(hash1($piece) % $HASHSIZE);
            $hash_array[$bucket] .= $value;
        }
    }

    $fp_HASH = fopen ("$HASH", "wb") or die("Can't open index file!");
    $fp_HASHWORDS = fopen ("$HASHWORDS", "wb") or die("Can't open index file!");

    # $HASHWORDS starts with four zero bytes, so offset 0 never refers to
    # real data and can stand for "empty bucket" in $HASH.
    $empty_entry = pack("N", 0);
    fwrite($fp_HASHWORDS, $empty_entry);
    $flushed_offset = ftell($fp_HASHWORDS);

    $hash_buffer = "";
    $hashwords_buffer = "";
    for ($slot = 0; $slot < $HASHSIZE; $slot++) {
        $bucket_data = $hash_array[$slot];
        if ($bucket_data == "") {
            $hash_buffer .= $empty_entry;
        } else {
            # Offset of this bucket = bytes already on disk + bytes buffered.
            $hash_buffer .= pack("N", $flushed_offset + strlen($hashwords_buffer));
            # NOTE(review): the count assumes 8-byte entries, while the values
            # appended by index_file() are 4-byte offsets - confirm the
            # intended record size.
            $hashwords_buffer .= pack("N", strlen($bucket_data)/8) . $bucket_data;
        }

        # Flush in chunks to keep the buffers small.
        if (strlen($hashwords_buffer) > 64000) {
            fwrite($fp_HASH, $hash_buffer);
            fwrite($fp_HASHWORDS, $hashwords_buffer);
            $hash_buffer = "";
            $hashwords_buffer = "";
            $flushed_offset = ftell($fp_HASHWORDS);
        }
    }

    fwrite($fp_HASH, $hash_buffer);
    fwrite($fp_HASHWORDS, $hashwords_buffer);
    fclose($fp_HASH);
    fclose($fp_HASHWORDS);
}
#=====================================================================
?>
Fatal error: Uncaught Error: Call to undefined function getmicrotime() in /var/www/p681921/data/www/sobranie.org/search/search.php:24
Stack trace:
#0 {main}
thrown in /var/www/p681921/data/www/sobranie.org/search/search.php on line 24
|