#!/usr/bin/perl --
require 5;
use strict;

=head1 copyright

Fluid Dynamics Search Engine

Copyright 1997-2001 by Zoltan Milosevic. Please adhere to the copyright
notice and conditions of use, described in the attached help file and hosted
at the URL below. For the latest version and help files, visit:

	http://www.xav.com/scripts/search/

This search engine is managed from the web, and it comes with a password to
keep it secure. You can set the password when you first visit this script
using the special "Mode=Admin" query string - for example:

	http://my.host.com/search.pl?Mode=Admin

If you edit the source code, you'll find it useful to restore the function
comments and #&Assert checks:

	cd "search/searchmods/powerusr/"
	hacksubs.pl build_map
	hacksubs.pl restore_comments
	hacksubs.pl assert_on

If you can see this text from a web browser, then there is a problem. Get help here.

=cut

# Package globals used throughout this script. They are declared with
# 'use vars' so that the string-eval'd code below can reference them under
# 'use strict'.
use vars qw( $VERSION %FORM $realms %const %Rules @str @sendmail %entity_value_by_name );
(%FORM,$realms,%const,%Rules,@str,@sendmail,%entity_value_by_name) = ();

$VERSION = '2.0.0.0051';

# The entire application is kept in a single-quoted here-document and run
# through a string eval at the bottom of the file.  That way a compile-time
# or run-time failure anywhere in the code ends up in $@ and is shown to the
# visitor as an HTML error report instead of a bare server error.
my $all_code = <<'END_OF_FILE';

# Candidate sendmail command lines.
@sendmail = (
	'/usr/sbin/sendmail -t',
	'/usr/bin/sendmail -t',
	'/usr/lib/sendmail -t',
	'/usr/sendmail -t',
	'/bin/sendmail -t',
	);

local $_;

# Drop the environment variables that influence how a shell locates and
# runs programs.
foreach ('IFS','CDPATH','ENV','BASH_ENV','PATH') {
	delete $ENV{$_} if (defined($ENV{$_}));
	}

binmode(STDOUT);

# Per-request constants; a reference to this hash is handed to every
# PrintTemplate() call below.
%const = (
	'pdf utility folder' => "",
	'global_lockfile_count' => 1,
	'help_file' => 'http://www.xav.com/scripts/search/help/',
	'script_start_time' => time(),
	'remote_addr' => &query_env('REMOTE_ADDR'),
	'script_name' => &query_env('SCRIPT_NAME'),
	'admin_url' => &query_env('SCRIPT_NAME') . '?Mode=Admin',
	'search_url' => &query_env('SCRIPT_NAME'),
	'request_method' => 'POST',
		# warning - if you change 'request_method' to GET, most admin actions will fail because browsers don't xfer more than 2000 bytes per GET request
	'file_mask' => 0766,
	'copyright' => "<P ALIGN=center><FONT SIZE=-2> Powered by the<BR> <A HREF=\"http://www.xav.com/scripts/search/\" TARGET=_blank>Fluid Dynamics<BR>Search Engine</A><BR> v$VERSION<BR> &copy; 2001</FONT></P>",
	);

# Main request handler.  The bare block is used as a one-pass loop:
#   'last Err' - the request has been fully answered
#   'next Err' - $err holds a message; the 'continue' block prints it
my $err = '';
Err: {

	# Give the folder where all data files are located:
	$err = &load_files( 'searchdata' );
	next Err if ($err);

	my $address_offer = '';

	# The search terms may arrive under any of three parameter names;
	# normalize so all three hold the same value.
	my $terms = $FORM{'Terms'} || $FORM{'terms'} || $FORM{'q'} || '';
	$FORM{'Terms'} = $FORM{'terms'} = $FORM{'q'} = $terms;
	$const{'terms'} = &html_encode($FORM{'Terms'});

	# If the visitor typed a URL instead of search terms, either offer it
	# as a link (rule value 2) or redirect straight to it (rule value 3).
	AddressAsTerm: {
		last unless ($Rules{'handling url search terms'} > 1);
		# no whitespace allowed; this also keeps CR/LF out of the
		# Location: header printed below
		last if ($terms =~ m!\s!);
		my $address = '';
		if ($terms =~ m!^(http|ftp|https|telnet)://(\w+)\.(\w+)(.*)$!) {
			$address = $terms;
			}
		elsif ($terms =~ m!^www\.(\w+)\.(\w+)(.*)$!i) {
			$address = "http://$terms";
			}
		if ($address) {
			$address_offer = &pstr(3, &html_encode($address), &html_encode($address) );
			if ($Rules{'handling url search terms'} == 3) {
				print "Status: 302 Moved Temporarily\015\012";
				print "Location: $address\015\012";
				print "Content-Type: text/html\015\012\015\012";
				print $address_offer;
				last Err;
				}
			}
		}

	if ($FORM{'NextLink'}) {

		#changed 0034 - fixes bug where NextLink contains &
		if (&query_env('QUERY_STRING') =~ m!^NextLink=(.*)$!) {
			$FORM{'NextLink'} = $1;
			}

		my $html_link = &html_encode($FORM{'NextLink'});

		# security re-director from admin screen (prevents query-string-based
		# password from showing up in referer logs of remote systems:
		# NOTE(review): this forwards to whatever URL is supplied, i.e. it is
		# an open redirector by design - confirm that is still acceptable.
		print "Content-Type: text/html\015\012\015\012";
		print "<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;URL=$html_link\"></HEAD><A HREF=\"$html_link\">$html_link</A>";
		last Err;
		}

	my $Realm = $FORM{'Realm'} || 'All';
	$const{'realm'} = &html_encode($Realm);

	if ($FORM{'Mode'} eq 'Admin') {
		$err = &admin_main();
		next Err if ($err);
		last Err;
		}

	# Public pages show the copyright notice on a single line.
	$const{'copyright'} =~ s!<BR>! !sg;

	# improve perceived snappiness
	$| = 1;
	print "Content-Type: text/html\015\012\015\012";

	#changed 0046
	if ($FORM{'Mode'} eq 'SearchForm') {
		print &str_search_form( &html_encode($FORM{'search_url'}) || $const{'search_url'} );
		last Err;
		}

	&PrintTemplate(0, 'header.htm', $Rules{'language'}, \%const);
	$| = 0;

	if ($FORM{'Mode'} eq 'AnonAdd') {
		&anonadd_main();
		}
	elsif (not ($terms)) {
		# nothing to search for: show the form and the tips page
		print &str_search_form( $const{'search_url'} );
		&PrintTemplate(0, 'tips.htm', $Rules{'language'}, \%const);
		}
	else {

		if ($address_offer) {
			print '<P>' . $address_offer . '</P>';
			}

		# Rank is the 1-based position of the first hit to display.  It is
		# used in arithmetic, as a list-slice bound, and is passed on to
		# ppstr() and log_search(), so only a plain positive integer is
		# accepted; anything else falls back to 1.  The 9-digit cap keeps
		# the slice range below inside Perl's integer range.
		my $Rank = 1;
		if ((defined($FORM{'Rank'})) and ($FORM{'Rank'} =~ m!^(\d{1,9})$!) and ($1 > 0)) {
			$Rank = int($1);
			}

		my ($bTermsExist, $Ignored_Terms, $Important_Terms, $DocSearch, $RealmSearch, $where_clause, @SearchTerms) = &parse_search_terms($terms, $FORM{'Match'});

		#changed 0042 - persist maxhits
		# Base URL for the previous/next page links.  Every user-supplied
		# value is URL-encoded before it goes into the link (Match was
		# previously interpolated raw).
		my $linkhits = $const{'search_url'} . '?Realm=' . &url_encode($FORM{'Realm'}) . '&Match=' . &url_encode($FORM{'Match'}) . '&Terms=' . &url_encode($terms);

		unless ($realms->realm_count('is_runtime')) {
			$linkhits .= '&nocpp=1';
			}

		if ($FORM{'sort-method'}) {
			$linkhits .= '&sort-method=' . &url_encode($FORM{'sort-method'});
			}

		# Only $pages_searched is initialized here (to 0); @HITS starts
		# empty and the remaining scalars start undefined.
		my ($pages_searched, @HITS, $p_realm_data, $DD, $MM, $YYYY, $FBYTES) = (0);

		Search: {
			next Search unless ($bTermsExist);

			#changed 0042 -- added support for include-by-name, fixed runtime under sql

			# include runtime realms:
			if ($Realm eq 'include-by-name') {
				unless ($FORM{'nocpp'}) {
					foreach $p_realm_data ($realms->listrealms('is_runtime')) {
						next unless ($FORM{"Realm:$$p_realm_data{'name'}"});
						$linkhits .= "&Realm:$$p_realm_data{'url_name'}=1";
						$const{'record_realm'} = $$p_realm_data{'url_name'};
						&SearchRunTime($p_realm_data, $DocSearch, \$pages_searched, \@HITS);
						}
					}
				}
			elsif ($Realm eq 'All') {
				unless ($FORM{'nocpp'}) {
					foreach $p_realm_data ($realms->listrealms('is_runtime')) {
						$const{'record_realm'} = $$p_realm_data{'url_name'};
						&SearchRunTime($p_realm_data, $DocSearch, \$pages_searched, \@HITS);
						}
					}
				}
			else {
				($err, $p_realm_data) = $realms->hashref($Realm);
				next Err if ($err);
				if (($$p_realm_data{'is_runtime'}) and (not $FORM{'nocpp'})) {
					$const{'record_realm'} = $$p_realm_data{'url_name'};
					&SearchRunTime($p_realm_data, $DocSearch, \$pages_searched, \@HITS);
					# a single runtime realm has no indexed data to add
					last Search;
					}
				}

			# include indexed realms:
			if ($Rules{'sql: enable'}) {

				if ($Realm eq 'include-by-name') {
					my @id_ok = ();
					foreach $p_realm_data ($realms->listrealms('all')) {
						next unless ($FORM{"Realm:$$p_realm_data{'name'}"});
						$linkhits .= "&Realm:$$p_realm_data{'url_name'}=1";
						push(@id_ok, "(realm_id = $$p_realm_data{'realm_id'})");
						$pages_searched += $$p_realm_data{'pagecount'};
						}
					last Search unless (@id_ok);
					$where_clause .= ' AND (' . join(' OR ', @id_ok) . ')';
					}
				elsif ($Realm ne 'All') {
					($err, $p_realm_data) = $realms->hashref($Realm);
					next Err if ($err);
					$where_clause .= " AND realm_id = $$p_realm_data{'realm_id'}";
					$pages_searched = $$p_realm_data{'pagecount'};
					}
				else {
					foreach $p_realm_data ($realms->listrealms('all')) {
						$pages_searched += $$p_realm_data{'pagecount'};
						}
					}
				$const{'record_realm'} = '';
				&SearchDatabase($where_clause, $DocSearch, \@HITS);
				}
			else {
				if ($Realm eq 'include-by-name') {
					foreach $p_realm_data ($realms->listrealms('has_file')) {
						next unless ($FORM{"Realm:$$p_realm_data{'name'}"});
						$linkhits .= "&Realm:$$p_realm_data{'url_name'}=1";
						$const{'record_realm'} = $$p_realm_data{'url_name'};
						&SearchIndexFile($$p_realm_data{'file'}, $RealmSearch, \$pages_searched, \@HITS);
						}
					}
				elsif ($Realm ne 'All') {
					($err, $p_realm_data) = $realms->hashref($Realm);
					next Err if ($err);
					$const{'record_realm'} = $$p_realm_data{'url_name'};
					&SearchIndexFile($$p_realm_data{'file'}, $RealmSearch, \$pages_searched, \@HITS);
					}
				else {
					foreach $p_realm_data ($realms->listrealms('has_file')) {
						$const{'record_realm'} = $$p_realm_data{'url_name'};
						&SearchIndexFile($$p_realm_data{'file'}, $RealmSearch, \$pages_searched, \@HITS);
						}
					}
				}
			}

		# Paging arithmetic: which slice of the hits goes on this page and
		# how many would go on the next one.
		my ($HitCount, $PerPage, $Next) = (scalar @HITS, $Rules{'hits per page'}, 0);

		if (($FORM{'maxhits'} =~ m!^(\d+)$!) and ($FORM{'maxhits'} > 0)) {
			$PerPage = $1;
			$linkhits .= '&maxhits=' . $PerPage;
			}

		my $Remaining = $HitCount - $Rank - $PerPage + 1;
		my $RangeUpper = $Rank + $PerPage - 1;

		if ($Remaining >= $PerPage) {
			$Next = $PerPage;
			}
		elsif ($Remaining > 0) {
			$Next = $Remaining;
			}
		else {
			# last page: the range stops at the final hit
			$RangeUpper = $HitCount;
			}

		my @Ads = &SelectAdEx(\@SearchTerms);

		print $Ads[0];

		print '<B>' . $str[10] . '</B><BR>';
		if ($Ignored_Terms) {
			&ppstr(11, &html_encode($Ignored_Terms));
			}
		if ($HitCount) {
			&ppstr(12, &html_encode($Important_Terms), $pages_searched);
			}
		else {
			&ppstr(13, &html_encode($Important_Terms), $pages_searched);
			}
		print '<BR>';

		print $Ads[1];

		PrintHits: {
			if ($HitCount < 1) {
				# print: No documents found
				print "<P>$str[19]</P>\n";
				last PrintHits;
				}

			# print: Results $Rank-$RangeUpper of $HitCount
			&ppstr(14, $Rank, $RangeUpper, $HitCount );

			my ($jump_sum, $jumptext) = &str_jumptext( $Rank, $PerPage, $HitCount, "$linkhits&Rank=", 1 );
			# $jump_sum = "Documents 1-10 of 15 displayed."
			# $jumptext = "<P><- Previous 1 2 3 4 5 Next -></P>"

			# Hit records are plain strings; they are sorted as strings and
			# then taken apart by the pattern below.
			my $i = $Rank;
			foreach ((sort @HITS)[($Rank-1)..($RangeUpper-1)]) {
				next unless (m!^\d+\.(\d+)\.(\d+)\s*\d*\s*\d* u= (.+) t= (.*?) d= (.*?) c= (.*?) r= (.*?)$!);
				# $2 is a packed field: 2 chars skipped, then DD, MM, YYYY
				# and the remainder as the byte size
				($DD, $MM, $YYYY, $FBYTES) = (unpack('A2A2A2A4A*',$2))[1..4];
				my $relevance = 10E6 - $1;
				print &StandardVersion( \@SearchTerms,
					'relevance' => $relevance,
					'redirector' => $Rules{'redirector'},
					'rank' => $i,
					'url' => $3,
					'title' => $4,
					'description' => $5,
					'size' => $FBYTES,
					'dd' => $DD,
					'mm' => $MM,
					'yyyy' => $YYYY,
					'context' => $6,
					'record_realm' => &html_encode(&url_decode($7)),
					);
				$i++;
				}
			print $jump_sum;
			print $jumptext;
			}

		print $Ads[2];
		print &str_search_form($const{'search_url'});
		print $Ads[3];

		&log_search( $Realm, $terms, $Rank, $HitCount, $pages_searched );
		}

	if (($Rules{'allowanonadd'}) and ($realms->realm_count('has_no_base_url')) and ($const{'mode'} != 3)) {
		# print: Search Tips - Add New URL - Main Page
		&PrintTemplate(0, 'linkline2.txt', $Rules{'language'}, \%const);
		}
	else {
		# print: Search Tips - Main Page
		&PrintTemplate(0, 'linkline1.txt', $Rules{'language'}, \%const);
		}
	&PrintTemplate(0, 'footer.htm', $Rules{'language'}, \%const);
	last Err;
	}
continue {
	# reached only through 'next Err', i.e. when $err has been set
	print "Content-Type: text/html\015\012\015\012";
	print "<P><B>Error:</B> $err.</P>\n";
	}


# query_env( $name, $default )
#
# Returns the value of environment variable $name, passed through a
# capturing regex so that it comes back untainted.  If the variable is unset
# or false (this includes '' and '0'), returns $default when one is given
# and the empty string otherwise.
sub query_env {
	my ($name,$default) = @_;
	if (($ENV{$name}) and ($ENV{$name} =~ m!^(.*)$!s)) {
		return $1;
		}
	elsif (defined($default)) {
		return $default;
		}
	else {
		return '';
		}
	}


# untaintme( \$value )
#
# Untaints the referenced scalar in place by re-assigning it from a
# match-everything capture.  No validation is performed.
sub untaintme {
	my ($p_val) = @_;
	$$p_val = $1 if ($$p_val =~ m!^(.*)$!s);
	}


# load_files( $data_files_dir )
#
# Start-up routine: changes into the script folder, loads the libraries from
# 'searchmods' and checks their versions, reads the request via ReadInput(),
# applies the backward-compatibility parameter rewrites, changes into
# $data_files_dir, then loads the rules, the language strings and the realm
# list.
#
# Returns '' on success or an error message describing the first failure.
sub load_files {
	my ($data_files_dir) = @_;
	my $err = '';
	Err: {

		# This manually sets the current working directory to the directory that
		# contains this script. This is necessary in case people have used a
		# relative path to the $data_files_dir:

		# ($1 still holds the folder from the first match inside the block
		# because the second, negated pattern did not match)
		if (($0 =~ m!^(.*)(\\|/)!) and ($0 !~ m!safeperl\d*$!)) {
			#changed 0045 - added error check
			unless (chdir($1)) {
				$err = "unable to chdir to script folder '$1' - $!";
				next Err;
				}
			push(@INC, "$1/searchmods");
			}

		push(@INC, './searchmods', '../searchmods');

		unless (-e 'searchmods') {
			$err = "directory 'searchmods' does not exist";
			next Err;
			}

		#require
		# (the %INC entry is deleted first so that 'require' really loads
		# the file again)
		my $lib = 'common.pl';
		delete $INC{$lib};
		require $lib;
		if (&version_c() ne $VERSION) {
			$err = "the library '$lib' is not version $VERSION";
			next Err;
			}
		#/require

		&ReadInput();

		#require
		unless ($FORM{'nocpp'}) {
			$lib = 'common_parse_page.pl';
			delete $INC{$lib};
			require $lib;
			if (&version_cpp() ne $VERSION) {
				$err = "the library '$lib' is not version $VERSION";
				next Err;
				}
			}
		#/require

		# ?ApproveRealm=X is shorthand for the admin "show pending" screen
		# of realm X.
		if ($FORM{'ApproveRealm'}) {
			$FORM{'Realm'} = $FORM{'ApproveRealm'};
			$FORM{'Mode'} = 'Admin';
			$FORM{'Action'} = 'FilterRules';
			$FORM{'subaction'} = 'ShowPending';
			}

		unless ($FORM{'Mode'}) {

			#reverse compat - pre-0010
			if ($FORM{'AddSite'}) {
				$FORM{'Mode'} = 'AnonAdd';
				$FORM{'URL'} = $FORM{'AddSite'};
				delete $FORM{'AddSite'};
				}
			#/reverse compat

			#changed 0043 - be nice to precious users; ?mode=admin, ?MODE=ADMIN will work
			if ('mode=admin' eq lc(&query_env('QUERY_STRING'))) {
				$FORM{'Mode'} = 'Admin';
				}
			}

		#reverse compat 0030
		if ($FORM{'Action'} eq 'ReCrawlRealm') {
			$FORM{'Action'} = 'rebuild';
			}
		#/reverse compat

		# Admin and AnonAdd requests need the admin library and the full
		# set of strings.
		my $is_admin_rq = (($FORM{'Mode'}) and (($FORM{'Mode'} eq 'Admin') or ($FORM{'Mode'} eq 'AnonAdd'))) ? 1 : 0;

		$is_admin_rq = 1 if (&query_env('FDSE_NO_EXEC'));

		#require
		if ($is_admin_rq) {
			$lib = 'common_admin.pl';
			delete $INC{$lib};
			require $lib;
			if (&version_ca() ne $VERSION) {
				$err = "the library '$lib' is not version $VERSION";
				next Err;
				}

			$lib = 'common_parse_page.pl';
			require $lib;
			if (&version_cpp() ne $VERSION) {
				$err = "the library '$lib' is not version $VERSION";
				next Err;
				}
			}
		#/require

		unless (chdir($data_files_dir)) {
			$err = "unable to chdir to data files directory '$data_files_dir' - $!";
			next Err;
			}

		$const{'bypass_file_locking'} = (-e 'bypass_file_locking.txt') ? 1 : 0;

		# Can we load the rules?
		$err = &LoadRules();
		next Err if ($err);

		my $str_file = 'templates/' . $Rules{'language'} . '/strings.txt';

		my $str_text;
		($err, $str_text) = &ReadFileL($str_file);
		next Err if ($err);

		# @str is 1-based: slot 0 is a placeholder so that line N of the
		# strings file lands in $str[N].
		@str = (0);
		my $i = 1;
		foreach (split(m!\n!s,$str_text)) {
			s!(\r|\n|\015|\012)!!g;
			push(@str,$_);
			unless ($is_admin_rq) {
				# non-admin requests stop reading after line 88
				last if ($i > 87);#strdepth
				}
			$i++;
			}
		unless (&Trim($str[1]) eq "VERSION $VERSION") {
			$err = "strings file is not version $VERSION ($str[1])";
			next Err;
			}
		$const{'language_str'} = $str[2];

		$realms = &fdse_realms_new();
		$realms->use_database($Rules{'sql: enable'});
		$realms->load();

		$const{'is_demo'} = 1 if (-e 'is_demo');

		# Mode 2 without a registration key is downgraded to mode 1; the
		# presence of an 'is_demo' file forces mode 0.
		$const{'mode'} = $Rules{'mode'};
		if (($const{'mode'} == 2) and (not $Rules{'regkey'})) {
			$const{'mode'} = 1;
			}
		$const{'mode'} = 0 if (-e 'is_demo');

		last Err;
		}
	return $err;
	}

END_OF_FILE

# Compile and run the application.  On failure, show the error together with
# a form that lets the visitor report it to the script author.
undef($@);
eval $all_code;
if ($@) {

	# Local HTML escaper.  The library's own html_encode() cannot be relied
	# on here because the failure may have happened before it was loaded.
	# '&' is replaced first so the entities produced by the later
	# substitutions are not escaped a second time.
	my $escape_html = sub {
		my ($text) = @_;
		$text = '' unless (defined($text));
		$text =~ s!\&!\&amp;!g;
		$text =~ s!\"!\&quot;!g;
		$text =~ s!\<!\&lt;!g;
		$text =~ s!\>!\&gt;!g;
		return $text;
		};

	my $errstr = &$escape_html($@);
	my $script = &$escape_html($0);

	print "Content-Type: text/html\015\012\015\012";
	# The message is escaped and the PRE/BLOCKQUOTE pair is closed, so the
	# report form that follows is rendered as a form rather than as text.
	print "<HR><P><B>Perl Execution Error</B> in $script:</P><BLOCKQUOTE><PRE>$errstr</PRE></BLOCKQUOTE>\n";

print <<"EOM";
<FORM METHOD="post" ACTION="http://www.xav.com/bug.pl">
<INPUT TYPE=hidden NAME="product" VALUE="search">
<INPUT TYPE=hidden NAME="version" VALUE="$VERSION">
<INPUT TYPE=hidden NAME="Perl Version" VALUE="$]">
<INPUT TYPE=hidden NAME="Script Path" VALUE="$script">
<INPUT TYPE=hidden NAME="Perl Error" VALUE="$errstr">
EOM

	# Request parameters are visitor-supplied, so both names and values are
	# escaped before being placed inside attribute values.
	foreach my $name (sort keys %FORM) {
		my $safe_name = &$escape_html($name);
		my $safe_value = &$escape_html($FORM{$name});
		print "<INPUT TYPE=hidden NAME=\"Form: $safe_name\" VALUE=\"$safe_value\">\n";
		}

print <<"EOM";
<P>Please report this error to the script author:</P>
<BLOCKQUOTE><INPUT TYPE="submit" VALUE="Report Error"></BLOCKQUOTE>
</FORM><HR>
EOM

	}

1;