Search FNA Database

#!/usr/bin/perl ############################################################################# # PERL Program "fnasearch.pl" # # April 10, 1998 # # AUTHORS # John laPlante # Alex Frakt # # PURPOSE # This program handles the cgi scripting for the fna indexes. # # METHODOLOGY # A recursively called file that prints a form de search and processes the # data in the form once it is submitted. # ################################################################### # Include file for GDBM database and initialization of the GDBM database. # the use line. Place at top of script. use GDBM_File; require '/usr2/people/lis429/public_html/stem.pl'; #The following code was taken from a web site with URL #http://cgi-lib.stanford.edu/cgi-lib/ require "cgi-lib.pl"; # Print html header print < Search FNA Database

Search FNA Database

HEADER print <\n

Find Term 1		in field
AND	(e.g. species with Term 1 AND Term 2)
OR	(e.g. species with Term 1 OR Term 2)
Find Term 2		in field

\n
END

# Main query processing.
#
# ReadParse (from cgi-lib.pl) reads the GET or POST data, converts it to
# unescaped text, and creates key/value pairs in %in, using '\0' to separate
# multiple selections.
if (!(&ReadParse)) {
    # ReadParse returned false -- presumably no form data was submitted
    # (confirm against cgi-lib.pl) -- so there is no query to run.
    print &PrintVariables;
}
else {
    print &PrintVariables;

    # Here we process the query either directly or through sub calls.
    # These stay package globals on purpose: the subroutines further down
    # share %doc_hash with this code.
    $boolean    = $in{'boolean'};
    $searchbox1 = $in{'searchbox1'};
    $searchbox2 = $in{'searchbox2'};
    $indexes1   = $in{'indexes1'};
    $indexes2   = $in{'indexes2'};

    $doc_results = "";
    @doc_results = "NullDoc";

    # Tie the gdbm databases for the two selected indexes.
    @SelectedIndexes = ($indexes1, $indexes2);
    $db = database_start(@SelectedIndexes);

    # First search term.
    # NOTE(review): "$full" inside this pattern is interpolated as a variable
    # and the trailing "$" is an end-of-string anchor -- confirm that this
    # really matches the intended full-text index label.
    if ($indexes1 =~ /MG Index $full text$/) {
        mg_search($searchbox1, $indexes1);
    }
    else {
        @searchbox1 = $searchbox1;
        print "Searchbox1 = $searchbox1\n";
        @searchbox1 = stem(@searchbox1);
        $searchbox  = $searchbox1[0];
        print "Stemmed Searchbox = $searchbox\n";
        database_search($db, $searchbox, $indexes1);
    }

    # Second search term (same handling, without the debug output).
    if ($indexes2 =~ /MG Index $full text$/) {
        mg_search($searchbox2, $indexes2);
    }
    else {
        @searchbox2 = $searchbox2;
        @searchbox2 = stem(@searchbox2);
        $searchbox2 = $searchbox2[0];
        database_search($db, $searchbox2, $indexes2);
    }

    # print our results
    print "\n\nSearch results:\n\n";

    # Start from zero so that an empty result set reports "0" rather than
    # an empty string.
    $counter = 0;

    foreach my $item (keys(%doc_hash)) {
        # Any value of $boolean other than "OR" behaves as AND: the document
        # must have been counted more than once. For "OR" every hit is shown.
        next unless ($doc_hash{$item} > 1) || ($boolean eq "OR");

        $counter++;
        print "$item\n";

        # Three-argument open: the document name can never be taken as an
        # open mode or a pipe.
        open(DOCFILE, '<', "/usr2/people/lis429/public_html/species/$item")
            or die("$0 Died -- document file $item , could not be input\n");

        my $linenum = 0;
        while (my $line = <DOCFILE>) {
            $linenum++;

            # Echo lines 9 through 12 of the document file.
            if ($linenum >= 9 && $linenum <= 12) {
                print $line;
            }

            #if(($line=~/^\Description/) && ($occurance<=1))
            # On the first line mentioning "Description", print its first
            # thirty blank-separated words as two rows of fifteen, then stop
            # reading this document.
            if ($line =~ /.*Description.*/) {
                my @line = split(/ +/, $line);
                for my $i (0 .. 14) {
                    print $line[$i];
                    print ' ';
                }
                print "\n";
                for my $i (15 .. 29) {
                    print $line[$i];
                    print ' ';
                }
                print "\n";
                last;
            }
        }    #end of while loop

        # Release the handle before moving on to the next document.
        close(DOCFILE);
    }    #end of foreach loop

    print "\nCount of results is $counter\n";

    # synopsis: flatfiler.pl gdbm_file_1 [gdbm_file_n]
    # A very, very simple program designed to print the content of every GDBM
    # file supplied on the commandline
    # $fileX = "/usr2/people/lis429/public_html/fnadatabases/leaf.gdbm";
    # my(%dbX);
    # my($dbrefX);
    # print "\nFile: $file\n\n";
    # $dbrefX=tie(%dbX, GDBM_File, $fileX, &GDBM_READER, 0664);
    # while (($keyX, $valX) = each(%dbX)) {
    #     print "$keyX\n";
    #     print " $valX\n\n";
    # }
    #print "\n\n";
}

# Print closing html elements
print "\n\n\n";
print "
Class development page

";
print "Copyright UIUC GSLIS course 429";
print "\n";
print "\n";

# Exit the program normally.
exit (0);

# Begin Subroutines ------------------------------------

# mg_search
#   Runs the external mgquery.pl program for one query and adds every
#   document name it reports to the global %doc_hash tally.
#
#   Arguments:
#     $_[0] - the query string taken from the web form
#     (callers also pass the index name as a second argument; it is unused)
#
#   Dies if mgquery.pl exits with a non-zero status or if its output file
#   cannot be read.
#
#   Example: mg_search($searchbox2);
sub mg_search {
    my ($query) = @_;

    # NOTE(review): a predictable, pid-based name in /tmp can be pre-created
    # by another local user; consider a private temp directory.
    my $tmpfile = "/tmp/fnaquery.$$";
    my $program = "/usr2/people/lis429/public_html/mgquery.pl";

    # List-form system() bypasses the shell, so the user's query is passed
    # through as a single argument without any quoting problems.
    my $result = system($program, "-o", $tmpfile, "-q", $query);
    if ($result) {
        unlink($tmpfile);    # do not leave a partial result file behind
        die("mg_search failed.");
    }

    # One document name per line.
    open(TMP, '<', $tmpfile)
        or die("mg_search: cannot read $tmpfile: $!");
    while (my $la = <TMP>) {
        chomp($la);
        $doc_hash{$la}++;
    }
    close(TMP);
    unlink($tmpfile);
}

# database_start
#   This routine initializes (gives a handle to them) the databases
#   that were specified in the web form select list. The database files
#   were created during the index creation step (i.e. fnaindex.pl et. al.)
#
#   Arguments: the list of selected index names.
#
#   Returns a reference to a hash with two entries:
#     'databases'  => { index name => reference to the hash tied to its gdbm file }
#     'references' => { index name => object returned by tie() (undef on failure) }
#
#   A failed tie() is not fatal: that index is simply left as an empty hash,
#   so later lookups in it find nothing.
sub database_start {
    my (@selectedindexes) = @_;    # argument list: index names
    my (%databases);               # one reference to a gdbm hash per index
    my (%dbrefs);                  # one tie() object per index
    my (%dbstruct);                # a hash to hold both dbrefs and databases

    foreach my $index (@selectedindexes) {
        my (%db);                  # block-local: a fresh hash each time thru loop
        my ($dbnum);               # %db for hash, $dbnum for class reference

        print $index;

        my ($filename) = "/usr2/people/lis429/public_html/fnadatabases/"
                       . $index . ".gdbm";

        # The index name comes straight from the web form. Refuse names that
        # could point outside the fnadatabases directory; such an index is
        # treated exactly like one whose tie() failed.
        if ($index !~ m{[/\0]}) {
            # perform the tie() -- see man perlfunc/man perltie/man GDBM_File
            $dbnum = tie(%db, 'GDBM_File', $filename, &GDBM_READER(), 0664);
        }

        # take reference to hash and put in %databases, put class ref into
        # %dbrefs
        $databases{$index} = \%db;
        $dbrefs{$index}    = $dbnum;
    }    # do next index name

    # put references to the hash and class collections into %dbstruct
    $dbstruct{'databases'}  = \%databases;
    $dbstruct{'references'} = \%dbrefs;

    # return a reference to %dbstruct
    return (\%dbstruct);
}

# database_start needs to be called after the boldwords are initialized.
# Our final draft should call it thus:
#     $db = &database_start(indexnames);
#
# database_search:
#   This routine handles queries on the gdbm database files.
#   It expects to be passed the following:
#     $dbs            - reference to the structure returned by database_start
#     $searchbox      - the keyword text that the web form user submitted
#     $selected_index - the name of the index to look the keyword up in
#
#   Every document listed for the keyword is counted in the global %doc_hash.
#   Returns the contents of %doc_hash as a flat key/value list.
sub database_search {
    my ($dbs, $searchbox, $selected_index) = @_;

    $searchbox =~ tr/A-Z/a-z/;    # Convert to lowercase
    $searchbox =~ s/\W//g;        # Remove non word chars

    # NOTE(review): the substitution above also removes blanks, so this split
    # can never produce more than one term. It is left as is because counting
    # several terms from one search box would change how the caller's AND
    # test ($doc_hash{...} > 1) behaves -- confirm the intended semantics.
    my @searchterms = split(/ +/, $searchbox);

    # The dereferencing looks like this:
    #     $dbs->{'databases'}->{$selected_index}->{$word}
    my $index_db = $dbs->{'databases'}->{$selected_index};

    foreach my $searchterm (@searchterms) {
        my $doc_results = $index_db->{$searchterm};
        next unless $doc_results;

        # split(' ', ...) skips leading whitespace, so a posting list that
        # starts with a blank cannot produce an empty document name.
        foreach my $doc (split(' ', $doc_results)) {
            $doc_hash{$doc}++;
        }
    }    # foreach $searchterm

    return %doc_hash;
}

#build the list of searchable databases (fields) for
#drop down list boxes "indexes1" and "indexes2"
sub buildDBlist { print "