#!/usr/bin/perl -w
use CGI qw(:standard);
use CGI::Carp qw(warningsToBrowser fatalsToBrowser);
use strict;

# Read the search parameters sent by the search form.  param() returns undef
# for a field that was not submitted, so each value is normalised to an empty
# string; the later "eq" tests and regex substitutions then run without
# "uninitialized value" warnings.
my $title = param("title");
my $pub   = param("publisher");
my $issn  = param("issn");
my $sort  = param("sort");
foreach my $value ($title, $pub, $issn, $sort) {
    $value = "" unless defined $value;
}

# Subject check-boxes: a subject is selected when its parameter equals "on".
# If nothing is ticked, every subject is searched.
my @subs = ("Agriculture","Biology","Business","Chemistry","Computer Science","Economics","Education","Engineering","Geology","History","Humanities","Law","Mathematics","Medicine","Physics","Psychology","Social Science","Nocat");
my @subjects = grep { my $state = param($_); defined $state && $state eq "on" } @subs;
@subjects = @subs unless @subjects;

# Value-category check-boxes follow the same rule as the subjects.
my @vals = ("good","medium","bad");
my @values = grep { my $state = param($_); defined $state && $state eq "on" } @vals;
@values = @vals unless @values;

# Break the typed text into individual search keywords and phrases.
# splitFields is defined elsewhere in this script.
my @titlewords = splitFields($title);
my @pubwords   = splitFields($pub);

# A first element of "unmatchedquotation" is treated as splitFields' signal
# that the search text was malformed: report the problem and stop.
if (   (defined $titlewords[0] && $titlewords[0] eq "unmatchedquotation")
    || (defined $pubwords[0]   && $pubwords[0]   eq "unmatchedquotation")) {
    # CGI output must start with an HTTP header; without it the web server
    # rejects the response instead of showing the message.
    print header;
    # NOTE(review): the message has a closing </i> with no opening tag, so
    # some markup appears to have been lost from this string - confirm
    # against the original script.
    print "Error: Invalid search string. Possible unmatched quotation.<\/i>
Back to Search";
    exit;
}
# The search keywords are interpolated into regexes later, so every
# punctuation character in the class below is backslash-escaped first.  The
# two wildcards the search page allows are then turned back into regex
# syntax: "?" becomes "." (any one character) and "*" becomes ".*".
# An ISSN parameter that was never submitted is undef; treat it as blank so
# the substitutions and the comparison below do not warn.
$issn = "" unless defined $issn;
foreach my $term (@titlewords, @pubwords, $issn) {
    $term =~ s/([\$\\\.\|\(\)\[\]\*\+\?\{\}\@\-\'\"\/\`\~\,\<])/\\$1/ig;
    $term =~ s/\\\?/\./g;
    $term =~ s/\\\*/.\*/g;
}
# The ISSN is matched on its own later, so a blank ISSN search becomes ".*"
# and therefore matches every journal.
$issn = ".*" if ($issn eq "");
# The database is expected to be saved as data.txt in the same directory as
# this program (the cgi-bin).  Every line is read into memory, one journal
# per line.  Three-argument open is used so the mode is explicit, the system
# error is included in the failure message, and the handle is closed as soon
# as the data has been read.
open(my $IN, "<", "data.txt") or die "Can't open data: $!\n";
my @journals = <$IN>;
close($IN);

# Each journal line is a set of tab-delimited fields.  These variables give
# the field positions readable names.  The original comment states that there
# are 17 fields per line and that the ones not named here (listed price,
# interpreted price, CPI and other indices since 1980, etc.) are private and
# not shown on the website.
my $titleindex  = 0;
my $issnindex   = 1;
my $pubindex    = 2;
my $subindex    = 3;
my $ypindex     = 4;
my $profitindex = 5;
my $ppaindex    = 6;
my $ppcindex    = 7;
my $cpiindex    = 8;
my $cpiavgindex = 9;
my $valueindex  = 10;
# Collect the journals that satisfy every search criterion.  A journal is
# rejected as soon as one test fails; the tests run in this order:
#   1. at least one searched subject appears in the journal's subject field
#   2. every title keyword/phrase is accepted by allIn for the title
#   3. every publisher keyword/phrase is accepted by allIn for the publisher
#   4. the ISSN search regex matches the ISSN field
#   5. the journal's value field is one of the searched value categories
my @results = ();
foreach my $line (@journals) {
    chomp $line;
    my @fields = split(/\t/, $line);

    next if noneIn($fields[$subindex], @subjects);
    next unless allIn($fields[$titleindex], @titlewords);
    next unless allIn($fields[$pubindex], @pubwords);
    next unless " $fields[$issnindex] " =~ / $issn /i;
    next if noneIn($fields[$valueindex], @values);

    push @results, $line;
}
# Work out which field the results are sorted by.  Any unrecognised or
# missing "sort" parameter falls back to field 0.
my %sortfield = (
    pubsort   => $pubindex,
    issnsort  => $issnindex,
    ppasort   => $ppaindex,
    ppcsort   => $ppcindex,
    cpisort   => $cpiindex,
    ypsort    => $ypindex,
    valuesort => $cpiavgindex,
);
# param() yields undef for a field that was not submitted; compare against an
# empty string instead so no "uninitialized value" warnings are produced.
my $sortname  = defined $sort ? $sort : "";
my $sortindex = exists $sortfield{$sortname} ? $sortfield{$sortname} : 0;

# Numeric fields are compared with <=>, all other fields with a
# case-insensitive string comparison.
my %isnumeric = map { $_ => 1 } ("ppcsort", "ppasort", "cpisort", "ypsort", "valuesort");
my $numericsort = $isnumeric{$sortname} ? 1 : 0;

# Extract each row's sort key once, rather than splitting both rows again on
# every comparison, then sort on the precomputed keys.
my @keyed = map {
    my $field = getField($_, $sortindex);
    [ ($numericsort ? toNumber($field) : lc($field)), $_ ]
} @results;
my @sorted = map { $_->[1] } sort {
    $numericsort ? $a->[0] <=> $b->[0] : $a->[0] cmp $b->[0]
} @keyed;

# Reverse the list if the searcher asked for descending order.
my $order = param("order");
@sorted = reverse @sorted if (defined $order && $order eq "desc");
# Send the search results to the browser.  When the searcher ticked the
# "text" option the results go out as tab-separated plain text, one journal
# per line after a header line, and the script stops there.
if (param("text") eq "on") {
    print "Content-type: text/plain\n\n";
    print "RESULT NUMBER\tTITLE\tISSN\tPUBLISHER\tSUBJECT\tPRICE PER ARTICLE\tPRICE PER CITATION\tCOMPOSITE PRICE INDEX\tRELATIVE PRICE INDEX\tYEAR FIRST PUBLISHED\tPROFIT STATUS\tVALUE CATEGORY\n";

    my $rank = 0;
    foreach my $row (@sorted) {
        $rank++;
        my @fields = split(/\t/, $row);

        # "Nocat" is shown to the user as "Misc".
        (my $subjectfield = $fields[$subindex]) =~ s/Nocat/Misc/ig;

        # Price indices: keep at most two digits after the decimal point.
        # The value 999999 is a dummy meaning there were no articles or
        # citations to compute the index from, so it is replaced by a note.
        my @prices = @fields[$ppaindex, $ppcindex, $cpiindex, $cpiavgindex];
        foreach my $price (@prices) {
            my $dot = index($price, ".");
            if ($dot >= 0) {
                $price = substr($price, 0, $dot + 3);
            }
            if ($price == 999999) {
                $price = "N/A: No Articles Or Citations";
            }
        }

        print join("\t",
            $rank,
            @fields[$titleindex, $issnindex, $pubindex],
            $subjectfield,
            @prices,
            @fields[$ypindex, $profitindex, $valueindex]) . "\n";
    }
    exit;
}
#IF THEY DIDN'T WANT TEXT FORMAT, NOW WE HAVE TO OUTPUT HTML HEADERS, ETC.
# header and start_html are called as plain functions here; the script
# imports CGI's ":standard" set at the top of the file.
# NOTE(review): the statement starting "print <" and the lone ENDHTML line
# below look like the remains of a here-document (print <<ENDHTML ... ENDHTML)
# whose HTML content has been lost, along with the body of the "no results"
# message.  As written this section does not parse; it needs to be restored
# from the original script - TODO confirm against the original.
print header;
print start_html("Search Results");
print < ";
}
#TELL THE USER THERE WERE NO SEARCH RESULTS IF THERE WEREN'T ANY
if (@sorted == 0){
print "
ENDHTML
# Print one entry per result, in sorted order.
# NOTE(review): $j (the 1-based result number) is computed but does not
# appear in the output strings below, and the title field is never printed;
# markup that used them may have been lost from these strings - confirm
# against the original script.
foreach my $i (0 .. $#sorted) {
    my @fields = split(/\t/, $sorted[$i]);
    my $j = $i + 1;
    print "
Publisher: $fields[$pubindex]
ISSN: $fields[$issnindex]
Subject: ";

    # "Nocat" is shown to the user as "Misc".
    (my $subjectfield = $fields[$subindex]) =~ s/Nocat/Misc/ig;

    # Keep at most two digits after the decimal point, and replace the dummy
    # value 999999 with an explanatory note.
    my ($ppa, $ppc, $cpi, $val) = @fields[$ppaindex, $ppcindex, $cpiindex, $cpiavgindex];
    foreach my $price ($ppa, $ppc, $cpi, $val) {
        my $dot = index($price, ".");
        if ($dot >= 0) {
            $price = substr($price, 0, $dot + 3);
        }
        if ($price == 999999) {
            $price = "N/A: No Articles Or Citations";
        }
    }

    print "$subjectfield
Profit Status: $fields[$profitindex]
Year First Published: $fields[$ypindex]
Price per article: $ppa
Price per citation: $ppc
Composite Price Index: $cpi
";
}
# getField($line, $index)
# Splits $line on tab characters and returns the field at position $index
# (0-based).  Returns undef when the line has no field at that position.
# The element is fetched with a scalar subscript ($f[...]) rather than a
# one-element slice (@f[...]), which Perl warns about under -w.
sub getField {
    my ($line, $index) = @_;
    my @f = split(/\t/, $line);
    return $f[$index];
}
# noneIn($string, @words)
# Returns 1 when none of @words occurs in $string, and 0 as soon as one does.
# Commas are removed from $string first; the script uses this to check a
# journal's subject or value-category field.  A word only counts as present
# when it is bounded by spaces (or the ends of the string), and the
# comparison ignores case.  Each word is used as a regex fragment.
sub noneIn {
    my ($haystack, @words) = @_;
    $haystack =~ s/,//g;
    my $padded = " $haystack ";
    for my $word (@words) {
        if ($padded =~ / $word /i) {
            return 0;
        }
    }
    return 1;
}
# allIn($string, @terms)
# Returns 1 when $string satisfies every search term, 0 otherwise.
#   - A term starting with "^" is an exclusion: the string must NOT contain
#     the rest of the term.
#   - Any other term must be present in the string.
#   - Empty terms are ignored.
# Terms are used as regex fragments (the caller has already escaped
# punctuation and translated the ? and * wildcards), matched without regard
# to case, and must be bounded by spaces or the ends of the string.
# The caller's terms are not modified; the function works on copies.
sub allIn {
    my ($str, @tests) = @_;
    # Replace hyphens and slashes with spaces so each half is matched
    # separately, i.e. "HI-THERE" matches "HI", "THERE" and "HI THERE".
    $str =~ s/[\-\/]/ /g;
    # Ignore apostrophes, periods, commas, colons and parentheses.
    $str =~ s/[\'\.,:\(\)]//g;
    foreach my $t (@tests) {
        # Remove all whitespace around the search term; a single leftover
        # space would otherwise make the space-bounded match below fail.
        $t =~ s/^\s+//;
        $t =~ s/\s+$//;
        # Normalise the term the same way as the string above.  Hyphens and
        # slashes both become spaces; a preceding escape backslash is left in
        # place and simply escapes that space.
        $t =~ s/[\-\/]/ /g;
        # Drop the escaped forms of the ignored punctuation...
        $t =~ s/\\[\.\'\,:\(\)]//g;
        # ...and plain colons too: the escaping step does not backslash them,
        # yet they have been removed from the string being searched.
        $t =~ s/://g;
        # A leading "^" marks an exclusion term.
        if ($t =~ s/^\^//) {
            return 0 if (!($t eq "") && (" $str " =~ / $t /i));
        } else {
            return 0 if (!($t eq "") && !(" $str " =~ / $t /i));
        }
    }
    return 1;
}
#THIS TAKES A STRING THAT LOOKS LIKE A NUMBER AND RETURNS THE NUMBER
#IF THE STRING DOESN'T LOOK LIKE A NUMBER, RETURNS -1 (THAT'S OK BECAUSE WHEN THIS FUNCTION IS
#USED IN THIS PROGRAM IT'S CONVENIENT TO ALWAYS GET A NUMBER BACK AND THE RESULTS IN THIS
#PROGRAM SHOULD ALWAYS BE POSITIVE SO -1 WON'T OCCUR).
sub toNumber {
my $num = 0.0;
my $flag = length($_[0]) - 1;
for (my $i=0; $i