onsearch
view release on metacpan or search on metacpan
cgi/archive.cgi view on Meta::CPAN
#!/usr/bin/perl -w
#$Id: archive.cgi,v 1.5 2005/08/11 06:31:13 kiesling Exp $
BEGIN { use Config; unshift @INC, ("./lib", "./lib/$Config{archname}"); }
use OnSearch;
use OnSearch::AppConfig;
use OnSearch::Base64;
use OnSearch::CGIQuery;
use OnSearch::UI;
use OnSearch::Utils;
use OnSearch::WebLog;
use OnSearch::WebClient;
my $cfg = OnSearch::AppConfig -> new;
my $prefs_val = undef;
my $q = OnSearch::CGIQuery -> new;
$q -> parsequery;
my $ui = OnSearch::UI -> new;
if ($ENV{HTTP_REFERER} =~ /archive\.(shtml|cgi)/) {
my $req_method = $ENV{REQUEST_METHOD};
###
### POST means that a file is being uploaded.
###
if ($req_method =~ /POST/) {
my ($q, $boundary, $content, @content, $content_length, $cs);
my ($fname, $content_type, $file, @plugins, $tmpfname, $oldrs);
($boundary) = ($ENV{CONTENT_TYPE} =~ /boundary=(.*)/i);
$content_length = $ENV{CONTENT_LENGTH};
binmode STDIN, ':crlf';
read STDIN, $content, $content_length;
@content = split m"--$boundary", $content;
foreach my $cs (@content) {
if ($cs =~ /filename=/is) {
($fname) = ($cs =~ /filename=\"(.*)\"/);
($content_type) = ($cs =~ /Content-Type:\s+(\S+)/);
($file) = ($cs =~ /\015\012\015\012(.*)\015\012/s);
}
}
$fname = basename ($fname);
@plugins = $cfg -> lst (qw/PlugIn/);
if (! scalar grep /$content_type/, @plugins) {
$ui -> error_dialog ("Warning\\nOnSearch does not have a plugin " .
"for this type of document." ) -> wprint;
}
$tmpfname = $fname;
for (my $ext = 1; -f "uploads/$tmpfname"; $ext++) {
$tmpfname = "$fname.$ext";
}
$oldrs = $/; undef $/;
open FILE, ">uploads/$tmpfname" or die "$tmpfname: $!\n";
print FILE $file;
close FILE;
$/ = $oldrs if $oldrs;
} elsif ($req_method =~ /GET/ && $q -> {targeturl}) {
###
### Here it means we're indexing a Web page or a Web site.
###
my ($yeardate, $proto_name, $server, $port, $path);
my ($app_uri, $l, @lines, $dirtree, $serverdirname);
my ($robotspage, $robotsfile, $page, $client_pid);
$prefs_val = $cfg -> webidx_prefs_val ($q);
($proto_name, $server, $port, $path) = parse_url ($q->{targeturl});
unless ($server) {
browser_warn ("Could not index URL: " . $q -> {targeturl} .'.');
exit 1;
}
$webbot = OnSearch::WebBot -> new;
$serverdirname = (($port == 80) ? "/$server" : "/$server:$port");
if ($robotspage) {
$robotsfile = OnSearch::RobotsDotTxt -> new;
$robotsfile -> parse ($robotspage);
clf ('notice',
"Web site %s: %s found.", $q -> param_value(qw/targeturl/),
'robots.txt');
}
exit 1 if ($robotsfile &&
$robotsfile -> is_disallowed ('OnSearch',
$q->param_value (qw/targeturl/)));
if ($q -> param_value ('targetscope') =~ /site/) {
###
### If returning from the child process, exit immediately.
###
if (($client_pid =
$webbot -> siteindex ($q -> param_value (qw/targeturl/)))
== 0) {
exit $client_pid;
}
} else {
($dirtree) = ($path =~ /(.*)\//);
$dirtree = ($dirtree) ?
$webbot -> {cachedir} . "$serverdirname$dirtree" :
$webbot -> {cachedir} . $serverdirname;
$webbot -> mkdirtree ($dirtree, 0755);
$page = get_req ($q->{targeturl});
if (! $page || $page =~ m|HTTP/1.[01]\s+[45](\d+)|) {
$page = 'Error requesting page.' unless $page;
browser_warn ($q->{targeturl} . ": $page");
exit 1;
}
$webbot -> cache_page ($page, $q -> param_value (qw/targeturl/));
}
}
}
my (@cookies, $key, $val, $prefs);
$prefs = 'defaults';
if (($ENV{HTTP_COOKIE}) && (! $prefs_val)) {
@cookies = split /\;\s?/, $ENV{HTTP_COOKIE};
($val) = grep (/webidx/, @cookies);
if ($val) {
($val) = $val =~ /.*?\=(.*)/;
$prefs = $cfg -> get_prefs ($val);
}
} elsif ($prefs_val) {
$prefs = $cfg -> get_prefs ($prefs_val);
}
if (defined ($prefs_val)) {
# Expire cookie in a year.
my $yeardate = OnSearch::Utils::http_date (31536000);
$ui -> header_cookie ('OnSearch', 'webidx', $prefs_val,
$yeardate) -> wprint;
} else {
$ui -> header_css ('OnSearch') -> wprint;
}
$ui -> navbar_map -> wprint;
$ui -> javascripts -> wprint;
$ui -> navbar -> wprint;
$ui -> archive_title -> wprint;
$ui -> webindex_form ($prefs) -> wprint;
$ui -> fileindex_form -> wprint;
$ui -> html_footer -> wprint;
( run in 0.646 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )