onsearch

 view release on metacpan or  search on metacpan

cgi/archive.cgi  view on Meta::CPAN

#!/usr/bin/perl -w

#$Id: archive.cgi,v 1.5 2005/08/11 06:31:13 kiesling Exp $

BEGIN { use Config; unshift @INC, ("./lib", "./lib/$Config{archname}"); }

use OnSearch;

use OnSearch::AppConfig;
use OnSearch::Base64;
use OnSearch::CGIQuery;
use OnSearch::UI;
use OnSearch::Utils;
use OnSearch::WebLog;
use OnSearch::WebClient;

my $cfg = OnSearch::AppConfig -> new;
my $prefs_val = undef;

my $q = OnSearch::CGIQuery -> new;
$q -> parsequery;

my $ui = OnSearch::UI -> new;

if ($ENV{HTTP_REFERER} =~ /archive\.(shtml|cgi)/) {
    my $req_method = $ENV{REQUEST_METHOD};
    ###
    ### POST means that a file is being uploaded.
    ###
    if ($req_method =~ /POST/) { 
	my ($q, $boundary, $content, @content, $content_length, $cs);
	my ($fname, $content_type, $file, @plugins, $tmpfname, $oldrs);
	($boundary) = ($ENV{CONTENT_TYPE} =~ /boundary=(.*)/i);
	$content_length = $ENV{CONTENT_LENGTH};
	binmode STDIN, ':crlf';
	read STDIN, $content, $content_length;
	@content = split m"--$boundary", $content;
	foreach my $cs (@content) {
	    if ($cs =~ /filename=/is) {
		($fname) = ($cs =~ /filename=\"(.*)\"/);
		($content_type) = ($cs =~ /Content-Type:\s+(\S+)/);
		($file) = ($cs =~ /\015\012\015\012(.*)\015\012/s);
	    }
	}
	$fname = basename ($fname);
	@plugins = $cfg -> lst (qw/PlugIn/);
	if (! scalar grep /$content_type/, @plugins) {
	    $ui -> error_dialog ("Warning\\nOnSearch does not have a plugin " .
				 "for this type of document." ) -> wprint;
	}
	$tmpfname = $fname;
	for (my $ext = 1; -f "uploads/$tmpfname"; $ext++) {
	    $tmpfname = "$fname.$ext";
	}
	
	$oldrs = $/; undef $/;
	open FILE, ">uploads/$tmpfname" or die "$tmpfname: $!\n";
	print FILE $file;
	close FILE;
	$/ = $oldrs if $oldrs;
    } elsif ($req_method =~ /GET/ && $q -> {targeturl}) {
	###
	### Here it means we're indexing a Web page or a Web site.
	###
	my ($yeardate, $proto_name, $server, $port, $path);
	my ($app_uri, $l, @lines, $dirtree, $serverdirname);
	my ($robotspage, $robotsfile, $page, $client_pid);
	$prefs_val = $cfg -> webidx_prefs_val ($q);
	($proto_name, $server, $port, $path) = parse_url ($q->{targeturl});
	unless ($server) {
	    browser_warn ("Could not index URL: " . $q -> {targeturl} .'.');
	    exit 1;
	}
	$webbot = OnSearch::WebBot -> new;
	$serverdirname = (($port == 80) ? "/$server" : "/$server:$port");
	if ($robotspage) {
	    $robotsfile = OnSearch::RobotsDotTxt -> new;
	    $robotsfile -> parse ($robotspage);
	    clf ('notice', 
		 "Web site %s: %s found.", $q -> param_value(qw/targeturl/), 
		 'robots.txt');
	}
	exit 1 if ($robotsfile && 
		   $robotsfile -> is_disallowed ('OnSearch', 
				 $q->param_value (qw/targeturl/)));
	if ($q -> param_value ('targetscope') =~ /site/) {
	    ###
	    ### If returning from the child process, exit immediately.
	    ###
	    if (($client_pid = 
		$webbot -> siteindex ($q -> param_value (qw/targeturl/)))
		== 0) {
		exit $client_pid;
	    }
	} else {
	    ($dirtree) = ($path =~ /(.*)\//);
	    $dirtree = ($dirtree) ? 



( run in 0.814 second using v1.01-cache-2.11-cpan-71847e10f99 )