DBIx-TextSearch

 view release on metacpan or  search on metacpan

lib/DBIx/TextSearch.pm  view on Meta::CPAN

    my $url = shift();
    # parse an url like ftp://user:password@foo.bar.com/wibble/barf.txt into
    # something usable
    my $uri = URI->new($url);
    my $host = $uri->host();
    my $path = $uri->path();
    my $auth = $uri->authority(); # user:password@host

    my ($username, $passwd);
    if ($auth =~ /:/) {
        # get username and password from $auth
	$username = $auth;
	$username =~ s/:.*//;
	$passwd = $auth;
	$passwd =~ s/$username://;
	$passwd =~ s/@.*//;
	$self->say("auth'd ftp\nusername: $username\nPassword $passwd\n");
    } else {
	# set username to anonymous, password to local (linux) email address
	$username = 'anonymous';
	my $hostname = `hostname`; # need to get domain name as well.
	my $me = $ENV{USER};
	$passwd = $me . '@' . $hostname;
	$self->say("anon ftp\nuser : $username\npass : $passwd\n");
    }

    # remove remote file name from $path into a separate variable
    my $dir = dirname($path);
    my $remote_file = basename($path);


    # get unique name for local file
    my $local_file = _get_unique_filename();

    # fetch the file
    $self->say("logging into $host as $username with password $passwd\n");
    my $ftp = Net::FTP->new($host,
			    Debug => 1,
			    Passive => 1);
    $ftp->login($username, $passwd);
    $ftp->cwd("$dir");
    $ftp->ascii();
    $ftp->get($remote_file, $local_file);
    $ftp->quit();

    # file transferred, return its location
    $self->say("Local file is: $local_file\n");
    return $local_file;
}
######################################################################
sub _http {
    # fetch a file via http and store locally
    my ($self, $url) = @_;
    print "URL to fetch: $url\n";

    # get unique name for local file
    my $local_file = _get_unique_filename();

    # fetch the file
    my $ua = LWP::UserAgent->new;
    my $request = HTTP::Request->new('GET', $url);
    my $response = $ua->request($request);

    if ($response->is_success) {
	# sucessful fetch
	# write to disk
	my $html = $response->content;
	CORE::open(HTML, ">$local_file") or
	  croak "Can't save HTML file $url to $local_file: $!";
	print HTML $html;
	close HTML;
	# file transferred, return its location
	return $local_file;
    } else {
	# error message
	my $error = $response->status_line;
	cluck $error;
    }

}
######################################################################
sub _rem_newer {
    # check the md5 sum of a URI against db.
    # return md5. If not in index, md5 from MD5i eq 'none'
    # par 1 = http|ftp|file. par2 = uri
    my $self = shift();
    my ($ftype, $loc) = @_;
    my $md5_file; # file checksum

    my $md5_db = $self->MD5($loc); # mtime of indexed file
    # $md5_db = 'none' if not in index

    $self->say("is file newer than already indexed version?\n");
    if ($ftype eq 'http') {
	$self->say("checking md5 sum with http\n");
	my $ua = LWP::UserAgent->new(env_proxy => 1,
				     keep_alive => 1,
				     timeout => 30);
	my $response = $ua->get($loc);
	cluck "Error while getting ", $response->request->uri,
	  " -- ", $response->status_line, "\nAborting"
	    unless $response->is_success;
	my $doc = $response->content();
	$md5_file = md5_hex($doc);
	undef $ua;
    } elsif ($ftype eq 'ftp') {
	my $file = $self->_ftp($loc);
	$md5_file = md5_hex($file);
	unlink($file);
    } elsif ($ftype eq 'file') {
	$md5_file = md5_hex($loc);
    }

    $self->say("file checksum : $md5_file\nindex checksum: $md5_db\n");

    if ($md5_file ne $md5_db) {
	# remote file is different from indexed version
	$self->say("uri is different from indexed version\n");
	return ($md5_file, 1);
    } else {
	$self->say("uri is is the same as the indexed version\n");



( run in 0.703 second using v1.01-cache-2.11-cpan-5837b0d9d2c )