DBIx-TextSearch
view release on metacpan or search on metacpan
lib/DBIx/TextSearch.pm view on Meta::CPAN
my $url = shift();
# parse an url like ftp://user:password@foo.bar.com/wibble/barf.txt into
# something usable
my $uri = URI->new($url);
my $host = $uri->host();
my $path = $uri->path();
my $auth = $uri->authority(); # user:password@host
my ($username, $passwd);
if ($auth =~ /:/) {
# get username and password from $auth
$username = $auth;
$username =~ s/:.*//;
$passwd = $auth;
$passwd =~ s/$username://;
$passwd =~ s/@.*//;
$self->say("auth'd ftp\nusername: $username\nPassword $passwd\n");
} else {
# set username to anonymous, password to local (linux) email address
$username = 'anonymous';
my $hostname = `hostname`; # need to get domain name as well.
my $me = $ENV{USER};
$passwd = $me . '@' . $hostname;
$self->say("anon ftp\nuser : $username\npass : $passwd\n");
}
# remove remote file name from $path into a separate variable
my $dir = dirname($path);
my $remote_file = basename($path);
# get unique name for local file
my $local_file = _get_unique_filename();
# fetch the file
$self->say("logging into $host as $username with password $passwd\n");
my $ftp = Net::FTP->new($host,
Debug => 1,
Passive => 1);
$ftp->login($username, $passwd);
$ftp->cwd("$dir");
$ftp->ascii();
$ftp->get($remote_file, $local_file);
$ftp->quit();
# file transferred, return its location
$self->say("Local file is: $local_file\n");
return $local_file;
}
######################################################################
sub _http {
# fetch a file via http and store locally
my ($self, $url) = @_;
print "URL to fetch: $url\n";
# get unique name for local file
my $local_file = _get_unique_filename();
# fetch the file
my $ua = LWP::UserAgent->new;
my $request = HTTP::Request->new('GET', $url);
my $response = $ua->request($request);
if ($response->is_success) {
# sucessful fetch
# write to disk
my $html = $response->content;
CORE::open(HTML, ">$local_file") or
croak "Can't save HTML file $url to $local_file: $!";
print HTML $html;
close HTML;
# file transferred, return its location
return $local_file;
} else {
# error message
my $error = $response->status_line;
cluck $error;
}
}
######################################################################
sub _rem_newer {
# check the md5 sum of a URI against db.
# return md5. If not in index, md5 from MD5i eq 'none'
# par 1 = http|ftp|file. par2 = uri
my $self = shift();
my ($ftype, $loc) = @_;
my $md5_file; # file checksum
my $md5_db = $self->MD5($loc); # mtime of indexed file
# $md5_db = 'none' if not in index
$self->say("is file newer than already indexed version?\n");
if ($ftype eq 'http') {
$self->say("checking md5 sum with http\n");
my $ua = LWP::UserAgent->new(env_proxy => 1,
keep_alive => 1,
timeout => 30);
my $response = $ua->get($loc);
cluck "Error while getting ", $response->request->uri,
" -- ", $response->status_line, "\nAborting"
unless $response->is_success;
my $doc = $response->content();
$md5_file = md5_hex($doc);
undef $ua;
} elsif ($ftype eq 'ftp') {
my $file = $self->_ftp($loc);
$md5_file = md5_hex($file);
unlink($file);
} elsif ($ftype eq 'file') {
$md5_file = md5_hex($loc);
}
$self->say("file checksum : $md5_file\nindex checksum: $md5_db\n");
if ($md5_file ne $md5_db) {
# remote file is different from indexed version
$self->say("uri is different from indexed version\n");
return ($md5_file, 1);
} else {
$self->say("uri is is the same as the indexed version\n");
( run in 0.703 second using v1.01-cache-2.11-cpan-5837b0d9d2c )