Alien-wxWidgets
view release on metacpan or search on metacpan
inc/inc_File-Fetch/File/Fetch.pm view on Meta::CPAN
package File::Fetch;
use strict;
use FileHandle;
use File::Copy;
use File::Spec;
use File::Spec::Unix;
use File::Basename qw[dirname];
use Cwd qw[cwd];
use Carp qw[carp];
use IPC::Cmd qw[can_run run];
use File::Path qw[mkpath];
use Params::Check qw[check];
use Module::Load::Conditional qw[can_load];
use Locale::Maketext::Simple Style => 'gettext';
use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
$BLACKLIST $METHOD_FAIL $VERSION $METHODS
$FTP_PASSIVE $TIMEOUT $DEBUG $WARN
];
use constant QUOTE => do { $^O eq 'MSWin32' ? q["] : q['] };
$VERSION = '0.10';
$PREFER_BIN = 0; # XXX TODO implement
$FROM_EMAIL = 'File-Fetch@example.com';
$USER_AGENT = 'File::Fetch/$VERSION';
$BLACKLIST = [qw|ftp|];
$METHOD_FAIL = { };
$FTP_PASSIVE = 1;
$TIMEOUT = 0;
$DEBUG = 0;
$WARN = 1;
### methods available to fetch the file depending on the scheme
$METHODS = {
http => [ qw|lwp wget curl lynx| ],
ftp => [ qw|lwp netftp wget curl ncftp ftp| ],
file => [ qw|lwp file| ],
rsync => [ qw|rsync| ]
};
### silly warnings ###
local $Params::Check::VERBOSE = 1;
local $Params::Check::VERBOSE = 1;
local $Module::Load::Conditional::VERBOSE = 0;
local $Module::Load::Conditional::VERBOSE = 0;
### see what OS we are on, important for file:// uris ###
use constant ON_UNIX => ($^O ne 'MSWin32' and
$^O ne 'MacOS' and
$^O ne 'VMS');
=pod
=head1 NAME
File::Fetch - A generic file fetching mechanism
=head1 SYNOPSIS
use File::Fetch;
### build a File::Fetch object ###
my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
inc/inc_File-Fetch/File/Fetch.pm view on Meta::CPAN
Returns false on failure.
=cut
sub new {
my $class = shift;
my %hash = @_;
my ($uri);
my $tmpl = {
uri => { required => 1, store => \$uri },
};
check( $tmpl, \%hash ) or return;
### parse the uri to usable parts ###
my $href = __PACKAGE__->_parse_uri( $uri ) or return;
### make it into a FFI object ###
my $ff = File::Fetch->_create( %$href ) or return;
### return the object ###
return $ff;
}
### parses an uri to a hash structure:
###
### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
###
### becomes:
###
### $href = {
### scheme => 'ftp',
### host => 'ftp.cpan.org',
### path => '/pub/mirror',
### file => 'index.html'
### };
###
sub _parse_uri {
my $self = shift;
my $uri = shift or return;
my $href = { uri => $uri };
### find the scheme ###
$uri =~ s|^(\w+)://||;
$href->{scheme} = $1;
### file:// paths have no host ###
if( $href->{scheme} eq 'file' ) {
$href->{path} = $uri;
$href->{host} = '';
} else {
@{$href}{qw|host path|} = $uri =~ m|([^/]*)(/.*)$|s;
}
### split the path into file + dir ###
{ my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
$href->{path} = $parts[1];
$href->{file} = $parts[2];
}
return $href;
}
=head2 $ff->fetch( [to => /my/output/dir/] )
Fetches the file you requested. By default it writes to C<cwd()>,
but you can override that by specifying the C<to> argument.
Returns the full path to the downloaded file on success, and false
on failure.
=cut
sub fetch {
my $self = shift or return;
my %hash = @_;
my $to;
my $tmpl = {
to => { default => cwd(), store => \$to },
};
check( $tmpl, \%hash ) or return;
### create the path if it doesn't exist yet ###
unless( -d $to ) {
eval { mkpath( $to ) };
return $self->_error(loc("Could not create path '%1'",$to)) if $@;
}
### set passive ftp if required ###
local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
###
for my $method ( @{ $METHODS->{$self->scheme} } ) {
my $sub = '_'.$method.'_fetch';
unless( __PACKAGE__->can($sub) ) {
$self->_error(loc("Cannot call method for '%1' -- WEIRD!",
$method));
next;
}
### method is blacklisted ###
next if grep { lc $_ eq $method } @$BLACKLIST;
### method is known to fail ###
next if $METHOD_FAIL->{$method};
### there's serious issues with IPC::Run and quoting of command
### line arguments. using quotes in the wrong place breaks things,
### and in the case of say,
### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
### it doesn't matter how you quote, it always fails.
local $IPC::Cmd::USE_IPC_RUN = 0;
if( my $file = $self->$sub(
to => File::Spec->catfile( $to, $self->output_file )
)){
unless( -e $file && -s _ ) {
$self->_error(loc("'%1' said it fetched '%2', ".
"but it was not created",$method,$file));
### mark the failure ###
$METHOD_FAIL->{$method} = 1;
next;
} else {
my $abs = File::Spec->rel2abs( $file );
return $abs;
}
}
}
### if we got here, we looped over all methods, but we weren't able
### to fetch it.
return;
}
########################
### _*_fetch methods ###
########################
### LWP fetching ###
sub _lwp_fetch {
my $self = shift;
my %hash = @_;
my ($to);
my $tmpl = {
to => { required => 1, store => \$to }
};
check( $tmpl, \%hash ) or return;
### modules required to download with lwp ###
my $use_list = {
LWP => '0.0',
'LWP::UserAgent' => '0.0',
'HTTP::Request' => '0.0',
'HTTP::Status' => '0.0',
URI => '0.0',
};
if( can_load(modules => $use_list) ) {
### setup the uri object
my $uri = URI->new( File::Spec::Unix->catfile(
$self->path, $self->file
) );
### special rules apply for file:// uris ###
$uri->scheme( $self->scheme );
$uri->host( $self->scheme eq 'file' ? '' : $self->host );
$uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
### set up the useragent object
my $ua = LWP::UserAgent->new();
$ua->timeout( $TIMEOUT ) if $TIMEOUT;
$ua->agent( $USER_AGENT );
$ua->from( $FROM_EMAIL );
$ua->env_proxy;
my $res = $ua->mirror($uri, $to) or return;
### uptodate or fetched ok ###
if ( $res->code == 304 or $res->code == 200 ) {
return $to;
} else {
return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
$res->code, HTTP::Status::status_message($res->code),
$res->status_line));
}
} else {
$METHOD_FAIL->{'lwp'} = 1;
return;
}
}
### Net::FTP fetching
sub _netftp_fetch {
my $self = shift;
my %hash = @_;
my ($to);
my $tmpl = {
to => { required => 1, store => \$to }
};
check( $tmpl, \%hash ) or return;
### required modules ###
my $use_list = { 'Net::FTP' => 0 };
if( can_load( modules => $use_list ) ) {
### make connection ###
my $ftp;
my @options = ($self->host);
push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
unless( $ftp = Net::FTP->new( @options ) ) {
return $self->_error(loc("Ftp creation failed: %1",$@));
}
### login ###
unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
return $self->_error(loc("Could not login to '%1'",$self->host));
}
### set binary mode, just in case ###
$ftp->binary;
### create the remote path
### remember remote paths are unix paths! [#11483]
my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
### fetch the file ###
my $target;
unless( $target = $ftp->get( $remote, $to ) ) {
return $self->_error(loc("Could not fetch '%1' from '%2'",
$remote, $self->host));
}
### log out ###
$ftp->quit;
return $target;
} else {
$METHOD_FAIL->{'netftp'} = 1;
return;
}
}
### /bin/wget fetch ###
sub _wget_fetch {
my $self = shift;
my %hash = @_;
my ($to);
my $tmpl = {
to => { required => 1, store => \$to }
};
check( $tmpl, \%hash ) or return;
### see if we have a wget binary ###
if( my $wget = can_run('wget') ) {
### no verboseness, thanks ###
my $cmd = [ $wget, '--quiet' ];
### if a timeout is set, add it ###
push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
### run passive if specified ###
push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
### set the output document, add the uri ###
push @$cmd, '--output-document',
### DO NOT quote things for IPC::Run, it breaks stuff.
$IPC::Cmd::USE_IPC_RUN
? ($to, $self->uri)
: (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
### shell out ###
my $captured;
unless(run( command => $cmd,
buffer => \$captured,
verbose => $DEBUG
)) {
### wget creates the output document always, even if the fetch
### fails.. so unlink it in that case
1 while unlink $to;
return $self->_error(loc( "Command failed: %1", $captured || '' ));
inc/inc_File-Fetch/File/Fetch.pm view on Meta::CPAN
push @$cmd, $IPC::Cmd::USE_IPC_RUN
? $self->uri
: QUOTE. $self->uri .QUOTE;
### shell out ###
my $captured;
unless(run( command => $cmd,
buffer => \$captured,
verbose => $DEBUG )
) {
return $self->_error(loc("Command failed: %1", $captured || ''));
}
### print to local file ###
### XXX on a 404 with a special error page, $captured will actually
### hold the contents of that page, and make it *appear* like the
### request was a success, when really it wasn't :(
### there doesn't seem to be an option for lynx to change the exit
### code based on a 4XX status or so.
### the closest we can come is using --error_file and parsing that,
### which is very unreliable ;(
$local->print( $captured );
$local->close or return;
return $to;
} else {
$METHOD_FAIL->{'lynx'} = 1;
return;
}
}
### use /bin/ncftp to fetch files
sub _ncftp_fetch {
my $self = shift;
my %hash = @_;
my ($to);
my $tmpl = {
to => { required => 1, store => \$to }
};
check( $tmpl, \%hash ) or return;
### we can only set passive mode in interactive sesssions, so bail out
### if $FTP_PASSIVE is set
return if $FTP_PASSIVE;
### see if we have a ncftp binary ###
if( my $ncftp = can_run('ncftp') ) {
my $cmd = [
$ncftp,
'-V', # do not be verbose
'-p', $FROM_EMAIL, # email as password
$self->host, # hostname
dirname($to), # local dir for the file
# remote path to the file
### DO NOT quote things for IPC::Run, it breaks stuff.
$IPC::Cmd::USE_IPC_RUN
? File::Spec::Unix->catdir( $self->path, $self->file )
: QUOTE. File::Spec::Unix->catdir(
$self->path, $self->file ) .QUOTE
];
### shell out ###
my $captured;
unless(run( command => $cmd,
buffer => \$captured,
verbose => $DEBUG )
) {
return $self->_error(loc("Command failed: %1", $captured || ''));
}
return $to;
} else {
$METHOD_FAIL->{'ncftp'} = 1;
return;
}
}
### use /bin/curl to fetch files
sub _curl_fetch {
my $self = shift;
my %hash = @_;
my ($to);
my $tmpl = {
to => { required => 1, store => \$to }
};
check( $tmpl, \%hash ) or return;
if (my $curl = can_run('curl')) {
### these long opts are self explanatory - I like that -jmb
my $cmd = [ $curl ];
push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
push(@$cmd, '--silent') unless $DEBUG;
### curl does the right thing with passive, regardless ###
if ($self->scheme eq 'ftp') {
push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
}
### curl doesn't follow 302 (temporarily moved) etc automatically
### so we add --location to enable that.
push @$cmd, '--fail', '--location', '--output',
### DO NOT quote things for IPC::Run, it breaks stuff.
$IPC::Cmd::USE_IPC_RUN
? ($to, $self->uri)
: (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
my $captured;
unless(run( command => $cmd,
buffer => \$captured,
verbose => $DEBUG )
) {
return $self->_error(loc("Command failed: %1", $captured || ''));
}
return $to;
} else {
$METHOD_FAIL->{'curl'} = 1;
return;
}
}
### use File::Copy for fetching file:// urls ###
### XXX file:// uri to local path conversion is just too weird...
### depend on LWP to do it for us
sub _file_fetch {
my $self = shift;
my %hash = @_;
my ($to);
my $tmpl = {
to => { required => 1, store => \$to }
};
check( $tmpl, \%hash ) or return;
### prefix a / on unix systems with a file uri, since it would
### look somewhat like this:
### file://home/kane/file
### wheras windows file uris might look like:
### file://C:/home/kane/file
my $path = ON_UNIX ? '/'. $self->path : $self->path;
my $remote = File::Spec->catfile( $path, $self->file );
### File::Copy is littered with 'die' statements :( ###
my $rv = eval { File::Copy::copy( $remote, $to ) };
### something went wrong ###
if( !$rv or $@ ) {
return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
$remote, $to, $!, $@));
}
return $to;
}
### use /usr/bin/rsync to fetch files
sub _rsync_fetch {
my $self = shift;
my %hash = @_;
my ($to);
my $tmpl = {
to => { required => 1, store => \$to }
};
check( $tmpl, \%hash ) or return;
if (my $rsync = can_run('rsync')) {
my $cmd = [ $rsync ];
### XXX: rsync has no I/O timeouts at all, by default
push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
push(@$cmd, '--quiet') unless $DEBUG;
### DO NOT quote things for IPC::Run, it breaks stuff.
push @$cmd, $IPC::Cmd::USE_IPC_RUN
? ($self->uri, $to)
: (QUOTE. $self->uri .QUOTE, QUOTE. $to .QUOTE);
my $captured;
unless(run( command => $cmd,
buffer => \$captured,
verbose => $DEBUG )
) {
return $self->_error(loc("Command failed: %1", $captured || ''));
}
return $to;
} else {
$METHOD_FAIL->{'rsync'} = 1;
return;
}
( run in 0.746 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )