Alien-wxWidgets

 view release on metacpan or  search on metacpan

inc/inc_File-Fetch/File/Fetch.pm  view on Meta::CPAN

package File::Fetch;

use strict;
use FileHandle;
use File::Copy;
use File::Spec;
use File::Spec::Unix;
use File::Basename              qw[dirname];

use Cwd                         qw[cwd];
use Carp                        qw[carp];
use IPC::Cmd                    qw[can_run run];
use File::Path                  qw[mkpath];
use Params::Check               qw[check];
use Module::Load::Conditional   qw[can_load];
use Locale::Maketext::Simple    Style => 'gettext';

use vars    qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
                $BLACKLIST $METHOD_FAIL $VERSION $METHODS
                $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
            ];

use constant QUOTE  => do { $^O eq 'MSWin32' ? q["] : q['] };            
            

$VERSION        = '0.10';
$PREFER_BIN     = 0;        # XXX TODO implement
$FROM_EMAIL     = 'File-Fetch@example.com';
$USER_AGENT     = 'File::Fetch/$VERSION';
$BLACKLIST      = [qw|ftp|];
$METHOD_FAIL    = { };
$FTP_PASSIVE    = 1;
$TIMEOUT        = 0;
$DEBUG          = 0;
$WARN           = 1;

### methods available to fetch the file depending on the scheme
$METHODS = {
    http    => [ qw|lwp wget curl lynx| ],
    ftp     => [ qw|lwp netftp wget curl ncftp ftp| ],
    file    => [ qw|lwp file| ],
    rsync   => [ qw|rsync| ]
};

### silly warnings ###
local $Params::Check::VERBOSE               = 1;
local $Params::Check::VERBOSE               = 1;
local $Module::Load::Conditional::VERBOSE   = 0;
local $Module::Load::Conditional::VERBOSE   = 0;

### see what OS we are on, important for file:// uris ###
use constant ON_UNIX        => ($^O ne 'MSWin32' and
                                $^O ne 'MacOS'   and
                                $^O ne 'VMS');

=pod

=head1 NAME

File::Fetch - A generic file fetching mechanism

=head1 SYNOPSIS

    use File::Fetch;

    ### build a File::Fetch object ###
    my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');

inc/inc_File-Fetch/File/Fetch.pm  view on Meta::CPAN


Returns false on failure.

=cut

sub new {
    my $class = shift;
    my %hash  = @_;

    my ($uri);
    my $tmpl = {
        uri => { required => 1, store => \$uri },
    };

    check( $tmpl, \%hash ) or return;

    ### parse the uri to usable parts ###
    my $href    = __PACKAGE__->_parse_uri( $uri ) or return;

    ### make it into a FFI object ###
    my $ff      = File::Fetch->_create( %$href ) or return;


    ### return the object ###
    return $ff;
}

### parses an uri to a hash structure:
###
### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
###
### becomes:
###
### $href = {
###     scheme  => 'ftp',
###     host    => 'ftp.cpan.org',
###     path    => '/pub/mirror',
###     file    => 'index.html'
### };
###
sub _parse_uri {
    my $self = shift;
    my $uri  = shift or return;

    my $href = { uri => $uri };

    ### find the scheme ###
    $uri            =~ s|^(\w+)://||;
    $href->{scheme} = $1;

    ### file:// paths have no host ###
    if( $href->{scheme} eq 'file' ) {
        $href->{path} = $uri;
        $href->{host} = '';

    } else {
        @{$href}{qw|host path|} = $uri =~ m|([^/]*)(/.*)$|s;
    }

    ### split the path into file + dir ###
    {   my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
        $href->{path} = $parts[1];
        $href->{file} = $parts[2];
    }


    return $href;
}

=head2 $ff->fetch( [to => /my/output/dir/] )

Fetches the file you requested. By default it writes to C<cwd()>,
but you can override that by specifying the C<to> argument.

Returns the full path to the downloaded file on success, and false
on failure.

=cut

sub fetch {
    my $self = shift or return;
    my %hash = @_;

    my $to;
    my $tmpl = {
        to  => { default => cwd(), store => \$to },
    };

    check( $tmpl, \%hash ) or return;

    ### create the path if it doesn't exist yet ###
    unless( -d $to ) {
        eval { mkpath( $to ) };

        return $self->_error(loc("Could not create path '%1'",$to)) if $@;
    }

    ### set passive ftp if required ###
    local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;

    ###
    for my $method ( @{ $METHODS->{$self->scheme} } ) {
        my $sub =  '_'.$method.'_fetch';

        unless( __PACKAGE__->can($sub) ) {
            $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
                        $method));
            next;
        }

        ### method is blacklisted ###
        next if grep { lc $_ eq $method } @$BLACKLIST;

        ### method is known to fail ###
        next if $METHOD_FAIL->{$method};

        ### there's serious issues with IPC::Run and quoting of command
        ### line arguments. using quotes in the wrong place breaks things,
        ### and in the case of say, 
        ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
        ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
        ### it doesn't matter how you quote, it always fails.
        local $IPC::Cmd::USE_IPC_RUN = 0;
        
        if( my $file = $self->$sub( 
                        to => File::Spec->catfile( $to, $self->output_file )
        )){

            unless( -e $file && -s _ ) {
                $self->_error(loc("'%1' said it fetched '%2', ".
                     "but it was not created",$method,$file));

                ### mark the failure ###
                $METHOD_FAIL->{$method} = 1;

                next;

            } else {

                my $abs = File::Spec->rel2abs( $file );
                return $abs;
            }
        }
    }


    ### if we got here, we looped over all methods, but we weren't able
    ### to fetch it.
    return;
}

########################
### _*_fetch methods ###
########################

### LWP fetching ###
sub _lwp_fetch {
    my $self = shift;
    my %hash = @_;

    my ($to);
    my $tmpl = {
        to  => { required => 1, store => \$to }
    };
    check( $tmpl, \%hash ) or return;

    ### modules required to download with lwp ###
    my $use_list = {
        LWP                 => '0.0',
        'LWP::UserAgent'    => '0.0',
        'HTTP::Request'     => '0.0',
        'HTTP::Status'      => '0.0',
        URI                 => '0.0',

    };

    if( can_load(modules => $use_list) ) {

        ### setup the uri object
        my $uri = URI->new( File::Spec::Unix->catfile(
                                    $self->path, $self->file
                        ) );

        ### special rules apply for file:// uris ###
        $uri->scheme( $self->scheme );
        $uri->host( $self->scheme eq 'file' ? '' : $self->host );
        $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';

        ### set up the useragent object
        my $ua = LWP::UserAgent->new();
        $ua->timeout( $TIMEOUT ) if $TIMEOUT;
        $ua->agent( $USER_AGENT );
        $ua->from( $FROM_EMAIL );
        $ua->env_proxy;

        my $res = $ua->mirror($uri, $to) or return;

        ### uptodate or fetched ok ###
        if ( $res->code == 304 or $res->code == 200 ) {
            return $to;

        } else {
            return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
                        $res->code, HTTP::Status::status_message($res->code),
                        $res->status_line));
        }

    } else {
        $METHOD_FAIL->{'lwp'} = 1;
        return;
    }
}

### Net::FTP fetching
sub _netftp_fetch {
    my $self = shift;
    my %hash = @_;

    my ($to);
    my $tmpl = {
        to  => { required => 1, store => \$to }
    };
    check( $tmpl, \%hash ) or return;

    ### required modules ###
    my $use_list = { 'Net::FTP' => 0 };

    if( can_load( modules => $use_list ) ) {

        ### make connection ###
        my $ftp;
        my @options = ($self->host);
        push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
        unless( $ftp = Net::FTP->new( @options ) ) {
            return $self->_error(loc("Ftp creation failed: %1",$@));
        }

        ### login ###
        unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
            return $self->_error(loc("Could not login to '%1'",$self->host));
        }

        ### set binary mode, just in case ###
        $ftp->binary;

        ### create the remote path 
        ### remember remote paths are unix paths! [#11483]
        my $remote = File::Spec::Unix->catfile( $self->path, $self->file );

        ### fetch the file ###
        my $target;
        unless( $target = $ftp->get( $remote, $to ) ) {
            return $self->_error(loc("Could not fetch '%1' from '%2'",
                        $remote, $self->host));
        }

        ### log out ###
        $ftp->quit;

        return $target;

    } else {
        $METHOD_FAIL->{'netftp'} = 1;
        return;
    }
}

### /bin/wget fetch ###
sub _wget_fetch {
    my $self = shift;
    my %hash = @_;

    my ($to);
    my $tmpl = {
        to  => { required => 1, store => \$to }
    };
    check( $tmpl, \%hash ) or return;

    ### see if we have a wget binary ###
    if( my $wget = can_run('wget') ) {

        ### no verboseness, thanks ###
        my $cmd = [ $wget, '--quiet' ];

        ### if a timeout is set, add it ###
        push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;

        ### run passive if specified ###
        push @$cmd, '--passive-ftp' if $FTP_PASSIVE;

        ### set the output document, add the uri ###
        push @$cmd, '--output-document', 
                    ### DO NOT quote things for IPC::Run, it breaks stuff.
                    $IPC::Cmd::USE_IPC_RUN
                        ? ($to, $self->uri)
                        : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);

        ### shell out ###
        my $captured;
        unless(run( command => $cmd, 
                    buffer  => \$captured, 
                    verbose => $DEBUG  
        )) {
            ### wget creates the output document always, even if the fetch
            ### fails.. so unlink it in that case
            1 while unlink $to;
            
            return $self->_error(loc( "Command failed: %1", $captured || '' ));

inc/inc_File-Fetch/File/Fetch.pm  view on Meta::CPAN

        push @$cmd, $IPC::Cmd::USE_IPC_RUN
                        ? $self->uri
                        : QUOTE. $self->uri .QUOTE;


        ### shell out ###
        my $captured;
        unless(run( command => $cmd,
                    buffer  => \$captured,
                    verbose => $DEBUG )
        ) {
            return $self->_error(loc("Command failed: %1", $captured || ''));
        }

        ### print to local file ###
        ### XXX on a 404 with a special error page, $captured will actually
        ### hold the contents of that page, and make it *appear* like the
        ### request was a success, when really it wasn't :(
        ### there doesn't seem to be an option for lynx to change the exit
        ### code based on a 4XX status or so.
        ### the closest we can come is using --error_file and parsing that,
        ### which is very unreliable ;(
        $local->print( $captured );
        $local->close or return;

        return $to;

    } else {
        $METHOD_FAIL->{'lynx'} = 1;
        return;
    }
}

### use /bin/ncftp to fetch files
sub _ncftp_fetch {
    my $self = shift;
    my %hash = @_;

    my ($to);
    my $tmpl = {
        to  => { required => 1, store => \$to }
    };
    check( $tmpl, \%hash ) or return;

    ### we can only set passive mode in interactive sesssions, so bail out
    ### if $FTP_PASSIVE is set
    return if $FTP_PASSIVE;

    ### see if we have a ncftp binary ###
    if( my $ncftp = can_run('ncftp') ) {

        my $cmd = [
            $ncftp,
            '-V',                   # do not be verbose
            '-p', $FROM_EMAIL,      # email as password
            $self->host,            # hostname
            dirname($to),           # local dir for the file
                                    # remote path to the file
            ### DO NOT quote things for IPC::Run, it breaks stuff.
            $IPC::Cmd::USE_IPC_RUN
                        ? File::Spec::Unix->catdir( $self->path, $self->file )
                        : QUOTE. File::Spec::Unix->catdir( 
                                        $self->path, $self->file ) .QUOTE
            
        ];

        ### shell out ###
        my $captured;
        unless(run( command => $cmd,
                    buffer  => \$captured,
                    verbose => $DEBUG )
        ) {
            return $self->_error(loc("Command failed: %1", $captured || ''));
        }

        return $to;

    } else {
        $METHOD_FAIL->{'ncftp'} = 1;
        return;
    }
}

### use /bin/curl to fetch files
sub _curl_fetch {
    my $self = shift;
    my %hash = @_;

    my ($to);
    my $tmpl = {
        to  => { required => 1, store => \$to }
    };
    check( $tmpl, \%hash ) or return;

    if (my $curl = can_run('curl')) {

        ### these long opts are self explanatory - I like that -jmb
	    my $cmd = [ $curl ];

	    push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;

	    push(@$cmd, '--silent') unless $DEBUG;

        ### curl does the right thing with passive, regardless ###
    	if ($self->scheme eq 'ftp') {
    		push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
    	}

        ### curl doesn't follow 302 (temporarily moved) etc automatically
        ### so we add --location to enable that.
        push @$cmd, '--fail', '--location', '--output', 
                    ### DO NOT quote things for IPC::Run, it breaks stuff.
                    $IPC::Cmd::USE_IPC_RUN
                        ? ($to, $self->uri)
                        : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);

        my $captured;
        unless(run( command => $cmd,
                    buffer  => \$captured,
                    verbose => $DEBUG )
        ) {

            return $self->_error(loc("Command failed: %1", $captured || ''));
        }

        return $to;

    } else {
        $METHOD_FAIL->{'curl'} = 1;
        return;
    }
}


### use File::Copy for fetching file:// urls ###
### XXX file:// uri to local path conversion is just too weird...
### depend on LWP to do it for us
sub _file_fetch {
    my $self = shift;
    my %hash = @_;

    my ($to);
    my $tmpl = {
        to  => { required => 1, store => \$to }
    };
    check( $tmpl, \%hash ) or return;

    ### prefix a / on unix systems with a file uri, since it would
    ### look somewhat like this:
    ###     file://home/kane/file
    ### wheras windows file uris might look like:
    ###     file://C:/home/kane/file
    my $path    = ON_UNIX ? '/'. $self->path : $self->path;

    my $remote  = File::Spec->catfile( $path, $self->file );

    ### File::Copy is littered with 'die' statements :( ###
    my $rv = eval { File::Copy::copy( $remote, $to ) };

    ### something went wrong ###
    if( !$rv or $@ ) {
        return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
                             $remote, $to, $!, $@));
    }

    return $to;
}

### use /usr/bin/rsync to fetch files
sub _rsync_fetch {
    my $self = shift;
    my %hash = @_;

    my ($to);
    my $tmpl = {
        to  => { required => 1, store => \$to }
    };
    check( $tmpl, \%hash ) or return;

    if (my $rsync = can_run('rsync')) {

        my $cmd = [ $rsync ];

        ### XXX: rsync has no I/O timeouts at all, by default
        push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;

        push(@$cmd, '--quiet') unless $DEBUG;

        ### DO NOT quote things for IPC::Run, it breaks stuff.
        push @$cmd, $IPC::Cmd::USE_IPC_RUN
                        ? ($self->uri, $to)
                        : (QUOTE. $self->uri .QUOTE, QUOTE. $to .QUOTE);

        my $captured;
        unless(run( command => $cmd,
                    buffer  => \$captured,
                    verbose => $DEBUG )
        ) {

            return $self->_error(loc("Command failed: %1", $captured || ''));
        }

        return $to;

    } else {
        $METHOD_FAIL->{'rsync'} = 1;
        return;
    }



( run in 0.746 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )