AnyEvent-Net-Curl-Queued

 view release on metacpan or  search on metacpan

bin/yada  view on Meta::CPAN

use YADA;

our $VERSION = '0.049'; # VERSION


GetOptions(
    q(help)             => \my $help,
    q(dups!)            => \my $dups,
    q(encoding=s)       => \my $encoding,
    q(max=i)            => \my $max,
    q(maxredirs=i)      => \my $maxredirs,
    q(proxy=s)          => \my $proxy,
    q(quiet!)           => \my $quiet,
    q(referer=s)        => \my $referer,
    q(tcp_nodelay)      => \my $tcp_nodelay,
    q(timeout=i)        => \my $timeout,
    q(useragent=s)      => \my $useragent,
    q(verbose)          => \my $verbose,
    q(skip_existing)    => \my $skip_existing,
    q(shuffle)          => \my $shuffle,
) or pod2usage(q(-verbose) => 1);
pod2usage(q(-verbose) => 1) if $help;

my @urls = <>;
chomp @urls;
@urls = shuffle @urls if $shuffle;

autoflush $_, 1 for (\*STDERR, \*STDOUT);

my $q = YADA->new({
    allow_dups  => $dups // 1,
    max         => $max // 4,
    timeout     => $timeout // 600,
});

my $c = 0;
for my $url (@urls) {
    my $fh;
    my $name = get_filename(URI->new($url), $skip_existing);
    next if $skip_existing && -e $name;
    $q->append(
        $url,
        sub {
            my ($self) = @_;

            # will die() later
            sysopen($fh, $name, O_CREAT | O_NONBLOCK | O_WRONLY);
            binmode $fh;

            $self->setopt(
                encoding            => $encoding // q(),
                maxredirs           => $maxredirs // 5,
                noprogress          => $quiet,
                proxy               => $proxy,
                referer             => $referer,
                tcp_nodelay         => $tcp_nodelay // 0,
                useragent           => $useragent // qq(yada/$VERSION ($Config{archname}; Perl/$Config{version}) @{[ Net::Curl::version() ]}),
                verbose             => $verbose,

                autoreferer         => 1,
                ssl_verifyhost      => 0,
                ssl_verifypeer      => 0,
                unrestricted_auth   => 1,
                writedata           => $fh,
            );
        },
        sub {
            my ($self) = @_;
            blocking $fh, 1;
            flush $fh;
            close $fh;
            ## no critic (ProhibitComplexRegexes)
            #if ($self->has_error or $self->getinfo(q(response_code)) =~ m{^5[0-9]{2}$}) {
            if ($self->has_error) {
                unlink $name;
            } elsif (${$self->header} =~ m{\bContent-Disposition:\s*attachment;\s*filename=("?[\w\.\-]+"?);?}isx) {
                my $filename = $1;
                $filename =~ s/^"|"$//gx;

                move($name, $name . q(.tmp));
                my $new_name = get_filename(URI->new(q(file:///) . $filename));
                move($name . q(.tmp), $new_name);
            } elsif ($self->final_url ne $self->initial_url) {
                move($name, $name . q(.tmp));
                my $new_name = get_filename($self->final_url);
                move($name . q(.tmp), $new_name);
            }
        },
    );
    $c++;
}

$q->wait if $c;

sub get_filename {
    my $url = shift->clone->canonical;
    my $no_check = shift;

    my $orig = ($url->path_segments)[-1] || q(index.html);
    $orig .= q(?) . $url->query
        if $url->query;

    $orig =~ s{[^\w\.\-]}{_}gsx;
    my $name = $orig;
    return $name if $no_check;

    my $i = 1;
    for (;;) {
        -e $name
            ? $name = $orig . q(.) . $i++
            : last;
    }

    return $name;
}

__END__

=pod

=encoding UTF-8



( run in 0.552 second using v1.01-cache-2.11-cpan-5a3173703d6 )