AnyEvent-Net-Curl-Queued

 view release on metacpan or  search on metacpan

eg/CrawlApache.pm  view on Meta::CPAN

# Example crawler worker: a YADA::Worker subclass that, once a request has
# finished, scrapes the fetched HTML for links and queues new workers for
# the links that point at http://localhost.
package CrawlApache;
use strict;
use utf8;
use warnings qw(all);
use feature qw(say);

use Moo;
use MooX::Types::MooseLike::Base qw(InstanceOf);
use Web::Scraper::LibXML;

# YADA::Worker is the parent class of this worker.
extends 'YADA::Worker';

# Link scraper used by the finish hook. Built lazily on first access unless
# one is supplied to the constructor; it collects the href attribute of
# every <a> element into the 'links' list of the scrape result.
has scrap => (
    is   => 'ro',
    isa  => InstanceOf['Web::Scraper'],
    lazy => 1,
    default => sub {
        return scraper {
            process '//a', 'links[]' => '@href'
        };
    },
);

# Override the default of the inherited use_stats attribute so it is true.
has '+use_stats' => (
    default => sub { return 1 },
);

# Hook run after the parent's finish().
#
# Prints "<result>\t<final URL>". Then, if the request had no error and the
# response is HTML, scrapes the body for links and prepends one new
# CrawlApache worker to the queue for every link whose scheme is 'http' and
# whose host is 'localhost'.
#
# Arguments:
#   $self   - this worker
#   $result - the result value passed to finish(); printed as-is
after finish => sub {
    my ($self, $result) = @_;

    say $result . "\t" . $self->final_url;

    return if $self->has_error;

    # The content type may be undefined (e.g. no Content-Type header was
    # sent); default to '' so the match does not warn. Media types are
    # case-insensitive, hence /i.
    my $content_type = $self->getinfo('content_type') // '';
    return unless $content_type =~ m{\A text/html}xi;

    my $res = $self->scrap->scrape(
        ${ $self->data },
        $self->final_url,
    );

    # The 'links' key may be missing when the page contains no <a href>;
    # fall back to an empty list. scheme/host are guarded against undef so
    # the string comparisons never warn. The scheme test stays first so that
    # host() is only called on http URIs.
    my @local_links = grep {
        ($_->scheme // '') eq 'http'
        and ($_->host // '') eq 'localhost'
    } @{ $res->{links} // [] };

    for my $link (@local_links) {
        # Each queued worker reuses this worker's scraper instead of
        # building a new one.
        $self->queue->prepend(sub {
            CrawlApache->new(
                initial_url => $link,
                scrap       => $self->scrap,
            );
        });
    }

    return;
};

1;



( run in 0.713 second using v1.01-cache-2.11-cpan-39bf76dae61 )