WWW-Sitemapper

 view release on metacpan or  search on metacpan

t/sitemapper.t  view on Meta::CPAN


    use base qw( WWW::Sitemapper );

    has 'url_parents' => (
        is => 'rw',
        isa => 'HashRef',
        default => sub { +{} },
    );

    sub _build_robot_config {
        my $self = shift;

        return {
            NAME => 'MyRobot',
            EMAIL => 'me@domain.tld',
            DELAY => 0,
        };
    }

    sub url_test : Hook('follow-url-test') {
        my $self = shift;
        my ($robot, $hook_name, $uri) = @_;

        my @restricted = (
            qr{^/12.html},
        );

        my $url = $uri->path_query;

        if ( $self->site->host eq $uri->host ) {
            for my $re ( @restricted ) {
                if ( $url =~ /$re/ ) {
                    return 0;
                }
            }

            return 1;
        }

        return 0;
    }

    sub incorrect_attr : Attr(whatever) {
        my $self = shift;

        return 1;
    }

    around '_map_builder' => sub {
        my $orig = shift;
        my $self = shift;
        my ($robot, $hook_name, $from_url, $to_url) = @_;

        $self->url_parents->{$to_url->path}->{$from_url->path} = 1;

        $self->$orig( $robot, $hook_name, $from_url, $to_url );
    };
};

# We want to be safe from non-resolving local host names
delete $ENV{HTTP_PROXY};
my $d = HTTP::Daemon->new( LocalAddr => 'localhost' ) || die;
my $server_host = $d->url;
my $is_test;
my $STATUS_STORAGE_FILE = "t/status.storage";
my $TEST_TIME = time();
my $W3C_DATETIME = DateTime->from_epoch(
        epoch => $TEST_TIME
)->strftime('%FT%T%z');
$W3C_DATETIME =~ s/(\d{2})$/:$1/;
# for qr
$W3C_DATETIME =~ s/([\-\+])/\\$1/g;
my $HTTP_DATE = time2str($TEST_TIME);

if ($is_test = fork ) {
    # wait for server to start up
    sleep 1;

    my @valid_links = map {
        "$server_host$_"
    } qw(
        index.html
            1.html
                11.html
                    3.html
                        31.html
                        32.html
            2.html
                21.html
                22.html
    );
    my @valid_redirects = map {
        "$server_host$_"
    } qw(
        friendly_url.html
    );

    my $mapper;

    lives_ok {
        $mapper = MyWebSite::Map->new(
            site => "${server_host}index.html",
            status_storage => $STATUS_STORAGE_FILE,
            auto_save => 0.02,
        );
    } "mapper object created";

    lives_ok {
        $mapper->run();
    } "run() works";

    my $root = $mapper->tree;

    is_deeply(
        [ sort keys %{ $root->_dictionary } ],
        [ sort @valid_links ],
        "valid links were fetched"
    );
    is_deeply(
        [ sort keys %{ $root->_redirects } ],
        [ sort @valid_redirects ],



( run in 0.328 second using v1.01-cache-2.11-cpan-71847e10f99 )