Robots-Validate

 view release on metacpan or  search on metacpan

lib/Robots/Validate.pm  view on Meta::CPAN

package Robots::Validate;

# ABSTRACT: Validate that IP addresses are associated with known robots

use v5.14;

use Moo 1;

use MooX::Const v0.4.0;
use List::Util 1.33 qw/ first none /;
use Net::DNS::Resolver;
use Ref::Util qw/ is_plain_hashref /;
use Types::Standard -types;

# RECOMMEND PREREQ: Type::Tiny::XS
# RECOMMEND PREREQ: Ref::Util::XS

use namespace::autoclean;

our $VERSION = 'v0.2.9';


# Attribute "resolver": must be an instance of Net::DNS::Resolver.
# Declared lazy, so the value is produced by the _build_resolver builder
# the first time the attribute is read.
has resolver => (
    isa     => InstanceOf ['Net::DNS::Resolver'],
    is      => 'lazy',
    builder => '_build_resolver',
);

# Builder for the "resolver" attribute.
#
# Returns a freshly constructed Net::DNS::Resolver, created without any
# constructor arguments.
sub _build_resolver {
    my $resolver_class = 'Net::DNS::Resolver';
    return $resolver_class->new;
}


# Attribute "robots": an array reference of robot descriptions.  Each
# entry is a hash reference holding a "name" string, a "domain" regular
# expression, and optionally an "agent" regular expression.  The value is
# built lazily by _build_robots.
has robots => (
    is      => 'const',
    lazy    => 1,
    builder => '_build_robots',

    # NOTE(review): "strict" is presumably the MooX::Const option that
    # controls read-only enforcement on references; a false value looks
    # like it relaxes that - confirm against the MooX::Const docs.
    strict => 0,

    isa => ArrayRef [
        Dict [
            name   => Str,
            agent  => Optional [RegexpRef],
            domain => RegexpRef,
        ],
    ],
);

sub _build_robots {
    return [

        {
            name   => 'Amazonbot',
            agent  => qr/Amazonbot\b/,
            domain => qr/\.crawl\.amazonbot\.amazon$/,
        },

        {
            name   => 'Applebot',
            agent  => qr/Applebot\b/,
            domain => qr/\.applebot\.apple\.com$/,
        },

        {
            name   => 'Arquivo.pt',
            agent  => qr/arquivo-web-crawler/,
            domain => qr/\.arquivo\.pt$/,
        },

        {
            name   => 'Baidu',
            agent  => qr/Baiduspider\b/,
            domain => qr/\.crawl\.baidu\.com$/,

        },

        {



( run in 2.036 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )