Robots-Validate
view release on metacpan or search on metacpan
lib/Robots/Validate.pm view on Meta::CPAN
package Robots::Validate;
# ABSTRACT: Validate that IP addresses are associated with known robots
use v5.14;
use Moo 1;
use MooX::Const v0.4.0;
use List::Util 1.33 qw/ first none /;
use Net::DNS::Resolver;
use Ref::Util qw/ is_plain_hashref /;
use Types::Standard -types;
# RECOMMEND PREREQ: Type::Tiny::XS
# RECOMMEND PREREQ: Ref::Util::XS
use namespace::autoclean;
our $VERSION = 'v0.2.9';
# The DNS resolver attribute.
#
# Declared lazy, so the value is only built (by _build_resolver) the first
# time it is read. Any value stored here must be an instance of
# Net::DNS::Resolver.
has resolver => (
    is      => 'lazy',
    builder => '_build_resolver',
    isa     => InstanceOf ['Net::DNS::Resolver'],
);
# Builder for the "resolver" attribute.
#
# Returns a new Net::DNS::Resolver constructed without arguments, so the
# resolver is configured with whatever defaults that class provides.
sub _build_resolver {
    my $class = 'Net::DNS::Resolver';
    return $class->new;
}
# The list of known robots.
#
# The value is an array reference of hash references. Each entry has:
#
#   name   - a string naming the robot (required)
#   agent  - a regexp (optional)
#   domain - a regexp (required)
#
# The attribute is built lazily by _build_robots.
#
# NOTE(review): "is => 'const'" and "strict => 0" are handled by MooX::Const;
# presumably strict => 0 relaxes how strictly the structure is made
# read-only -- confirm against the MooX::Const documentation.
has robots => (
    is      => 'const',
    lazy    => 1,
    strict  => 0,
    builder => '_build_robots',
    isa     => ArrayRef [
        Dict [
            name   => Str,
            agent  => Optional [RegexpRef],
            domain => RegexpRef,
        ]
    ],
);
sub _build_robots {
return [
{
name => 'Amazonbot',
agent => qr/Amazonbot\b/,
domain => qr/\.crawl\.amazonbot\.amazon$/,
},
{
name => 'Applebot',
agent => qr/Applebot\b/,
domain => qr/\.applebot\.apple\.com$/,
},
{
name => 'Arquivo.pt',
agent => qr/arquivo-web-crawler/,
domain => qr/\.arquivo\.pt$/,
},
{
name => 'Baidu',
agent => qr/Baiduspider\b/,
domain => qr/\.crawl\.baidu\.com$/,
},
{
( run in 2.036 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )