Data-Tranco
view release on metacpan or search on metacpan
lib/Data/Tranco.pm view on Meta::CPAN
while (my @row = $sth->fetchrow_array) {
push(@domains, @row);
}
return @domains;
}
#
# looks up $domain in the `domains` table and returns the `id` stored for it.
# the result of fetchrow_array is passed straight through, so it is evaluated
# in the caller's context.
#
sub rank {
    my $package = shift;
    my $domain  = shift;

    # prepared on the first call only, then reused for every later lookup
    state $lookup = $package->get_db->prepare(q{
SELECT `id`
FROM `domains`
WHERE (`domain`=?)
});

    $lookup->execute($domain);

    return $lookup->fetchrow_array;
}
#
# returns the database handle shared by every caller in this process.
#
# on the first call (or after a previous call failed to connect) the database
# is rebuilt via update_db() if needs_update() says so, and a connection to
# $DSN is opened and remembered. later calls return the remembered handle.
#
# croaks if the connection cannot be established, rather than handing undef
# back to callers that immediately call methods on the result.
#
sub get_db {
    my $package = shift;

    state $db;

    if (!$db) {
        $package->update_db if ($package->needs_update);

        $db = DBI->connect($DSN)
            or croak('Database connection error: ' . ($DBI::errstr // 'unknown error'));
    }

    return $db;
}
#
# returns 1 if the database needs updating, undef otherwise.
#
# if $STATIC is set, the database is never updated. otherwise an update is
# needed when any of the following is true:
#
# 1. the DB file or the zip file doesn't exist
# 2. the DB file is not newer than the zip file
# 3. the zip file is TTL seconds old or older
#
sub needs_update {
    my $package = shift;

    return undef if ($STATIC);

    return 1 if (!(-e $DBFILE) || !(-e $ZIPFILE));

    my $db_mtime  = stat($DBFILE)->mtime;
    my $zip_mtime = stat($ZIPFILE)->mtime;

    return 1 if ($db_mtime <= $zip_mtime);
    return 1 if ($zip_mtime <= time() - $TTL);

    return undef;
}
#
# rebuilds the `domains` table from the zip file:
#
# 1. calls mirror_file() for TRANCO_URL (presumably refreshing $ZIPFILE;
#    confirm against mirror_file)
# 2. reads $ZIPFILE and opens the member named after TRANCO_URL minus ".zip"
# 3. creates the table if needed, empties it, and inserts one row per CSV
#    record, all inside a single transaction
#
# croaks if the zip cannot be read, the database cannot be opened, or CSV
# parsing stops before the end of the input. in the last case the
# transaction is rolled back, so a truncated list is never committed.
#
sub update_db {
    my $package = shift;

    mirror_file(TRANCO_URL, $TTL);

    my $zip = Archive::Zip->new;
    croak('Zip read error') unless ($zip->read($ZIPFILE) == AZ_OK);

    # AutoCommit is off so that the DELETE and the INSERTs are committed together
    my $db = DBI->connect($DSN, undef, undef, { AutoCommit => 0 })
        or croak('Database connection error: ' . ($DBI::errstr // 'unknown error'));

    $db->do(q{
CREATE TABLE IF NOT EXISTS `domains` (
`id` INTEGER PRIMARY KEY,
`domain` TEXT UNIQUE COLLATE NOCASE
)
});

    my $sth = $db->prepare(q{INSERT INTO `domains` (`id`, `domain`) VALUES (?, ?)});

    $db->do(q{DELETE FROM `domains`});

    my $fh = Archive::Zip::MemberRead->new($zip, basename(TRANCO_URL, '.zip'));

    my $csv = Text::CSV_XS->new;

    while (my $row = $csv->getline($fh)) {
        $sth->execute(@{$row});
    }

    # getline() returns false on a parse error as well as at end of input, so
    # find out which one ended the loop before committing anything
    unless ($csv->eof) {
        my ($code, $message) = $csv->error_diag;

        $db->rollback;
        $db->disconnect;

        croak("CSV parse error: $message ($code)");
    }

    $db->commit;

    $db->disconnect;
}
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
Data::Tranco - An interface to the Tranco domain list.
=head1 VERSION
version 0.003
=head1 SYNOPSIS
use Data::Tranco;
# get a random domain from the list
($domain, $rank) = Data::Tranco->random_domain;
# get a random domain from .org
($domain, $rank) = Data::Tranco->random_domain("org");
# get the highest ranking domain
($domain, $rank) = Data::Tranco->top_domain;
( run in 1.407 second using v1.01-cache-2.11-cpan-39bf76dae61 )