Data-Tranco

 view release on metacpan or  search on metacpan

lib/Data/Tranco.pm  view on Meta::CPAN


    while (my @row = $sth->fetchrow_array) {
        push(@domains, @row);
    }

    return @domains;
}


#
# returns the `id` (rank) stored in the `domains` table for $domain. if the
# domain is not in the table, returns an empty list in list context and
# undef in scalar context.
#
sub rank {
    my ($package, $domain) = @_;

    # the statement is prepared once, on the first call, and then reused
    state $sth = $package->get_db->prepare(q{
        SELECT `id`
        FROM `domains`
        WHERE (`domain`=?)
    });

    $sth->execute($domain);

    my @row = $sth->fetchrow_array;

    # only one row is ever wanted and the handle is cached for the life of
    # the process, so release the result set explicitly instead of leaving
    # the handle active until the next call
    $sth->finish;

    # bare return preserves the original contract for "not found": empty
    # list in list context, undef in scalar context
    return unless (@row);

    return $row[0];
}


#
# returns the database handle, connecting on the first call and reusing the
# same handle afterwards. before that first connection, update_db() is run
# if needs_update() returns true. dies if the connection can't be made.
#
sub get_db {
    my $package = shift;

    state $db;

    if (!$db) {
        $package->update_db if ($package->needs_update);

        # DBI->connect() returns undef on failure: fail here, with the
        # driver's own message, rather than handing undef back to a caller
        # that will then die on a method call on an undefined value
        $db = DBI->connect($DSN)
            or croak('Database connection error: ' . ($DBI::errstr // 'unknown error'));
    }

    return $db;
}


#
# returns true if the database needs updating. if $STATIC is true, the
# database is never considered out of date. otherwise an update is needed
# when any of the following holds:
#
# 1. the DB file doesn't exist
# 2. the zip file doesn't exist
# 3. the DB file is not newer than the zip file
# 4. the zip file is at least TTL seconds old
#
sub needs_update {
    my $package = shift;

    return undef if ($STATIC);

    # stat each file exactly once: a failed stat() doubles as the existence
    # check, so a file that disappears between the check and the mtime
    # lookup can't cause a method call on an undefined value
    my $db_stat  = stat($DBFILE)  or return 1;
    my $zip_stat = stat($ZIPFILE) or return 1;

    return 1 unless ($db_stat->mtime > $zip_stat->mtime);
    return 1 unless ($zip_stat->mtime > time() - $TTL);

    return undef;
}

#
# rebuilds the `domains` table from the Tranco zip file.
#
# mirror_file() is asked for TRANCO_URL first (presumably a cached download
# honouring $TTL - confirm against its documentation), then the CSV member
# of $ZIPFILE is loaded as (id, domain) rows.
#
# the table is emptied and refilled inside a single transaction, which is
# rolled back if anything fails, so a failed load is never committed.
#
# dies if the zip file can't be read, if the database connection can't be
# made, or if any database operation fails.
#
sub update_db {
    my $package = shift;

    mirror_file(TRANCO_URL, $TTL);

    my $zip = Archive::Zip->new;

    croak('Zip read error') unless ($zip->read($ZIPFILE) == AZ_OK);

    # RaiseError turns every failed DBI call into an exception: without it
    # a failed statement only prints a warning and the load carries on, so
    # an incomplete table could be committed
    my $db = DBI->connect($DSN, undef, undef, {
        AutoCommit => 0,
        RaiseError => 1,
        PrintError => 0,
    }) or croak('Database connection error: ' . ($DBI::errstr // 'unknown error'));

    my $ok = eval {
        $db->do(q{
            CREATE TABLE IF NOT EXISTS `domains` (
                `id`        INTEGER PRIMARY KEY,
                `domain`    TEXT UNIQUE COLLATE NOCASE
            )
        });

        my $sth = $db->prepare(q{INSERT INTO `domains` (`id`, `domain`) VALUES (?, ?)});

        $db->do(q{DELETE FROM `domains`});

        # the CSV member is named after the zip file, minus its extension
        my $fh  = Archive::Zip::MemberRead->new($zip, basename(TRANCO_URL, '.zip'));
        my $csv = Text::CSV_XS->new;
        while (my $row = $csv->getline($fh)) {
            $sth->execute(@{$row});
        }

        $db->commit;

        1;
    };

    # capture the error straight away, before another eval can clobber $@
    my $error = $@;

    if (!$ok) {
        # best-effort cleanup: a failure here must not mask the real error
        eval {
            $db->rollback;
            $db->disconnect;
        };

        croak('Database update error: ' . ($error || 'unknown error'));
    }

    $db->disconnect;
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Data::Tranco - An interface to the Tranco domain list.

=head1 VERSION

version 0.003

=head1 SYNOPSIS

    use Data::Tranco;

    # get a random domain from the list
    ($domain, $rank) = Data::Tranco->random_domain;

    # get a random domain from .org
    ($domain, $rank) = Data::Tranco->random_domain("org");

    # get the highest ranking domain
    ($domain, $rank) = Data::Tranco->top_domain;



( run in 1.407 second using v1.01-cache-2.11-cpan-39bf76dae61 )