AnyEvent-Google-PageRank
view release on metacpan or search on metacpan
lib/AnyEvent/Google/PageRank.pm view on Meta::CPAN
# User-Agent header value used when the caller does not supply one.
use constant DEFAULT_AGENT => 'Mozilla/4.0 (compatible; GoogleToolbar 2.0.111-big; Windows XP 5.1)';

# Host the rank query is sent to when no host option is given.
use constant DEFAULT_HOST => 'toolbarqueries.google.com';
=head1 METHODS
=head2 new(%opts)
Creates a new AnyEvent::Google::PageRank object. The following options are available (all are optional):
KEY DESCRIPTION DEFAULT
------------------------------------------------------------------
agent User-Agent value in the headers Mozilla/4.0 (compatible; GoogleToolbar 2.0.111-big; Windows XP 5.1)
proxy http proxy as address:port undef
timeout timeout for network operations AnyEvent::HTTP default timeout
host host for query toolbarqueries.google.com
ae_http AnyEvent::HTTP request options as hashref undef
=cut
# Constructor. Accepts the optional keys agent, timeout, proxy, host and
# ae_http; any other key makes it croak. A false or missing agent falls back
# to DEFAULT_AGENT, the remaining options are stored as given (undef when
# absent). Returns the blessed object.
sub new {
    my ($class, %opts) = @_;

    my %self = (agent => delete($opts{agent}) || DEFAULT_AGENT);

    # These options have no constructor-level default; keep whatever was passed.
    $self{$_} = delete $opts{$_} for qw(timeout proxy host ae_http);

    # Anything still left in %opts is a key this class does not know about.
    croak 'Unrecognized options specified: ', join(', ', keys %opts)
        if %opts;

    return bless \%self, $class;
}
=head2 get($url, $cb->($rank, $headers))
Gets the rank for the specified url and calls the specified callback on finish. The callback
receives two parameters: rank and headers. On failure rank will be undef; the reason can be found
in $headers->{Reason} and the code in $headers->{Status}. Special codes provided by this module are:
695 - malformed url
For other codes see L<AnyEvent::HTTP>
=cut
# Request the rank for $url and hand the result to $cb.
#
# Can be invoked in two ways:
#   * as a method on an object created by new();
#   * from rank_get(), which passes a plain (unblessed) hashref of options
#     in place of $self.
#
# Parameters:
#   $url - url to look up; must start with http:// or https://
#   $cb  - code reference called as $cb->($rank, $headers); $rank is undef
#          when the status is not 2xx or no rank was found in the body
#
# Croaks if $cb is not a code reference. An undefined or malformed $url is
# reported through the callback with Status 695. Otherwise returns whatever
# http_get returns.
sub get {
    my ($self, $url, $cb) = @_;

    croak 'Not a code reference in $cb'
        if ref($cb) ne 'CODE';

    # The defined() check keeps an undef url from raising an
    # "uninitialized value" warning before it is reported as malformed.
    return $cb->(undef, {Status => 695, Reason => 'malformed url'})
        if !defined($url) || $url !~ m[^https?://]i;

    my $ch = '6' . WWW::Google::PageRank::_compute_ch_new('info:' . $url);
    my $query = 'http://' . ($self->{host}||DEFAULT_HOST) . '/tbr?client=navclient-auto&ch=' . $ch .
                '&ie=UTF-8&oe=UTF-8&features=Rank&q=info:' . uri_escape($url);

    my $opts = {};
    if (ref($self) eq 'HASH') {
        # call from rank_get
        # Work on a copy so the caller's option hash is never modified.
        %$opts = %$self;

        # Only a plain "address:port" string is split; a reference is passed through.
        $opts->{proxy} = [split /:/, $opts->{proxy}]
            if defined $opts->{proxy} && !ref($opts->{proxy}) && index($opts->{proxy}, ':') != -1;

        # The copy above is shallow: detach the headers hash before writing to it.
        $opts->{headers} = { %{ $opts->{headers} } } if ref($opts->{headers}) eq 'HASH';
        $opts->{headers}{'User-Agent'} = exists($opts->{agent}) ? $opts->{agent} : DEFAULT_AGENT;
    }
    else {
        # object call
        %$opts = %{$self->{ae_http}} if ref($self->{ae_http}) eq 'HASH';
        $opts->{timeout} = $self->{timeout} if defined $self->{timeout};
        $opts->{proxy} = [split /:/, $self->{proxy}] if defined $self->{proxy};

        if (defined $self->{agent}) {
            # Same shallow-copy concern: do not write into the user's ae_http headers.
            $opts->{headers} = { %{ $opts->{headers} } } if ref($opts->{headers}) eq 'HASH';
            $opts->{headers}{'User-Agent'} = $self->{agent};
        }
    }

    return http_get $query, %$opts, sub {
        my ($data, $headers) = @_;

        # The rank is the last numeric field of the "Rank_N:N:N" line in the body.
        if ($headers->{Status} =~ /^2/ && defined($data) && $data =~ /Rank_\d+:\d+:(\d+)/) {
            $cb->($1, $headers);
        }
        else {
            $cb->(undef, $headers);
        }
    };
}
=head1 FUNCTIONS
=head2 rank_get($url, key => val, ..., $cb->($rank, $headers))
Gets the rank for the specified url and calls the specified callback on finish. Key/value pairs
are options understood by AnyEvent::HTTP::http_request() and the new() method of this
module (except the ae_http option). For a description of $cb see the get() method.
=cut
# Functional interface: rank_get($url, key => val, ..., $cb).
# The last argument is the callback, the first is the url, and everything in
# between is collected into an option hash that is passed to get() in place
# of an object.
sub rank_get {
    my @args = @_;

    # The callback is taken off the end first, then url and options are unpacked.
    my $cb = pop @args;
    my ($url, %opts) = @args;

    return get(\%opts, $url, $cb);
}
1;
=head1 BUGS
Not a bug: don't forget to set $AnyEvent::HTTP::MAX_PER_HOST to proper value.
See L<AnyEvent::HTTP> for details.
If you find any bug, please report it.
=head1 SEE ALSO
L<WWW::Google::PageRank>, L<AnyEvent::HTTP>
=head1 COPYRIGHT
Copyright Oleg G <oleg@cpan.org>.
( run in 2.795 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )