IMDB-JSON
view release on metacpan or search on metacpan
lib/IMDB/JSON.pm view on Meta::CPAN
=head1 PACKAGE
IMDB::JSON
=head1 DESCRIPTION
Search IMDB for a specific title, process the result and extract the JSON script within. Process the JSON script and return a hash reference.
=cut
package IMDB::JSON;
$IMDB::JSON::VERSION = "0.05";
use strict;
use HTML::TokeParser;
use LWP::Simple qw($ua get);
use IO::Socket::SSL;
use JSON::XS;
=head1 SYNOPSIS
use IMDB::JSON;
use Data::Dumper;
my $IMDB = IMDB::JSON->new;
print Dumper($IMDB->search("The Thing", 1982));
exit;
=head1 METHODS
=head2 new(opt => value);
Create a new IMDB::JSON object. Options are passed to the constructor as key/value pairs.
=head3 OPTIONS
=over
=item base_url
The base URL to start from. Defaults to https://www.imdb.com
=item raw_json
If true, return only the raw JSON text; it is not decoded into a hash reference
=item user_agent
Set the User-Agent you want to send with the request
=item debug
If true, print debug messages to STDERR
=back
=cut
sub new {
    # Constructor. Options are given as key/value pairs:
    #   base_url   - site root to query; 'https://www.imdb.com' is used when
    #                the option is missing or false
    #   raw_json   - any true value is normalised to 1, anything else to 0
    #   user_agent - stored as given (undef when not supplied)
    #   debug      - stored as given (undef when not supplied)
    # Returns the new object, blessed into the class the constructor was
    # invoked on, so subclasses receive instances of their own class.
    my ($CLASS, %o) = @_;

    # Accept a class name or an existing instance as invocant; fall back to
    # this package if no usable invocant was supplied.
    my $class = ref($CLASS) || $CLASS || __PACKAGE__;

    return bless {
        base_url   => ($o{base_url} ? $o{base_url} : 'https://www.imdb.com'),
        raw_json   => ($o{raw_json} ? 1 : 0),
        user_agent => $o{user_agent},
        debug      => $o{debug},
    }, $class;
}
sub _get {
    # Fetch a URL with an HTTP GET and return the response body.
    #
    # Parameters:
    #   $URL - the URL to request
    # Returns the raw (undecoded) response content when the request
    # succeeded, or an empty string when it did not, so callers never mistake
    # an error page or a client-side failure message for real page data.
    my ($self, $URL) = @_;

    # Load the client explicitly instead of relying on another module having
    # pulled it in.
    require LWP::UserAgent;

    # NOTE(review): certificate and hostname verification are switched off
    # below, which leaves requests open to interception. Kept as-is because
    # existing users may rely on it -- confirm whether it is still required.
    # The environment override is limited to this call with "local" so it no
    # longer leaks into every other LWP user in the process.
    local $ENV{'PERL_LWP_SSL_VERIFY_HOSTNAME'} = 0;
    my $ua = LWP::UserAgent->new(
        ssl_opts => {
            verify_hostname => 0,
            SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE(),
        }
    );

    # Only override the User-Agent header when one was configured.
    $ua->agent($self->{user_agent}) if $self->{user_agent};

    print STDERR "DEBUG: fetch URL: $URL\n" if $self->{debug};

    my $response = $ua->get($URL);
    if (!$response->is_success) {
        print STDERR "DEBUG: request failed: " . $response->status_line . "\n" if $self->{debug};
        return '';
    }

    return $response->content;
}
# Percent-encode a string for use inside a URL.
#
# Parameters:
#   $data - the text to encode (undef is treated as an empty string)
# Returns the encoded string: ASCII letters, digits, "_", "." and "-" are
# kept as they are, every other octet becomes an upper-case %XX escape.
sub _enc {
    my ($self, $data) = @_;
    return '' unless defined $data;

    # Character strings must be converted to UTF-8 octets first. Otherwise a
    # code point above 0xFF would be formatted as a single invalid escape such
    # as "%263A" instead of one escape per byte.
    utf8::encode($data) if utf8::is_utf8($data);

    $data =~ s/([^a-zA-Z0-9_.-])/sprintf('%%%02X', ord($1))/eg;
    return $data;
}
# Process IMDB search results
sub _result {
my ($self, $title, $year) = @_;
my $data = $self->_get($self->{base_url} . '/search/title?title=' . $self->_enc($title) . '&release_date=' . $year . '-01-01,' . $year . '-12-31&view=simple');
print STDERR "DEBUG: " . length($data) . " bytes of data received\n" if $self->{debug};
return if !$data;
my $url;
# Process the results data (must be reference scalar!)
my $p = HTML::TokeParser->new(\$data);
$self->{_cur_id} = '';
# <meta property="imdb:pageConst" content="tt11125620"/>
while(my $t = $p->get_tag('meta')){
if($t->[1]->{content} =~ /(tt\d+)/){
$self->{_cur_id} = $1;
last;
} else {
use Data::Dumper; print Dumper($t);
}
}
$p = HTML::TokeParser->new(\$data);
# Walk down to the results section
while(my $t = $p->get_tag('div')){
last if($t->[1]->{class} eq 'lister-item mode-simple');
}
# Walk through the results and match the correct one
while(my $t = $p->get_tag('span')){
# Found a results chunk
if($t->[1]->{class} eq "lister-item-header"){
( run in 2.695 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )