StreamFinder
view release on metacpan or search on metacpan
lib/StreamFinder/Apple.pm view on Meta::CPAN
use URI::Escape;
use HTML::Entities ();
use LWP::UserAgent ();
use parent 'StreamFinder::_Class';
my $DEBUG = 0;
sub new
{
my $class = shift;
my $url = shift;
return undef unless ($url);
my $self = $class->SUPER::new('Apple', @_);
$DEBUG = $self->{'debug'} if (defined $self->{'debug'});
$self->{'id'} = '';
(my $url2fetch = $url);
if ($url2fetch =~ m#^https?\:\/\/(?:embed\.)?podcasts\.apple\.#) {
#EXAMPLE1:my $url = 'https://podcasts.apple.com/us/podcast/wnbc-sec-shorts-josh-snead/id1440412195?i=1000448441439';
#EXAMPLE2:my $url = 'https://podcasts.apple.com/us/podcast/good-bull-hunting-for-texas-a-m-fans/id1440412195';
$self->{'id'} = ($url =~ m#\/(?:id)?(\d\d\d\d\d+)(?:\?i\=(\d+))?\/?#) ? $1 : '';
$self->{'id'} .= '/'. $2 if (defined $2);
} elsif ($url2fetch !~ m#^https?\:\/\/#) {
my ($id, $podcastid) = split(m#\/#, $url2fetch);
$self->{'id'} = $id;
$url2fetch = 'https://podcasts.apple.com/podcast/id' . $id;
$url2fetch .= '?i=' . $podcastid if ($podcastid);
}
print STDERR "--URL=$url2fetch= ID=".$self->{'id'}."=\n" if ($DEBUG);
return undef unless ($self->{'id'});
my $html = '';
print STDERR "-0(Apple): ID=".$self->{'id'}."= AGENT=".join('|',@{$self->{'_userAgentOps'}})."=\n" if ($DEBUG);
my $ua = LWP::UserAgent->new(@{$self->{'_userAgentOps'}});
$ua->timeout($self->{'timeout'});
$ua->cookie_jar({});
$ua->env_proxy;
my $response;
# $self->{'albumartist'} = $url2fetch;
if ($self->{'id'} !~ m#\/#) { #PAGE (multiple episodes):
print STDERR "i:FETCHING PAGE URL ($url2fetch)...\n" if ($DEBUG);
$response = $ua->get($url2fetch);
if ($response->is_success) {
$html = $response->decoded_content;
} else {
print STDERR $response->status_line if ($DEBUG);
my $no_wget = system('wget','-V');
unless ($no_wget) {
print STDERR "\n..trying wget...\n" if ($DEBUG);
$html = `wget -t 2 -T 20 -O- -o /dev/null "$url2fetch" 2>/dev/null `;
}
}
print STDERR "-1: html=$html=\n" if ($DEBUG > 1);
return undef unless ($html);
if ($url2fetch =~ s#\/\/embed.podcast#\/\/podcast#) { #HANDLE "EMBEDDED" PODCAST URLS:
print STDERR "--2a: EMBEDDED PODCAST, take 5, then fetch podcast page ($url2fetch)...\n" if ($DEBUG);
sleep 5; #AVOID HITTING 'EM TOO QUICK IN SUCCESSION (AVOID DOS SUSPICION):
$response = $ua->get($url2fetch);
if ($response->is_success) { #FETCH PODCAST PAGE:
$html = $response->decoded_content;
} else {
print STDERR $response->status_line if ($DEBUG);
my $no_wget = system('wget','-V');
unless ($no_wget) {
print STDERR "\n..trying wget...\n" if ($DEBUG);
$html = `wget -t 2 -T 20 -O- -o /dev/null "$url2fetch" 2>/dev/null `;
}
}
print STDERR "-1: html=$html=\n" if ($DEBUG > 1);
return undef unless ($html);
if ($html =~ m#${url2fetch}\?i\=(\d+)#s) {
$self->{'id'} = $1;
$url2fetch .= '?i=' . $1;
print STDERR "--3: EMBEDDED EPISODE FOUND (id=$1): URL=$url2fetch)!\n" if ($DEBUG);
} else {
print STDERR "f:Could not find embedded episode in ($url2fetch), aborting!\n";
return undef;
}
} else {
$url2fetch = ($html =~ m#\,\"uploadDate\"\:\"[^\"]+\"\,\"url\"\:\"([^\"]+)#)
? $1 : '';
return undef unless ($url2fetch);
$self->{'id'} = ($url2fetch =~ m#\/(?:id)?(\d\d\d\d\d+)(?:\?i\=(\d+))?\/?#) ? $1 : '';
$self->{'id'} .= '/'. $2 if (defined $2);
print "--FETCH EPISODE ID=$$self{'id'}= URL=$url2fetch=\n" if ($DEBUG);
}
}
#FETCH EPISODE:
print STDERR "i:FETCHING EPISODE URL ($url2fetch)...\n" if ($DEBUG);
$response = $ua->get($url2fetch);
if ($response->is_success) {
$html = $response->decoded_content;
} else {
print STDERR $response->status_line if ($DEBUG);
my $no_wget = system('wget','-V');
unless ($no_wget) {
print STDERR "\n..trying wget...\n" if ($DEBUG);
$html = `wget -t 2 -T 20 -O- -o /dev/null "$url2fetch" 2>/dev/null `;
}
}
print STDERR "-2: html=$html=\n" if ($DEBUG > 1);
return undef unless ($html);
$self->{'iconurl'} = ($html =~ /\bsrcset\=\"([^\"\s]+)/s) ? $1 : '';
$self->{'iconurl'} = ($html =~ m#\"thumbnailUrl\"\:\"([^\"]+)#s) ? $1 : ''
if (!$self->{'iconurl'} || $self->{'iconurl'} !~ /^http/);
$self->{'imageurl'} = $1 if ($html =~ m#\<meta\s+property\=\"(?:og|twitter)\:image\:secure\_url\"\s+content\=\"([^\"\s]+)#s);
$self->{'imageurl'} ||= $1 if ($html =~ m#\<meta\s+(?:property|name)\=\"(?:og|twitter)\:image\"\s+content\=\"([^\"\s]+)#s);
$self->{'imageurl'} = $self->{'iconurl'}
if (!$self->{'imageurl'} || $self->{'imageurl'} !~ /^http/);
$self->{'iconurl'} ||= $self->{'imageurl'};
if ($html =~ m#\:\{\"\@type\"\:\"CreativeWorkSeries\"([^\}]+)#s) {
my $artistdata = $1;
$self->{'artist'} = $1 if ($artistdata =~ m#\"name\"\:\"([^\"]+)\"#s);
$self->{'albumartist'} = $1 if ($artistdata =~ m#\"url\"\:\"([^\"]+)\"#s);
}
if ($html =~ m#\<h1(.+?)\<\/h1\>#si) {
my $titlestuff = $1;
if ($titlestuff =~ m#\s+aria\-label\=\"([^\"]+)#s) {
$self->{'title'} = $1;
} elsif ($titlestuff =~ m#\>(.+?)\<\/span\>#s) {
$self->{'title'} = $1;
}
}
$self->{'title'} ||= $1 if ($html =~ s#\"(?:name|itunesTitle)\\?\"\:\\?\"(.+?)\\?\"\,##so);
$self->{'title'} =~ s#\\##g;
$self->{'title'} =~ s#\<[^\>]+\>##g;
$self->{'description'} = $1
if ($html =~ m#\>\<\!\-\- HTML\_TAG\_START \-\-\>(.+?)\<\!\-\- HTML\_TAG\_END \-\-\>#s);
#JWT:I DO NOT FULLY TRUST THE REGEX JUST ABOVE!:
my $shorterDesc = ($html =~ m#\"description\"\:\"([^\\\"]+)#s) ? $1 : '';
( run in 0.426 second using v1.01-cache-2.11-cpan-71847e10f99 )