StreamFinder

 view release on metacpan or  search on metacpan

lib/StreamFinder/Apple.pm  view on Meta::CPAN

use URI::Escape;
use HTML::Entities ();
use LWP::UserAgent ();
use parent 'StreamFinder::_Class';

# File-scoped debug level.  new() overwrites it with $self->{'debug'}
# whenever that field is defined.  Any true value turns on the diagnostic
# prints; a value greater than 1 additionally dumps the fetched HTML.
my $DEBUG = 0;

sub new
{
	my $class = shift;
	my $url = shift;

	return undef  unless ($url);

	my $self = $class->SUPER::new('Apple', @_);
	$DEBUG = $self->{'debug'}  if (defined $self->{'debug'});

	$self->{'id'} = '';
	(my $url2fetch = $url);
	if ($url2fetch =~ m#^https?\:\/\/(?:embed\.)?podcasts\.apple\.#) {
#EXAMPLE1:my $url = 'https://podcasts.apple.com/us/podcast/wnbc-sec-shorts-josh-snead/id1440412195?i=1000448441439';
#EXAMPLE2:my $url = 'https://podcasts.apple.com/us/podcast/good-bull-hunting-for-texas-a-m-fans/id1440412195';
		$self->{'id'} = ($url =~ m#\/(?:id)?(\d\d\d\d\d+)(?:\?i\=(\d+))?\/?#) ? $1 : '';
		$self->{'id'} .= '/'. $2  if (defined $2);
	} elsif ($url2fetch !~ m#^https?\:\/\/#) {
		my ($id, $podcastid) = split(m#\/#, $url2fetch);
		$self->{'id'} = $id;
		$url2fetch = 'https://podcasts.apple.com/podcast/id' . $id;
		$url2fetch .= '?i=' . $podcastid  if ($podcastid);
	}

	print STDERR "--URL=$url2fetch= ID=".$self->{'id'}."=\n"  if ($DEBUG);
	return undef  unless ($self->{'id'});

	my $html = '';
	print STDERR "-0(Apple): ID=".$self->{'id'}."= AGENT=".join('|',@{$self->{'_userAgentOps'}})."=\n"  if ($DEBUG);
	my $ua = LWP::UserAgent->new(@{$self->{'_userAgentOps'}});
	$ua->timeout($self->{'timeout'});
	$ua->cookie_jar({});
	$ua->env_proxy;
	my $response;
#	$self->{'albumartist'} = $url2fetch;

	if ($self->{'id'} !~ m#\/#) {   #PAGE (multiple episodes):
		print STDERR "i:FETCHING PAGE URL ($url2fetch)...\n"  if ($DEBUG);
		$response = $ua->get($url2fetch);
		if ($response->is_success) {
			$html = $response->decoded_content;
		} else {
			print STDERR $response->status_line  if ($DEBUG);
			my $no_wget = system('wget','-V');
			unless ($no_wget) {
				print STDERR "\n..trying wget...\n"  if ($DEBUG);
				$html = `wget -t 2 -T 20 -O- -o /dev/null "$url2fetch" 2>/dev/null `;
			}
		}
		print STDERR "-1: html=$html=\n"  if ($DEBUG > 1);

		return undef  unless ($html);

		if ($url2fetch =~ s#\/\/embed.podcast#\/\/podcast#) {  #HANDLE "EMBEDDED PODCAST URLS:
			print STDERR "--2a: EMBEDDED PODCAST, take 5, then fetch podcast page ($url2fetch)...\n"  if ($DEBUG);
			sleep 5;  #AVOID HITTING 'EM TOO QUICK IN SUCCESSION (AVOID DOS SUSPICION):
			$response = $ua->get($url2fetch);
			if ($response->is_success) {  #JETCH PODCAST PAGE:
				$html = $response->decoded_content;
			} else {
				print STDERR $response->status_line  if ($DEBUG);
				my $no_wget = system('wget','-V');
				unless ($no_wget) {
					print STDERR "\n..trying wget...\n"  if ($DEBUG);
					$html = `wget -t 2 -T 20 -O- -o /dev/null "$url2fetch" 2>/dev/null `;
				}
			}

			print STDERR "-1: html=$html=\n"  if ($DEBUG > 1);
			return undef  unless ($html);

			if ($html =~ m#${url2fetch}\?i\=(\d+)#s) {
				$self->{'id'} = $1;
				$url2fetch .= '?i=' . $1;
				print STDERR "--3: EMBEDDED EPISODE FOUND (id=$1): URL=$url2fetch)!\n"  if ($DEBUG);
			} else {
				print STDERR "f:Could not find embedded episode in ($url2fetch), aborting!\n";
				return undef;
			}
		} else {
			$url2fetch = ($html =~ m#\,\"uploadDate\"\:\"[^\"]+\"\,\"url\"\:\"([^\"]+)#)
					? $1 : '';
			return undef  unless ($url2fetch);
			$self->{'id'} = ($url2fetch =~ m#\/(?:id)?(\d\d\d\d\d+)(?:\?i\=(\d+))?\/?#) ? $1 : '';
			$self->{'id'} .= '/'. $2  if (defined $2);
			print "--FETCH EPISODE ID=$$self{'id'}= URL=$url2fetch=\n"  if ($DEBUG);
		}
 	}

#FETCH EPISODE:

	print STDERR "i:FETCHING EPISODE URL ($url2fetch)...\n"  if ($DEBUG);
	$response = $ua->get($url2fetch);
	if ($response->is_success) {
		$html = $response->decoded_content;
	} else {
		print STDERR $response->status_line  if ($DEBUG);
		my $no_wget = system('wget','-V');
		unless ($no_wget) {
			print STDERR "\n..trying wget...\n"  if ($DEBUG);
			$html = `wget -t 2 -T 20 -O- -o /dev/null "$url2fetch" 2>/dev/null `;
		}
	}

	print STDERR "-2: html=$html=\n"  if ($DEBUG > 1);
	return undef  unless ($html);

	$self->{'iconurl'} = ($html =~ /\bsrcset\=\"([^\"\s]+)/s) ? $1 : '';
	$self->{'iconurl'} = ($html =~ m#\"thumbnailUrl\"\:\"([^\"]+)#s) ? $1 : ''
			if (!$self->{'iconurl'} || $self->{'iconurl'} !~ /^http/);
	$self->{'imageurl'} = $1  if ($html =~ m#\<meta\s+property\=\"(?:og|twitter)\:image\:secure\_url\"\s+content\=\"([^\"\s]+)#s);
	$self->{'imageurl'} ||= $1  if ($html =~ m#\<meta\s+(?:property|name)\=\"(?:og|twitter)\:image\"\s+content\=\"([^\"\s]+)#s);
	$self->{'imageurl'} = $self->{'iconurl'}
			if (!$self->{'imageurl'}  || $self->{'imageurl'} !~ /^http/);
	$self->{'iconurl'} ||= $self->{'imageurl'};
	if ($html =~ m#\:\{\"\@type\"\:\"CreativeWorkSeries\"([^\}]+)#s) {
		my $artistdata = $1;
		$self->{'artist'} = $1  if ($artistdata =~ m#\"name\"\:\"([^\"]+)\"#s);
		$self->{'albumartist'} = $1  if ($artistdata =~ m#\"url\"\:\"([^\"]+)\"#s);
	}
	if ($html =~ m#\<h1(.+?)\<\/h1\>#si) {
		my $titlestuff = $1;
		if ($titlestuff =~ m#\s+aria\-label\=\"([^\"]+)#s) {
			$self->{'title'} = $1;
		} elsif ($titlestuff =~ m#\>(.+?)\<\/span\>#s) {
			$self->{'title'} = $1;
		}
	}
	$self->{'title'} ||= $1  if ($html =~ s#\"(?:name|itunesTitle)\\?\"\:\\?\"(.+?)\\?\"\,##so);
	$self->{'title'} =~ s#\\##g;
	$self->{'title'} =~ s#\<[^\>]+\>##g;
	$self->{'description'} = $1
			if ($html =~ m#\>\<\!\-\- HTML\_TAG\_START \-\-\>(.+?)\<\!\-\- HTML\_TAG\_END \-\-\>#s);
	#JWT:I DO NOT FULLY TRUST THE REGEX JUST ABOVE!:
	my $shorterDesc = ($html =~ m#\"description\"\:\"([^\\\"]+)#s) ? $1 : '';



( run in 0.426 second using v1.01-cache-2.11-cpan-71847e10f99 )