StreamFinder

 view release on metacpan or  search on metacpan

lib/StreamFinder/PodcastAddict.pm  view on Meta::CPAN


This license does not grant you the right to use any trademark, service
mark, tradename, or logo of the Copyright Holder.

This license includes the non-exclusive, worldwide, free-of-charge
patent license to make, have made, use, offer to sell, sell, import and
otherwise transfer the Package with respect to any patent claims
licensable by the Copyright Holder that are necessarily infringed by the
Package. If you institute patent litigation (including a cross-claim or
counterclaim) against any party alleging that the Package constitutes
direct or contributory patent infringement, then this Artistic License
to you shall terminate on the date that such litigation is filed.

Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER
AND CONTRIBUTORS "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY
YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR
CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR
CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=cut

package StreamFinder::PodcastAddict;

use strict;
use warnings;
use URI::Escape;
use HTML::Entities ();
use LWP::UserAgent ();
use parent 'StreamFinder::_Class';

# File-scoped debug level shared by every object of this package: new() overwrites
# it with the object's 'debug' attribute whenever that attribute is defined.  Any
# true value makes new() print progress messages to STDERR; a value above 1 also
# dumps the fetched HTML.
my $DEBUG = 0;

sub new
{
	my $class = shift;
	my $url = shift;

	return undef  unless ($url);

	my $self = $class->SUPER::new('PodcastAddict', @_);
	$DEBUG = $self->{'debug'}  if (defined $self->{'debug'});
	my $baseURL = 'https://podcastaddict.com';
	$self->{'id'} = '';
	$self->{'_podcast_id'} = '';
	my $url2fetch = $url;
	my $tried = 0;
	my @epiTitles = ();
	my @epiStreams = ();
	my $ua = LWP::UserAgent->new(@{$self->{'_userAgentOps'}});		
	$ua->timeout($self->{'timeout'});
	$ua->cookie_jar({});
	$ua->env_proxy;
	my $html = '';
	my $response;
	my $isEpisode;

#NOTE:  THE ONLY "EPISODE" URLS PODCASTADDICT NOW USES ARE THE URI-ESCAPED ONES EMBEDDED IN ITS
#PODCAST PAGES, WHICH CONTAIN THE STREAM URL (BUT NO EPISODE-ID#) AND HAVE THIS FORMAT (EXAMPLE):
#"https://podcastaddict.com/episode/https%3A%2F%2Fpscrb.fm%2Frss%2Fp%2Fpdst.fm%2Fe%2Farttrk.com%2Fp%2FABMA5%2Faudioboom.com%2Fposts%2F8280770.mp3%3Fmodified%3D1681401989%26sid%3D2399216%26source%3Drss&podcastId=1719501"
#TO AVOID USING THE FULL STREAM-URL AS THE UNIQUE EPISODE-ID#, WE JUST USE THE PODCAST-ID# INSTEAD.
#THERE IS NO KNOWN WAY TO FETCH AN EPISODE GIVEN JUST A PODCAST-ID AND EPISODE-ID, SO WE CAN'T
#DETERMINE WHAT THE REAL EPISODE-ID IS, EXCEPT ON PODCAST PAGES STILL USING THE CLASSIC, UNESCAPED
#EPISODE-URLS (FORMAT:  "https://podcastaddict.com/episode/<episode-ID-number>")!
#(NOTE ALSO THAT THE "&podcastId=#####" PART OF THE URL IS *NOT* THE SEARCHABLE PODCAST-ID EITHER,
#BUT RATHER THE PODCAST ARTIST'S/CHANNEL'S ID#)!

TRYIT:
	if ($url2fetch =~ m#^([0-9]+)$#) {  #ASSUME PODCAST-ID, AS EPISODES NO LONGER HAVE DESCERNABLE IDs:
		$self->{'id'} = $1;
		$url2fetch = "$baseURL/podcast/".$self->{'id'};
		$isEpisode = 0;
		print STDERR "-1- PODCAST ID, ID=".$self->{'id'}."= found ($url2fetch)\n"  if ($DEBUG);
	} elsif ($url2fetch =~ m#\/episode\/https?#) { #(LONG) EPISODE URL (ON PODCAST PAGES, ESCAPED):
		$url2fetch = uri_escape($url2fetch)  unless($url2fetch =~ m#\%3A#);  #PODCASTADDICT EPISODE URLS NOW MUST BE URI-ESCAPED!
		#$self->{'id'} IS NOT EMBEDDED OR DETERMINABLE, WILL SET TO PODCAST-ID LATER!
		$isEpisode = 1;
		print STDERR "-2- EPISODE URL, ID=UNKNOWN= found ($url2fetch)\n"  if ($DEBUG);
	} elsif ($url2fetch =~ m#\/episode\/(\d+)\/?$#) {  #CLASSIC (SHORT) EPISODE URL WITH ID. (DEPRECIATED)
		$self->{'id'} = $2;  #CLASSIC EPISODE URLS HAVE A PROPER EPISODE-ID EMBEDDED!
		$isEpisode = 1;
		print STDERR "-3- CLASSIC EPISODE URL, ID=".$self->{'id'}."= found ($url2fetch)\n"  if ($DEBUG);
	} elsif ($url2fetch =~ m#\/podcast\/([^\/]+)#) {  #PODCAST URL
		$self->{'id'} = $1;  #USE UNIQUE NUMBER AS A MADE-UP "EPISODE-ID"
		$self->{'id'} = $1  if ($url2fetch =~ m#\/(\d\d\d\d+)$#);
		$isEpisode = 0;
		print STDERR "-4- PODCAST URL, ID=".$self->{'id'}."= found ($url2fetch)\n"  if ($DEBUG);
	} elsif ($url2fetch =~ m#^([^\/]+)\/(\d+)#) {  #EPISODE-ID:
		my $podcastID = $1;
		my $episodeID = $2;
		$url2fetch = "$baseURL/$podcastID/episode/$episodeID";
		$self->{'id'} = "$podcastID/$episodeID";  #USE UNIQUE NUMBER AS A MADE-UP "EPISODE-ID"
		$isEpisode = 1;
		print STDERR "-5- EPISODE ID=".$self->{'id'}."= found ($url2fetch)\n"  if ($DEBUG);
	} else {
		return undef;  #INVALID ID/URL!
	}

	$html = '';
	print STDERR "-0(PodcastAddict): ($tried) FETCHING URL=$url2fetch= ID=".$self->{'id'}."=\n"  if ($DEBUG);
	$response = $ua->get($url2fetch);
	if ($response->is_success) {
		$html = $response->decoded_content;
	} else {
		print STDERR $response->status_line  if ($DEBUG);
	}
	print STDERR "-1: html=$html=\n"  if ($DEBUG > 1);
	return undef  unless ($html);  #STEP 1 FAILED, INVALID PODCAST URL, PUNT!

	$self->{'genre'} = 'Podcast';
	print STDERR "---ID=".$self->{'id'}."= tried=$tried=\n"  if ($DEBUG);
	unless ($isEpisode) {   #PODCAST PAGE ID (FETCH XML PAGE):
		print STDERR "-----WE'RE A PODCAST PAGE: ID=".$self->{'id'}."!\n"  if ($DEBUG);

		#FETCH PODCAST-WIDE METADATA HERE!:
		$self->{'albumartist'} = $url2fetch;
		$self->{'albumartist'} = $1  if ($html =~ m#\<meta\s+property\=\"(?:og|twitter)\:url\"\s+content\=\"([^\"]+)\"\>#s);
		$self->{'id'} = $1  if ($self->{'albumartist'} =~ m#(\d+)\/?$#);
		if ($html =~ m#\<div\s+class\=\"headerThumbnail\"\>(.+?)\<\/div\>#s) {
			my $thumbnaildata = $1;
			$self->{'articonurl'} = $1  if ($thumbnaildata =~ m#\<img\s+src\=\"([^\"]+)#s);
			$self->{'iconurl'} = $self->{'articonurl'};
			$self->{'imageurl'} = $self->{'articonurl'};
		}
		$self->{'artist'} = $1  if ($html =~ m#name\=\"author\"\s+class\=\"[\w\-]+\"\s+value\=\"([^\"]+)#);
		$self->{'album'} = $1  if ($html =~ m#\<meta\s+itemprop\=\"name\"\s+content\=\"([^\"]+)#s);

		#WE NEED TO EXTRACT 1ST EPISODE ID, BUT WHILST AT IT, GO AHEAD AND FETCH PLAYLIST DATA HERE TOO!:
		my $ep1id = '';
		while ($html =~ s#^.+?\<div\s+class\=\"cellcontent\"\s+itemscope\>##s) {
			if ($html =~ m#\<a\s+class\=\"clickeableItem\"\s+href\=\"([^\"]+)#) {
				my $streamURL = $1;
				(my $stream = uri_unescape($streamURL)) =~ s#^https?\:\/\/podcastaddict\.\w+\/episode\/##o;
				next  if ($self->{'secure'} && $stream !~ /^https/o);

				if ($ep1id) {
					$stream =~ s#\?utm_source=Podcast.*$##o;
					$stream =~ s#[\?\&]from\=PodcastAddict$##o;
					$stream =~ s#\.mp3\?.*$#\.mp3#o;
					if ($html =~ m#\<h3[^\>]*\>(.+?)\<\/h3>#o) {



( run in 0.507 second using v1.01-cache-2.11-cpan-71847e10f99 )