StreamFinder
view release on metacpan or search on metacpan
lib/StreamFinder/Rumble.pm view on Meta::CPAN
$response = $ua->get($url2fetch);
if ($response->is_success) {
$html = $response->decoded_content;
} else {
print STDERR $response->status_line if ($DEBUG);
my $no_wget = system('wget','-V');
unless ($no_wget) {
print STDERR "\n..trying wget...\n" if ($DEBUG);
$html = `wget -t 2 -T 20 -O- -o /dev/null "$url2fetch" 2>/dev/null `;
}
}
if ($html && $html =~ m#\"embedUrl\"\:\"([^\"]+)#s) {
my $url2 = $1;
if ($DEBUG > 1 && open DBG, ">/tmp/rumble_page.htm") {
print DBG $html;
close DBG;
}
$self->{'title'} = ($html =~ m#\<title\>([^\<]+)\<\/title\>#s) ? $1 : '';
$self->{'title'} ||= $1 if ($html =~ m#\<meta\s+property\=\"?og\:title\"?\s+content\=\"([^\"]+)\"#s);
$self->{'artist'} = $1 if ($html =~ m#class\=\"media-heading-name(?:\s+truncate)?\"\>([^\<]+)\<#s);
$self->{'artist'} ||= $1 if ($html =~ m#\<button data-title=\"([^\"]+)#s);
$self->{'artist'} =~ s/^\s+//s;
$self->{'artist'} =~ s/\s+$//s;
$self->{'albumartist'} = $baseURL . $1 if ($html =~ m#href\=\"([^\"]+)\" rel=author#s);
$self->{'description'} = $1 if ($html =~ m#\<p\s+class\=\"media\-description\s+media\-description[^\>]*\>(.+?)\<\/p\>#s);
$self->{'description'} ||= $1 if ($html =~ m#\"description\"\:\"([^\"]+)#s);
$self->{'description'} ||= $1 if ($html =~ m#<meta\s+name\=description\"?\s+content\=\"\:\"([^\"]+)#s);
$self->{'description'} ||= $1 if ($html =~ m#<meta\s+property\=\"?og\:description\"?\s+content\=\"\:\"([^\"]+)#s);
$self->{'description'} ||= $1 if ($html =~ m#\<meta\s+name\=\"?twitter\:description\"?\s+content\=\"([^\"]+)\"#s);
$self->{'description'} =~ s/^\s+//s;
$self->{'description'} =~ s/\n\n\n+/\n\n/s;
$self->{'description'} =~ s/\s+$//s;
$self->{'iconurl'} = ($html =~ m#\"thumbnailUrl\"\:\"([^\"]+)#s) ? $1 : '';
$self->{'iconurl'} ||= $1 if ($html =~ m#\<meta\s+property\=\"?og\:image\"?\s+content\=\"?([^\<]+)\<#s);
$self->{'iconurl'} =~ s/\"$//;
$self->{'imageurl'} = $self->{'iconurl'};
if ($html =~ m#i\.user\-image\-\-img\-\-id\-[0-9a-f]+\s+\{([^\}]+)#s) {
my $stuff = $1;
$self->{'articonurl'} = $1 if ($stuff =~ m#url\(([^\)]+)#);
}
if ($html =~ m#Published(.+?)\<span#s) { #JWT:NOTE: CAN'T USE $self->{'created'} HERE!:
my $published = $1;
$self->{'year'} = $1 if ($published =~ /(\d\d\d\d)/);
}
#STEP 2: FETCH THE STREAMS FROM THE "embedUrl":
return $url2;
}
return '';
};
# getEmbedPage($url2fetch): fetch a Rumble "embed" page and harvest its streams.
#
# Parameters:
#   $url2fetch - URL of the embed page to download.
#
# Side effects (on the enclosing $self):
#   - appends every acceptable, non-duplicate stream URL to $self->{'streams'},
#     ordered either by kept extension or by quality depending on $self->{'order'};
#   - fills in 'title', 'artist', 'albumartist', 'year', 'iconurl' and 'imageurl'
#     only where they are still empty, and sets 'created' when a pubDate is found.
#
# Returns:
#   - the canonical page URL found in the embed page (undef if there is none), or
#   - whatever getChannelPage() returns when no HTML could be fetched at all
#     (NOTE(review): callers strip a "from-channel:" prefix from the result, so
#     getChannelPage() presumably produces it - confirm against its definition).
local *getEmbedPage = sub {
	my $url2fetch = shift;
	my $html = '';
	print STDERR "-FETCHING EMBED URL=$url2fetch=\n" if ($DEBUG);
	$response = $ua->get($url2fetch);
	if ($response->is_success) {
		$html = $response->decoded_content;
	} else {
		print STDERR $response->status_line if ($DEBUG);
		my $no_wget = system('wget','-V');
		unless ($no_wget) {
			print STDERR "\n..trying wget...\n" if ($DEBUG);
			# List-form pipe open: wget is exec'd directly without a shell, so
			# quotes, backticks or "$(...)" inside the URL cannot inject commands.
			# The "--" stops a URL starting with "-" being parsed as an option.
			if (open(my $wgetPipe, '-|', 'wget', '-t', '2', '-T', '20',
					'-O-', '-o', '/dev/null', '--', $url2fetch)) {
				local $/ = undef;    # slurp the whole page in one read
				my $fetched = <$wgetPipe>;
				close $wgetPipe;
				$html = $fetched if (defined $fetched);
			}
		}
	}

	# Nothing fetched: retry via the channel page (same URL minus "/embed/").
	unless ($html) {
		$url2fetch =~ s#\/embed\/#\/#;
		print STDERR "---EMBED PAGE: NO HTML, TRY CHANNEL PAGE ($url2fetch)!...\n" if ($DEBUG);
		return &getChannelPage($url2fetch);
	}

	my @streams = ();      # candidate stream URLs, in page order
	my %ext = ();          # stream URL => matched kept-extension ('' if none)
	my %quality = ();      # stream URL => numeric quality
	my %qualities = ();    # set of distinct quality values seen
	my %streamHash = ();   #USE THIS HASH TO PREVENT ANY DUPLICATE STREAM URLS:

	$html =~ s#\\\/#\/#gs;   # un-escape the JSON "\/" sequences
	$self->{'title'} ||= ($html =~ m#\<title\>([^\<]+)\<\/title\>#s) ? $1 : '';
	my $url2 = ($html =~ m#\<link\s+rel\=\"canonical\"\s+href\=\"([^\"]+)#s) ? $1 : undef;
	$html =~ s#^.+\"u\"\:\{##s;   # discard everything up to the (last) "u":{ object

	#PARSE OUT ALL STREAMS (CLASS IS EITHER "mp4", "webm", "###" (RESOLUTION) OR OTHER.
	#SINCE THE STREAMS OF EACH RESOLUTION ARE OFTEN REPEATED UNDER "mp4", "webm", or "<other>"
	#BASED ON THEIR EXTENSION!:
	# Each pass consumes the text up to and including the next "<key>":{"url":"..."
	# entry, so the bitrate lookup below sees only what follows that entry.
	while ($html =~ s#^.+?\"(\w+)\"\:\{\"url\"\:\"([^\"]+)\"##so) {
		my ($quality, $stream) = ($1, $2);
		my $bitrate = ($html =~ m#\"bitrate\"\:(\d+)#o) ? $1 : 0;
		print STDERR "...quality=$quality= bitrate=$bitrate=(max=".$self->{'bitrate'}.") stream=$stream=\n" if ($DEBUG);
		next if ($bitrate > $self->{'bitrate'});
		next if ($stream =~ /\.[A-Z]aa(?:\.rec)?\.(?:mp4|webm)$/o); #THESE WON'T PLAY! (VIDEO-ONLY?)

		# The extension is determined afresh for every stream; a stream that
		# matches no kept extension must not inherit the previous stream's one.
		my $ext = '';
		foreach my $okExt (@okStreams) {
			if ($stream =~ /\.$okExt\b/) {
				$ext = $okExt;
				last;
			}
		}
		$quality = 1 if ($ext =~ /aac/o); #MAKE AUDIO-ONLY STREAMS LOWEST QUALITY TO SORT LAST.
		if ($quality =~ /\D/o) {
			# Non-numeric class names: map the known ones to a number, skip the rest.
			if ($quality =~ /audio/o) {
				$quality = 1;
			} elsif ($quality =~ /(?:hls|auto)/o) {
				$quality = ($self->{'order'} =~ /ext/io) ? ($self->{'quality'}-1) : 10;
			} else {
				next;
			}
		}
		next if ($quality > $self->{'quality'}); #EXCLUDE ANY HIGHER-RES THAN SELECTED QUALITY.
		$quality{$stream} = $quality;
		$ext{$stream} = $ext;
		$qualities{$quality} = 1;
		push @streams, $stream;
	}

	print STDERR "--Max res(quality)=".$self->{'quality'}."= bitrate=".$self->{'bitrate'}."= order=".join(',',@okStreams)."=\n" if ($DEBUG);
	if ($self->{'order'} =~ /ext/i) {
		# Kept extension is the primary sort key, quality (descending) the secondary.
		print STDERR "--order streams by kept extensions:\n" if ($DEBUG);
		foreach my $ext (@okStreams) {
			print STDERR "\n--keep extension=$ext:\n" if ($DEBUG);
			foreach my $quality (sort { $b <=> $a } keys %qualities) {
				foreach my $stream (@streams) {
					print STDERR "------found($quality) stream=$stream=\n" if ($DEBUG);
					next unless ($quality{$stream} == $quality &&
							($ext =~ /any/io || $ext{$stream} =~ /$ext/));
					unless (defined $streamHash{$stream}
							|| ($self->{'secure'} && $stream !~ /^https/o)) {
						push @{$self->{'streams'}}, $stream;
						$streamHash{$stream} = $stream;
					}
				}
			}
		}
	} else {
		# Quality (descending) is the primary sort key, kept extension the secondary.
		print STDERR "--order streams by qualities:\n" if ($DEBUG);
		foreach my $quality (sort { $b <=> $a } keys %qualities) {
			print STDERR "\n--keep quality=$quality:\n" if ($DEBUG);
			foreach my $ext (@okStreams) {
				foreach my $stream (@streams) {
					print STDERR "------found($ext) ext=".$ext{$stream}."= stream=$stream=\n" if ($DEBUG);
					next unless ($quality{$stream} == $quality &&
							($ext =~ /any/io || $ext{$stream} =~ /$ext/));
					unless (defined $streamHash{$stream}
							|| ($self->{'secure'} && $stream !~ /^https/o)) {
						push @{$self->{'streams'}}, $stream;
						$streamHash{$stream} = $stream;
					}
				}
			}
		}
	}

	# Metadata fallbacks: only fill fields that are still empty.
	if ($html =~ m#\"author\"\:\{\"name\"\:\"([^\"]+)\"\,\"url\"\:\"([^\"]+)#s) {
		$self->{'artist'} ||= $1;
		$self->{'albumartist'} ||= $2;
	}
	if ($html =~ m#\"pubDate\"\:\"([^\"]+)#s) {
		$self->{'created'} = $1;
		$self->{'year'} ||= $1 if ($self->{'created'} =~ /(\d\d\d\d)/);
	}
	if ($html =~ m#\"i\"\:\"([^\"]+)#s) { #GRAB EMBEDDED IMAGE URL IN CASE MAIN PAGE IS "PRIVATE"(UNFETCHABLE):
		$self->{'iconurl'} ||= $1;
		$self->{'imageurl'} ||= $self->{'iconurl'};
	}
	return $url2;
};
$url = "$baseURL/embed/${url}/" if ($url !~ m#http# && $url !~ m#\-#);
my $tried = 0;
TRYIT:
print STDERR "-${tried}(Rumble): URL=$url=\n" if ($DEBUG);
if ($url =~ m#\/embed\/#i) {
my $url2 = &getEmbedPage($url);
if ($url2) {
if (!$tried && $url2 =~ s/^from-channel\://) {
$url = $url2;
$tried++;
goto TRYIT;
}
&getHtmlPage($url2);
}
} else {
my $url2 = &getHtmlPage($url);
if ($url2) {
if (!$tried && $url2 =~ s/^from-channel\://) {
$url = $url2;
$tried++;
goto TRYIT;
}
&getEmbedPage($url2);
}
}
$self->{'cnt'} = scalar @{$self->{'streams'}};
foreach my $field (qw(description artist title)) {
$self->{$field} = HTML::Entities::decode_entities($self->{$field});
$self->{$field} = uri_unescape($self->{$field});
$self->{$field} =~ s/(?:\%|\\[ux\%]?00|\bu00)([0-9A-Fa-f]{2})/chr(hex($1))/egs;
$self->{$field} =~ s/\\u\d\d\d\d/ /gs;
}
$self->{'title'} =~ s/\s+\-\s+$self->{'artist'}\s*$//; #CONVERT "Title - Artist" => "Title"
$self->{'iconurl'} ||= $self->{'articonurl'} if ($self->{'articonurl'});
$self->{'imageurl'} = $self->{'iconurl'};
$self->{'albumartist'} =~ s#\?.*$## unless ($self->{'notrim'}); #STRIP OFF ANY EXTRA ARGS, IE. "?e2s=blahblah"
$self->{'total'} = $self->{'cnt'};
$self->{'Url'} = ($self->{'cnt'} > 0) ? $self->{'streams'}->[0] : '';
if ($DEBUG) {
foreach my $i (sort keys %{$self}) {
print STDERR "--KEY=$i= VAL=".$self->{$i}."=\n";
}
print STDERR "--SUCCESS: 1st stream=".$self->{'Url'}."= total=".$self->{'total'}."=\n"
if ($self->{'cnt'} > 0);
}
$self->_log($url);
bless $self, $class; #BLESS IT!
return $self;
}
sub getImageData
{
( run in 0.621 second using v1.01-cache-2.11-cpan-71847e10f99 )