StreamFinder

 view release on metacpan or  search on metacpan

lib/StreamFinder/Rumble.pm  view on Meta::CPAN

		$response = $ua->get($url2fetch);
		if ($response->is_success) {
			$html = $response->decoded_content;
		} else {
			print STDERR $response->status_line  if ($DEBUG);
			my $no_wget = system('wget','-V');
			unless ($no_wget) {
				print STDERR "\n..trying wget...\n"  if ($DEBUG);
				$html = `wget -t 2 -T 20 -O- -o /dev/null "$url2fetch" 2>/dev/null `;
			}
		}
		if ($html && $html =~ m#\"embedUrl\"\:\"([^\"]+)#s) {
			my $url2 = $1;

			if ($DEBUG > 1 && open DBG, ">/tmp/rumble_page.htm") {
				print DBG $html;
				close DBG;
			}
			$self->{'title'} = ($html =~ m#\<title\>([^\<]+)\<\/title\>#s) ? $1 : '';
			$self->{'title'} ||= $1  if ($html =~ m#\<meta\s+property\=\"?og\:title\"?\s+content\=\"([^\"]+)\"#s);

			$self->{'artist'} = $1  if ($html =~ m#class\=\"media-heading-name(?:\s+truncate)?\"\>([^\<]+)\<#s);
			$self->{'artist'} ||= $1  if ($html =~ m#\<button data-title=\"([^\"]+)#s);
			$self->{'artist'} =~ s/^\s+//s;
			$self->{'artist'} =~ s/\s+$//s;

			$self->{'albumartist'} = $baseURL . $1  if ($html =~ m#href\=\"([^\"]+)\" rel=author#s);

			$self->{'description'} = $1  if ($html =~ m#\<p\s+class\=\"media\-description\s+media\-description[^\>]*\>(.+?)\<\/p\>#s);
			$self->{'description'} ||= $1  if ($html =~ m#\"description\"\:\"([^\"]+)#s);
			$self->{'description'} ||= $1  if ($html =~ m#<meta\s+name\=description\"?\s+content\=\"\:\"([^\"]+)#s);
			$self->{'description'} ||= $1  if ($html =~ m#<meta\s+property\=\"?og\:description\"?\s+content\=\"\:\"([^\"]+)#s);
			$self->{'description'} ||= $1  if ($html =~ m#\<meta\s+name\=\"?twitter\:description\"?\s+content\=\"([^\"]+)\"#s);
			$self->{'description'} =~ s/^\s+//s;
			$self->{'description'} =~ s/\n\n\n+/\n\n/s;
			$self->{'description'} =~ s/\s+$//s;
			$self->{'iconurl'} = ($html =~ m#\"thumbnailUrl\"\:\"([^\"]+)#s) ? $1 : '';
			$self->{'iconurl'} ||= $1  if ($html =~ m#\<meta\s+property\=\"?og\:image\"?\s+content\=\"?([^\<]+)\<#s);
			$self->{'iconurl'} =~ s/\"$//;
			$self->{'imageurl'} = $self->{'iconurl'};
			if ($html =~ m#i\.user\-image\-\-img\-\-id\-[0-9a-f]+\s+\{([^\}]+)#s) {
				my $stuff = $1;
				$self->{'articonurl'} = $1  if ($stuff =~ m#url\(([^\)]+)#);
			}

			if ($html =~ m#Published(.+?)\<span#s) {  #JWT:NOTE: CAN'T USE $self->{'created'} HERE!:
				my $published = $1;
				$self->{'year'} = $1  if ($published =~ /(\d\d\d\d)/);
			}

			#STEP 2:  FETCH THE STREAMS FROM THE "embedUrl":
			return $url2;
		}
		return '';
	};

	#getEmbedPage(URL):  FETCH A RUMBLE "EMBED" PAGE AND COLLECT ITS PLAYABLE STREAMS.
	#
	#THE PAGE IS FETCHED WITH $ua; IF THAT FAILS AND A "wget" BINARY RESPONDS TO "wget -V",
	#wget IS TRIED AS A FALLBACK.  WHEN HTML IS OBTAINED:
	#  * $self->{'title'} IS FILLED IN (ONLY IF STILL EMPTY) FROM THE <title> TAG.
	#  * EVERY "<class>":{"url":"..."} ENTRY FOLLOWING THE "u":{ MARKER IS EXAMINED; STREAMS
	#    OVER $self->{'bitrate'} OR $self->{'quality'} ARE DROPPED, THE REST ARE APPENDED TO
	#    $self->{'streams'} (NO DUPLICATES), ORDERED BY EXTENSION OR BY QUALITY DEPENDING ON
	#    $self->{'order'}.
	#  * artist, albumartist, year, iconurl AND imageurl ARE FILLED IN ONLY IF STILL EMPTY;
	#    created IS ALWAYS SET WHEN A "pubDate" IS PRESENT.
	#RETURNS: THE PAGE'S <link rel="canonical"> URL (undef IF THE PAGE HAS NONE).  IF NO HTML
	#COULD BE FETCHED, "/embed/" IS REPLACED BY "/" IN THE URL AND WHATEVER getChannelPage()
	#RETURNS FOR THAT URL IS RETURNED INSTEAD.
	local *getEmbedPage = sub {
		my $url2fetch = shift;
		my $html = '';

		print STDERR "-FETCHING EMBED URL=$url2fetch=\n"  if ($DEBUG);
		$response = $ua->get($url2fetch);
		if ($response->is_success) {
			$html = $response->decoded_content;
		} else {
			print STDERR $response->status_line  if ($DEBUG);
			my $no_wget = system('wget','-V');  #ZERO EXIT STATUS MEANS wget IS AVAILABLE.
			unless ($no_wget) {
				print STDERR "\n..trying wget...\n"  if ($DEBUG);
				#RUN wget THROUGH A LIST-FORM PIPE SO THAT NO SHELL EVER SEES THE URL (IT MAY
				#COME FROM SCRAPED HTML AND CONTAIN QUOTES, BACKTICKS OR "$(...)").  THE "--"
				#KEEPS A URL STARTING WITH "-" FROM BEING TAKEN AS AN OPTION.  wget's OWN LOG
				#OUTPUT STILL GOES TO /dev/null VIA "-o".
				if (open(my $wget, '-|', 'wget', '-t', '2', '-T', '20', '-O-',
						'-o', '/dev/null', '--', $url2fetch)) {
					local $/ = undef;  #SLURP THE WHOLE PAGE.
					$html = <$wget>;
					close $wget;
				}
				$html = ''  unless (defined $html);
			}
		}

		unless ($html) {
			$url2fetch =~ s#\/embed\/#\/#;
			print STDERR "---EMBED PAGE: NO HTML, TRY CHANNEL PAGE ($url2fetch)!...\n"  if ($DEBUG);
			return &getChannelPage($url2fetch);
		}

		my @streams = ();     #CANDIDATE STREAM URLS, IN PAGE ORDER.
		my %ext = ();         #STREAM URL => MATCHED EXTENSION ('' IF NONE MATCHED).
		my %quality = ();     #STREAM URL => NUMERIC QUALITY.
		my %qualities = ();   #SET OF ALL QUALITIES SEEN.
		my %streamHash = ();  #USE THIS HASH TO PREVENT ANY DUPLICATE STREAM URLS:

		$html =~ s#\\\/#\/#gs;  #UN-ESCAPE JSON-STYLE "\/" => "/".
		$self->{'title'} ||= ($html =~ m#\<title\>([^\<]+)\<\/title\>#s) ? $1 : '';
		my $url2 = ($html =~ m#\<link\s+rel\=\"canonical\"\s+href\=\"([^\"]+)#s) ? $1 : undef;
		$html =~ s#^.+\"u\"\:\{##s;  #DISCARD EVERYTHING UP TO & INCLUDING THE LAST "u":{ MARKER.
		#PARSE OUT ALL STREAMS (CLASS IS EITHER "mp4", "webm", "###" (RESOLUTION) OR OTHER.
		#SINCE THE STREAMS OF EACH RESOLUTION ARE OFTEN REPEATED UNDER "mp4", "webm", or "<other>"
		#BASED ON THEIR EXTENSION!:
		#(EACH PASS CONSUMES THE HTML UP THROUGH THE ENTRY JUST MATCHED.)
		while ($html =~ s#^.+?\"(\w+)\"\:\{\"url\"\:\"([^\"]+)\"##so) {
			my ($quality, $stream) = ($1, $2);
			#THE FIRST "bitrate" REMAINING IN THE (ALREADY CONSUMED) HTML IS USED FOR THIS ENTRY:
			my $bitrate = ($html =~ m#\"bitrate\"\:(\d+)#o) ? $1 : 0;
			print STDERR "...quality=$quality= bitrate=$bitrate=(max=".$self->{'bitrate'}.") stream=$stream=\n"  if ($DEBUG);
			next  if ($bitrate > $self->{'bitrate'});
			next  if ($stream =~ /\.[A-Z]aa(?:\.rec)?\.(?:mp4|webm)$/o);  #THESE WON'T PLAY! (VIDEO-ONLY?)

			#THE EXTENSION MUST BE DETERMINED FRESH FOR EACH STREAM, OTHERWISE A STREAM
			#MATCHING NO KEPT EXTENSION WOULD INHERIT THE PREVIOUS STREAM'S EXTENSION:
			my $ext;
			foreach my $okExt (@okStreams) {
				if ($stream =~ /\.$okExt\b/) {
					$ext = $okExt;
					last;
				}
			}
			$quality = 1  if (defined($ext) && $ext =~ /aac/o);  #MAKE AUDIO-ONLY STREAMS LOWEST QUALITY TO SORT LAST.
			if ($quality =~ /\D/o) {  #NON-NUMERIC CLASS NAME:
				if ($quality =~ /audio/o) {
					$quality = 1;
				} elsif ($quality =~ /(?:hls|auto)/o) {
					$quality = ($self->{'order'} =~ /ext/io) ? ($self->{'quality'}-1) : 10;
				} else {
					next;
				}
			}
			next  if ($quality > $self->{'quality'});  #EXCLUDE ANY HIGHER-RES THAN SELECTED QUALITY.

			$quality{$stream} = $quality;
			$ext{$stream} = defined($ext) ? $ext : '';
			$qualities{$quality} = 1;
			push @streams, $stream;
		}
		print STDERR "--Max res(quality)=".$self->{'quality'}."= bitrate=".$self->{'bitrate'}."= order=".join(',',@okStreams)."=\n"  if ($DEBUG);

		#APPEND A STREAM TO $self->{'streams'} IF IT HAS THE GIVEN QUALITY, MATCHES THE GIVEN
		#KEPT EXTENSION ("any" MATCHES ALL), HASN'T ALREADY BEEN ADDED, AND (WHEN
		#$self->{'secure'} IS SET) IS AN https URL:
		my $keepStream = sub {
			my ($stream, $wantQuality, $wantExt) = @_;
			return  unless ($quality{$stream} == $wantQuality &&
					($wantExt =~ /any/io || $ext{$stream} =~ /$wantExt/));
			return  if (defined $streamHash{$stream}
					|| ($self->{'secure'} && $stream !~ /^https/o));
			push @{$self->{'streams'}}, $stream;
			$streamHash{$stream} = $stream;
		};
		my @qualitiesDesc = sort { $b <=> $a } keys %qualities;  #HIGHEST QUALITY FIRST.

		if ($self->{'order'} =~ /ext/i) {
			print STDERR "--order streams by kept extensions:\n"  if ($DEBUG);
			foreach my $ext (@okStreams) {
				print STDERR "\n--keep extension=$ext:\n"  if ($DEBUG);
				foreach my $quality (@qualitiesDesc) {
					foreach my $stream (@streams) {
						print STDERR "------found($quality) stream=$stream=\n"  if ($DEBUG);
						$keepStream->($stream, $quality, $ext);
					}
				}
			}
		} else {
			print STDERR "--order streams by qualities:\n"  if ($DEBUG);
			foreach my $quality (@qualitiesDesc) {
				print STDERR "\n--keep quality=$quality:\n"  if ($DEBUG);
				foreach my $ext (@okStreams) {
					foreach my $stream (@streams) {
						print STDERR "------found($ext) ext=".$ext{$stream}."= stream=$stream=\n"  if ($DEBUG);
						$keepStream->($stream, $quality, $ext);
					}
				}
			}
		}

		#METADATA FOUND IN WHAT REMAINS OF THE EMBED PAGE (MOSTLY ONLY USED AS A FALLBACK):
		if ($html =~ m#\"author\"\:\{\"name\"\:\"([^\"]+)\"\,\"url\"\:\"([^\"]+)#s) {
			$self->{'artist'} ||= $1;
			$self->{'albumartist'} ||= $2;
		}
		if ($html =~ m#\"pubDate\"\:\"([^\"]+)#s) {
			$self->{'created'} = $1;
			$self->{'year'} ||= $1  if ($self->{'created'} =~ /(\d\d\d\d)/);
		}
		if ($html =~ m#\"i\"\:\"([^\"]+)#s) {  #GRAB EMBEDDED IMAGE URL IN CASE MAIN PAGE IS "PRIVATE"(UNFETCHABLE):
			$self->{'iconurl'} ||= $1;
			$self->{'imageurl'} ||= $self->{'iconurl'};
		}
		return $url2;
	};

	#MAIN FETCH SEQUENCE:  BOTH THE EMBED PAGE AND THE REGULAR HTML PAGE ARE FETCHED, STARTING
	#WITH WHICHEVER KIND OF URL WAS GIVEN; EACH FETCHER RETURNS THE URL OF THE OTHER PAGE.
	#A VALUE CONTAINING NEITHER "http" NOR A DASH IS TREATED AS A BARE ID AND TURNED INTO AN EMBED URL:
	$url = "$baseURL/embed/${url}/"  if ($url !~ m#http# && $url !~ m#\-#);
	my $tried = 0;  #SET ONCE WE'VE RESTARTED, SO THAT WE RESTART AT MOST ONE TIME.
TRYIT:
	print STDERR "-${tried}(Rumble): URL=$url=\n"  if ($DEBUG);
	if ($url =~ m#\/embed\/#i) {
		#EMBED URL GIVEN:  FETCH THE EMBED PAGE (STREAMS) FIRST, THEN THE HTML PAGE IT POINTS TO:
		my $url2 = &getEmbedPage($url);
		if ($url2) {
			#A RETURNED URL PREFIXED WITH "from-channel:" MEANS START OVER WITH THAT URL (PREFIX
			#REMOVED).  NOTE(review): PRESUMABLY THE PREFIX IS ADDED BY getChannelPage() - CONFIRM.
			if (!$tried && $url2 =~ s/^from-channel\://) {
				$url = $url2;
				$tried++;
				goto TRYIT;
			}
			&getHtmlPage($url2);  #RETURN VALUE IS IGNORED HERE.
		}
	} else {
		#REGULAR PAGE URL GIVEN:  FETCH THE HTML PAGE FIRST, THEN THE EMBED PAGE IT POINTS TO:
		my $url2 = &getHtmlPage($url);
		if ($url2) {
			#SAME ONE-TIME "from-channel:" RESTART AS ABOVE:
			if (!$tried && $url2 =~ s/^from-channel\://) {
				$url = $url2;
				$tried++;
				goto TRYIT;
			}
			&getEmbedPage($url2);  #RETURN VALUE IS IGNORED HERE.
		}
	}

	#FINAL CLEANUP:  COUNT THE STREAMS FOUND, TIDY THE TEXT FIELDS, PICK THE 1ST STREAM, LOG,
	#AND RETURN THE (RE)BLESSED OBJECT:
	$self->{'cnt'} = scalar @{$self->{'streams'}};
	foreach my $field (qw(description artist title)) {
		#DECODE HTML ENTITIES, THEN URI %XX ESCAPES:
		$self->{$field} = HTML::Entities::decode_entities($self->{$field});
		$self->{$field} = uri_unescape($self->{$field});
		#CONVERT LEFTOVER 2-HEX-DIGIT ESCAPES ("%XX", "\u00XX", "\x00XX", "\%00XX", "\00XX", "u00XX")
		#INTO THE CHARACTER WITH THAT CODE:
		$self->{$field} =~ s/(?:\%|\\[ux\%]?00|\bu00)([0-9A-Fa-f]{2})/chr(hex($1))/egs;
		#BLANK OUT ANY OTHER "\uNNNN" ESCAPES.
		#NOTE(review): ONLY ALL-DECIMAL-DIGIT ESCAPES MATCH (IE. "\u2019" BUT NOT "\u201c") - CONFIRM THIS IS INTENDED.
		$self->{$field} =~ s/\\u\d\d\d\d/ /gs;
	}
	#CONVERT "Title - Artist" => "Title".  THE ARTIST NAME MUST BE QUOTED SINCE IT IS ARBITRARY
	#TEXT: REGEX METACHARACTERS IN IT (IE. "+", "(", "[") WOULD OTHERWISE BE INTERPRETED AS
	#PATTERN SYNTAX AND EITHER FAIL TO MATCH OR DIE WITH A REGEX COMPILE ERROR:
	my $artistPattern = quotemeta(defined($self->{'artist'}) ? $self->{'artist'} : '');
	$self->{'title'} =~ s/\s+\-\s+$artistPattern\s*$//;
	$self->{'iconurl'} ||= $self->{'articonurl'}  if ($self->{'articonurl'});  #FALL BACK TO THE ARTIST'S ICON.
	$self->{'imageurl'} = $self->{'iconurl'};
	$self->{'albumartist'} =~ s#\?.*$##  unless ($self->{'notrim'});  #STRIP OFF ANY EXTRA ARGS, IE. "?e2s=blahblah"
	$self->{'total'} = $self->{'cnt'};
	$self->{'Url'} = ($self->{'cnt'} > 0) ? $self->{'streams'}->[0] : '';  #1ST STREAM (OR EMPTY IF NONE).
	if ($DEBUG) {
		#DUMP EVERY FIELD OF THE OBJECT:
		foreach my $i (sort keys %{$self}) {
			print STDERR "--KEY=$i= VAL=".$self->{$i}."=\n";
		}
		print STDERR "--SUCCESS: 1st stream=".$self->{'Url'}."= total=".$self->{'total'}."=\n"
				if ($self->{'cnt'} > 0);
	}
	$self->_log($url);

	bless $self, $class;   #BLESS IT!

	return $self;
}

sub getImageData
{



( run in 0.621 second using v1.01-cache-2.11-cpan-71847e10f99 )