StreamFinder
view release on metacpan or search on metacpan
lib/StreamFinder/Youtube.pm view on Meta::CPAN
return undef unless ($url);
my $self = $class->SUPER::new('Youtube', @_);
#SET DEFAULTS FOR FLAGS:
$DEBUG = $self->{'debug'} if (defined $self->{'debug'});
$self->{'formatonly'} = 0 unless (defined $self->{'formatonly'});
$self->{'noiframes'} = 0 unless (defined $self->{'noiframes'});
$self->{'notrim'} = 0;
$self->{'youtubeonly'} = 0 unless (defined $self->{'youtubeonly'});
$self->{'youtube-site'} = $DEFAULTYTSITE;
#DEFAULT YOUTUBE-DL ARGUMENTS:
$self->{'youtube-dl-args'} = '--get-url --get-format --get-thumbnail --get-title --get-description --get-id'
unless (defined $self->{'youtube-dl-args'});
#FETCH ANY PARAMETERS PASSED TO THE new() FUNCTION (OVERRIDE ANY SET IN _class (GENERAL OR CONFIG FILES)):
while (@_) {
if ($_[0] =~ /^\-?fast$/o) {
shift;
$self->{'fast'} = (defined($_[0]) && $_[0] =~/^[0-9]$/) ? shift : 1;
} elsif ($_[0] =~ /^\-?noiframes$/o) {
shift;
$self->{'noiframes'} = (defined $_[0]) ? shift : 1;
} elsif ($_[0] =~ /^\-?youtubeonly$/o) {
shift;
$self->{'youtubeonly'} = (defined $_[0]) ? shift : 1;
} elsif ($_[0] =~ /^\-?formatonly$/o) {
shift;
$self->{'formatonly'} = (defined $_[0]) ? shift : 1;
} elsif ($_[0] =~ /^\-?notrim$/o) {
shift;
$self->{'notrim'} = (defined $_[0]) ? shift : 1;
} elsif ($_[0] =~ /^\-?youtube-site$/o) {
shift;
$self->{'youtube-site'} = (defined $_[0]) ? shift : $DEFAULTYTSITE;
$self->{'youtube-site'} = s#\/$##;
$self->{'youtube-site'} = 'https://' . $self->{'youtube-site'}
unless ($self->{'youtube-site'} =~ m#^https?\:\/\/#);
} elsif ($_[0] =~ /^\-?format$/o) {
shift;
$self->{'format'} = shift if (defined $_[0]);
} elsif ($_[0] =~ /^\-?format\-fallback$/o) {
shift;
$self->{'format-fallback'} = shift if (defined $_[0]);
} elsif ($_[0] =~ /^\-?user-agent$/o) {
shift;
$self->{'user-agent'} = shift if (defined $_[0]);
} elsif ($_[0] =~ /^\-?youtube-dl$/o) {
shift;
$self->{'youtube-dl'} = shift if (defined $_[0]);
} elsif ($_[0] =~ /^\-?youtube-dl-args$/o) {
shift;
$self->{'youtube-dl-args'} = shift if (defined $_[0]);
} elsif ($_[0] =~ /^\-?youtube-dl-add-args$/o) {
shift;
$self->{'youtube-dl-add-args'} = shift if (defined $_[0]);
} else {
shift; #DISCARD ANY OTHERS.
}
}
$self->{'youtubeonly'} = 1 if ($self->{'noiframes'}); #NO EMBEDDED RUMBLE-SEARCH IF NO IFRAMES ALLOWED!
$self->{'youtube-dl'} = 'yt-dlp' unless (defined $self->{'youtube-dl'});
print STDERR "-0(Youtube): URL=$url=\n" if ($DEBUG);
$url =~ s/[\?\&]autoplay\=true$//; #STRIP THIS OFF SO WE DON'T HAVE TO.
$url =~ s/[\?\&]list\=.*$//; #yt-dlp SEEMS TO JUST HANG ON "lists" (TRYING TO FETCH A LIST OF VIDEOS?!).
(my $url2fetch = $url);
$self->{'_isaYtPage'} = 1;
#DEPRECATED (STATION-IDS NOW INCLUDE STUFF BEFORE THE DASH): ($self->{'id'} = $url) =~ s#^.*\-([a-z]\d+)\/?$#$1#;
if ($url2fetch =~ m#^https?\:#) {
$self->{'_isaYtPage'} = 0 unless ($url2fetch =~ /\b(?:youtube\.|youtu.be|ytimg\.)\b/);
#$url2fetch =~ s/www\.youtube\.com/youtube\.be/; #WWW.YOUTUBE.COM SEEMS TO NOW BE BLOCKING youtube-dl?! :/
$self->{'id'} = $1 if ($url2fetch =~ m#\/([^\/]+)\/?$#);
$self->{'id'} =~ s/^watch\?v\=//;
$self->{'id'} =~ s/[\?\&].*$//;
$self->{'id'} = $1 if (!$self->{'_isaYtPage'} && $url2fetch =~ m#id[\=\:\#]?([^\/\s\=\:\#]+)#);
} else {
$self->{'id'} = $url;
$url2fetch = $self->{'youtube-site'} . '/watch?v=' . $url;
}
$url2fetch =~ s/[\?\&](?!v\=).*$// unless ($self->{'notrim'});
print STDERR "-1 (isYT=$$self{'_isaYtPage'}) FETCHING URL=$url2fetch= VIA $$self{'youtube-dl'}: ID=$$self{'id'}=\n" if ($DEBUG);
$self->{'genre'} = 'Video';
$self->{'albumartist'} = $url2fetch;
#FIRST, CHECK IF WE'RE A CHANNEL OR USER PAGE, IF SO, FETCH & RETURN LATEST UPLOADED VIDEO (EXCLUDE MARQUEE VIDEO AT TOP):
if ($self->{'_isaYtPage'} && !$self->{'noiframes'} && ($url2fetch =~ m#\/(?:channel|user|c)\/#
|| $url2fetch =~ m#$self->{'youtube-site'}\/\@#)) { #WE'RE A CHANNEL PAGE, GRAB 1ST VIDEO!:
print STDERR "..1a:We're a channel or user page!...\n" if ($DEBUG);
my $embedded_video;
my $html = '';
my $ua = LWP::UserAgent->new(@{$self->{'_userAgentOps'}});
$ua->timeout($self->{'timeout'});
$ua->max_size(1024); #LIMIT FETCH-SIZE TO AVOID INFINITELY DOWNLOADING A STREAM!
$ua->cookie_jar({});
$ua->env_proxy;
my $response = $ua->get($url2fetch);
$html = $response->decoded_content if ($response->is_success);
if ($html =~ /\<\!DOCTYPE\s+(?:html|text)/i) { #IF WE'RE AN HTML DOC. (NOT A STREAM!), THEN FETCH THE WHOLE THING:
$ua->max_size(undef); #(NOW OK TO FETCH THE WHOLE DOCUMENT)
my $response = $ua->get($url2fetch);
$html = $response->decoded_content if ($response->is_success);
return undef unless ($html);
$html =~ s#^.+\"description\"\:\{\"runs\"\:##s; #USER PAGES CAN HAVE A BANNER VIDEO, *TRY TO* SKIP THIS!
if ($html =~ m#\:\{\"url\"\:\"([^\"]+)\"\,\"webPageType\"\:\"WEB\_PAGE\_TYPE\_WATCH\"\,#s) {
$url2fetch = $1;
$url2fetch =~ s#^\/\/#https\:\/\/#; #URL STARTS WITH "//" (PREMPTIVE)
$url2fetch =~ s#^\/#$self->{'youtube-site'}\/#; #URL IS JUST "/video-id[?other-junk]" (COMMON)
$self->{'id'} = $1 if ($url2fetch =~ m#\/([^\/]+)\/?$#);
$self->{'id'} =~ s/^watch\?v\=//;
$self->{'id'} =~ s/[\?\&].*$//;
print STDERR "---FOUND 1ST EPISODE! FETCHING=$url2fetch= ID=".$self->{'id'}."=\n" if ($DEBUG);
goto DO_YTDL; #SKIP NON-YOUTUBE PAGE CHECK (NEXT PARAGRAPH):
}
}
print STDERR "u:DID NOT FIND A VIDEO ON CHANNEL/USER PAGE, PUNT!" if ($DEBUG);
return undef;
}
#IF NON-YOUTUBE PAGE, LOOK FOR ANYTHING EMBEDDED IN AN IFRAME:
unless ($self->{'_isaYtPage'} || $self->{'noiframes'}) {
print STDERR "..1a:See if we have a StreamFinder-supported URL in 1st iframe?...\n" if ($DEBUG);
my $embedded_video;
my $html = '';
my $ua = LWP::UserAgent->new(@{$self->{'_userAgentOps'}});
$ua->timeout($self->{'timeout'});
$ua->max_size(1024); #LIMIT FETCH-SIZE TO AVOID INFINITELY DOWNLOADING A STREAM!
$ua->cookie_jar({});
$ua->env_proxy;
my $response = $ua->get($url);
$html = $response->decoded_content if ($response->is_success);
if ($html =~ /\<\!DOCTYPE\s+(?:html|text)/i) { #IF WE'RE AN HTML DOC. (NOT A STREAM!), THEN FETCH THE WHOLE THING:
$ua->max_size(undef); #(NOW OK TO FETCH THE WHOLE DOCUMENT)
my $response = $ua->get($url2fetch);
$html = $response->decoded_content if ($response->is_success);
while ($html && $html =~ s#\<iframe([^\>]+)\>##so) {
my $one = $1;
my $embeddedURL = ($one =~ m#\"(https?\:\/\/[^\"]+)#s) ? $1 : '';
if ($embeddedURL) {
$embeddedURL =~ s/[\?\&](?!v\=).*$// unless ($self->{'notrim'});
print STDERR "--embedded IFRAME url=$embeddedURL=\n" if ($DEBUG);
my $haveStreamFinder = 0;
eval { require 'StreamFinder.pm'; $haveStreamFinder = 1; };
if ($haveStreamFinder) {
my %globalArgs = (-noiframes => 1, -debug => $DEBUG);
foreach my $arg (qw(log logfmt)) {
$globalArgs{$arg} = $self->{$arg} if (defined($self->{$arg}) && $self->{$arg});
}
$embedded_video = new StreamFinder($embeddedURL, %globalArgs);
}
last;
}
}
return $embedded_video if (defined($embedded_video) && $embedded_video->count() > 0);
##NEXT, TRY FOR YOUTUBE URLs HIDDEN IN JSON:
while ($html && $html =~ s#\"url\"\:\"([^\"]+)\"\,##so) {
my $one = $1;
my $embeddedURL = ($one =~ m#(https?\:\/\/[^\"]+)#so) ? $1 : '';
next unless ($embeddedURL && $embeddedURL =~ /\b(?:youtube\.|youtu.be|ytimg\.)\b/o);
$embeddedURL =~ s/[\?\&](?!v\=).*$// unless ($self->{'notrim'});
$url2fetch = $embeddedURL;
print STDERR "--embedded YOUTUBE JSON url=$url2fetch=\n" if ($DEBUG);
$self->{'_isaYtPage'} = 1;
$self->{'id'} = $1 if ($url2fetch =~ m#\/([^\/]+)\/?$#);
$self->{'id'} =~ s/^watch\?v\=//;
$self->{'id'} =~ s/[\?\&].*$//;
$self->{'id'} = $1 if (!$self->{'_isaYtPage'} && $url2fetch =~ m#id[\=\:\#]?([^\/\s\=\:\#]+)#);
last;
}
unless ($self->{'youtubeonly'}) {
if ($html =~ /\bRumble\s*\(\"play\"\,\s+\{\"video\"\:\"([a-z0-9\-\_]+)\"/si) {
#EXTRACT CERTAIN EMBEDDED RUMBLE VIDEOS NOT NECESSARILY IN AN IFRAME:
my $embeddedURL = 'https://rumble.com/embed/' . $1;
my $haveRumble = 0;
print STDERR "---FOUND AN EMBEDDED RUMBLE VIDEO ($embeddedURL), SEE IF WE CAN GO WITH THAT!\n" if ($DEBUG);
eval { require 'StreamFinder/Rumble.pm'; $haveRumble = 1; };
if ($haveRumble) {
my %globalArgs = (-debug => $DEBUG);
foreach my $arg (qw(log logfmt)) {
$globalArgs{$arg} = $self->{$arg} if (defined($self->{$arg}) && $self->{$arg});
}
$embedded_video = new StreamFinder::Rumble($embeddedURL, %globalArgs);
return $embedded_video if (defined($embedded_video) && $embedded_video->count() > 0);
}
}
}
}
}
#NEXT: GET STREAMS, THUMBNAIL, ETC. FROM youtube-dl:
DO_YTDL:
if (defined $self->{'formats_by_url'}) {
my %formats_by_url = %{$self->{'formats_by_url'}};
foreach my $i (keys %formats_by_url) {
$self->{'format'} = $formats_by_url{$i} if ($url =~ m#$i#i);
}
}
$self->{'format-fallback'} = $DEFAULTFALLBACK
if (!defined($self->{'format-fallback'}) && $self->{'formatonly'});
my $ytformat = (defined $self->{'format'}) ? $self->{'format'} : $DEFAULTFMT;
my $ua = (defined $self->{'user-agent'}) ? (' --user-agent "'.$self->{'user-agent'}.'"') : '';
my $ytdlArgs = $self->{'youtube-dl-args'};
$ytdlArgs .= $self->{'youtube-dl-add-args'} if (defined $self->{'youtube-dl-add-args'});
$ytdlArgs .= $ua;
$ytdlArgs .= ' -f "' . $ytformat . '" ' unless ($ytformat =~ /^a(?:ny|ll)$/i);
my $try = 0;
my (@ytdldata, @ytStreams);
RETRYIT:
$_ = '';
my $cmd = '';
if (defined($self->{'userid'}) && defined($self->{'userpw'})) { #USER HAS A LOGIN CONFIGURED:
my $uid = $self->{'userid'};
my $upw = $self->{'userpw'};
$cmd = $self->{'youtube-dl'} . '--username "' . $uid . '" --password "' . $upw . '" '
. $ytdlArgs. ' "' . $url2fetch .'"';
} else {
$cmd = $self->{'youtube-dl'} . " $ytdlArgs " . '"' . $url2fetch . '"';
}
print STDERR "--TRY($try of 1): youtube-dl: ARGS=$ytdlArgs= FMT=$ytformat=\nYT COMMAND==>$cmd<==\n" if ($DEBUG);
$_ = `$cmd`;
print STDERR "--YT RETURNED DATA===>$_<===\n" if ($DEBUG);
@ytdldata = split /\r?\n/s;
unless ($try || scalar(@ytdldata) > 0) { #IF NOTHING FOUND, RETRY WITHOUT THE SPECIFIC FILE-FORMAT:
$try++;
if (defined $self->{'format-fallback'} && $self->{'format-fallback'}) {
print STDERR "..1:No ($ytformat) streams found, try again with ($$self{'formatonly'})...\n" if ($DEBUG);
goto RETRYIT if ($ytdlArgs =~ s/\-f\s+\"[^\"]+\"/\-f \"$$self{'format-fallback'}\"/);
}
unless ($self->{'formatonly'}) {
print STDERR "..1:No ($ytformat) streams found, try again for any (audio, etc.)...\n" if ($DEBUG);
goto RETRYIT if ($ytdlArgs =~ s/\-f\s+\"[^\"]+\"//);
}
}
lib/StreamFinder/Youtube.pm view on Meta::CPAN
push @ytStreams, $1 unless ($self->{'secure'} && $_ !~ /^https/o);
$urlcount++;
}
}
}
return undef unless (scalar(@ytdldata) > 0);
#NOTE: ytdldata is ORDERED: TITLE?, ID, STREAM-URLS, THEN THE ICON URL, THEN DESCRIPTION, LASTLY FORMATS!:
unless ($ytdldata[0] =~ m#^https?\:\/\/#) {
$_ = shift(@ytdldata);
$self->{'title'} ||= $_;
}
$self->{'_ytID'} = '';
if ($ytdlArgs =~ /\-\-get\-id\b/ && $ytdldata[0] !~ /^https?\:/) { #GET ID (IF YOUTUBE-ID):
my $get_id = shift(@ytdldata);
$self->{'_ytID'} = $get_id if ($get_id =~ /^[a-z0-9\-\_]{11}$/i);
}
my $fmtline = ($ytdldata[$#ytdldata] =~ m#^https?\:\/\/#) ? '-none' : pop(@ytdldata); #LAST LINE IS (USUALLY) THE LIST OF FORMATS RETURNED.
my @fmtsfound = split(/\+/, $fmtline);
for (my $i=0;$i<=$#fmtsfound;$i++) { #GET STREAM URLS:
$_ = shift @ytdldata;
next unless (/^https?/o);
push @ytStreams, $_ unless ($self->{'secure'} && $_ !~ /^https/o);
}
if ($ytdlArgs =~ /get-thumbnail/o) { #GET THUMBNAIL URL:
$_ = shift @ytdldata;
$self->{'iconurl'} = $_ if (defined($_) && m#^https?\:\/\/#);
}
#GET ANY YOUTUBE-URL FOUND AS THE LAST REMAINING LINE (END OF THE DESCRIPTION, JUST BEFORE THE FORMAT LINE) IN MANY ODYSEE VIDEOS:
#(THIS CAN BE USED BY StreamFinder::Odysee!)
$self->{'_odysee_yturl'} = pop(@ytdldata)
if ($#ytdldata >= 0 && $ytdldata[$#ytdldata] =~ m#^https?\:\/\/#);
#GET DESCRIPTION (EVERYTHING ELSE):
$self->{'description'} = '';
while (@ytdldata) {
$self->{'description'} .= shift(@ytdldata) . "\n";
}
push @{$self->{'streams'}}, @ytStreams;
$self->{'cnt'} = scalar @{$self->{'streams'}};
print STDERR "-STREAM COUNT=".$self->{'cnt'}."= FMTS=".join('|',@fmtsfound)."= ICON=".$self->{'iconurl'}."=\n" if ($DEBUG);
unless ($try || $self->{'cnt'} > 0) { #IF NO STREAMS FOUND, RETRY WITHOUT THE SPECIFIC FILE-FORMAT:
$try++;
if (defined $self->{'format-fallback'}) {
print STDERR "..1:No ($ytformat) streams found, try again with ($$self{'formatonly'})...\n" if ($DEBUG);
goto RETRYIT if ($ytdlArgs =~ s/\-f\s+\"[^\"]+\"/\-f \"$$self{'format-fallback'}\"/);
}
unless (defined($self->{'formatonly'}) && $self->{'formatonly'}) {
print STDERR "..2:No ($ytformat) streams found, try again for any (audio, etc.)...\n" if ($DEBUG);
goto RETRYIT if ($ytdlArgs =~ s/\-f\s+\"[^\"]+\"//);
}
}
#NOW MANUALLY SCRAPE YOUTUBE PAGE TO TRY TO GET artist, description, year, ETC. DIRECTLY FROM PAGE (IF A YOUTUBE SITE):
unless ($self->{'fast'}) { #(FAST MEANS SKIP SCRAPING YOUTUBE PAGE FOR ADDTL. METADATA)
$try = 0;
RETRYPAGE:
print STDERR "----(try2=$try= FETCHURL=$url2fetch= isYT?=".$self->{'_isaYtPage'}."=\n" if ($DEBUG);
if ($self->{'_isaYtPage'}) { #WE'RE A YOUTUBE PAGE, FETCH METADATA:
#CONVERT "embedded" YT PAGES TO ACTUAL PAGE (EMBEDDED PAGES DON'T HAVE THE METADATA WE'RE SEEKING!):
if ($url2fetch =~ m#^(.+?)\/embed\/([a-z0-9\-\_]{11})#i) { #TRY FETCHING YOUTUBE SITE FROM THE EMBEDDED URL:
$url2fetch = $1.'/watch?v='.$2;
} elsif ($url2fetch =~ m#^\/embed\/([a-z0-9\-\_]{11})#i) { #IF FAIL, TRY www.youtube.com:
$url2fetch = $self->{'youtube-site'} . '/watch?v=' .$1;
}
print STDERR "-2 (TRY=$try) FETCHING SCREEN URL=$url2fetch= ID=".$self->{'id'}."=\n" if ($DEBUG);
my $ua = LWP::UserAgent->new(@{$self->{'_userAgentOps'}});
$ua->timeout($self->{'timeout'});
$ua->cookie_jar({});
$ua->env_proxy;
my $html = '';
my $response = $ua->get($url2fetch);
if ($response->is_success) {
$html = $response->decoded_content;
} else {
print STDERR $response->status_line if ($DEBUG);
my $no_wget = system('wget','-V');
unless ($no_wget) {
print STDERR "\n..trying wget...\n" if ($DEBUG);
$html = `wget -t 2 -T 20 -O- -o /dev/null "$url2fetch" 2>/dev/null `;
}
}
$html =~ s/\\\"/\"\;/gs;
$self->{'genre'} = $1 if ($html =~ m#\"category\"\:\"([^\"]+)#s);
if ($html =~ s#\]\}\,\"title\"\:\{\"runs\"\:\[\{\"text\"\:\"([^\"]+)\"\,\"navigationEndpoint\"\:([^\}]+)##s) {
my $two = $2;
$self->{'artist'} = $1;
$self->{'albumartist'} = $self->{'youtube-site'} . $1 if ($two =~ m#\"url\"\:\"([^\"]+)#);
} elsif ($html =~ s#\<span\s+itemprop\=\"author\"[^\>]*\>(.+?)\<\/span\>##s) {
my $one = $1;
$self->{'artist'} = $1 if ($one =~ m#itemprop\=\"name\"\s+content\=\"([^\"]+)#);
$self->{'albumartist'} = $1 if ($one =~ m#itemprop\=\"url\"\s+href\=\"([^\"]+)#);
$self->{'albumartist'} = '/' . $self->{'albumartist'}
if ($self->{'albumartist'} =~ m#^\@#);
$self->{'albumartist'} = $self->{'youtube-site'} . $self->{'albumartist'}
if ($self->{'albumartist'} =~ m#^\/#);
}
if ($html =~ s#\"videoDetails\"\:\{\"videoId\"\:\"([^\"]+)\"([^\}]+)##s) {
my $two = $2;
$self->{'id'} = $1;
$self->{'title'} = $1 if ($two =~ m#\"title\"\:\"([^\"]+)#);
$self->{'iconurl'} = $1 if ($two =~ m#\"thumbnails\"\:\[\{\"url\"\:\"([^\"]+)#);
$self->{'iconurl'} =~ s/\?.*$//;
}
if ($html =~ m#\"dateText\"\:\{([^\}]+)\}#s) {
my $one = $1;
$self->{'year'} = $1 if ($one =~ /(\d\d\d\d)/);
}
$self->{'articonurl'} = $1 if ($html =~ m#(?:\"CHANNEL\"\,\"image\"|\"videoOwnerRenderer\")\:\{\"thumbnails"\:\[\{\"url\"\:\"([^\"]+)#s);
print "--YT:2 CHANNEL ICON URL1=".$self->{'articonurl'}."=\n" if ($DEBUG);
unless ($self->{'articonurl'}) {
my $ownerstuff = ($html =~ m#\"videoOwnerRenderer\"\:\{([^\}]+)#s) ? $1 : '';
$self->{'articonurl'} = $1 if ($ownerstuff =~ /\"url\"\:\"([^\"]+)/);
}
print "--YT:2 CHANNEL ICON URL2=".$self->{'articonurl'}."=\n" if ($DEBUG);
} elsif (!$try && $self->{'_ytID'}) { #WE'RE NOT A YOUTUBE PAGE, BUT WE HAVE THE YT ID, SO TRY TO FETCH IT FOR METADATA:
print STDERR "--WE ARE NOT A YT PAGE, BUT ytID=".$self->{'_ytID'}."= SO WE WILL TRY AGAIN!\n" if ($DEBUG);
++$try;
++$self->{'_isaYtPage'};
$url2fetch = $self->{'youtube-site'} . '/watch?v=' . $self->{'_ytID'};
goto RETRYPAGE;
( run in 0.702 second using v1.01-cache-2.11-cpan-71847e10f99 )