App-get_flash_videos
view release on metacpan or search on metacpan
lib/FlashVideo/Site/Youku.pm view on Meta::CPAN
"streamfileids": { "flv": "16*18*16*16*43*4*16*25*16*16*4*27*16*39*41*5*5*59*27*4*59*41*16*25*18*63*4*64*25*5*4*27*41*63*25*41*5*6*24*39*16*19*54*24*4*63*25*27*24*16*41*41*33*24*64*25*5*6*43*16*6*41*27*41*25*18*"},
"videoid": "22722163",
"segs":
{
"flv": [
{
"no": "0",
"size": "4840977",
"seconds": "145"
} ]
},
"fileid": "16*18*16*16*43*4*16*25*16*16*4*27*16*39*41*5*5*59*27*4*59*41*16*25*18*63*4*64*25*5*4*27*41*63*25*41*5*6*24*39*16*19*54*24*4*63*25*27*24*16*41*41*33*24*64*25*5*6*43*16*6*41*27*41*25*18*",
"username": "YBuzz",
"userid": "19746590",
"title": "MV: \u6765\u81ea\u56db\u5ddd\u7684\u6b4c\u58f0",
"key1": "a4156bcd",
"key2": "df891d4af342844b",
"seconds": "145.40",
"streamtypes": [ "flv" ]
} ],
"user": { "id": 0 },
"controller": { "search_count": true }
}
=cut
my ( $shuffle_seed ) = ( $json =~ /"seed":(\d+)/ );
die "Can't find the seed value in the video info JSON"
unless $shuffle_seed;
# Sometimes, the video has a HQ version in mp4 format
my ( $streams ) = ( $json =~ /"streamtypes":\[([^\]]+)\]/ );
my $stream = ( index $streams, 'mp4' ) > 0 ? 'mp4' : 'flv';
debug "Choosing to use the $stream stream (available: $streams)";
# Use the file ID associated with the stream we chose, when available
my $fileID = '';
if ($json =~ /"streamfileids":{([^}]+)}/) {
my $streamfileids = $1;
( $fileID ) = ( $streamfileids =~ /"$stream":"([^"]+)"/ );
}
( $fileID ) = ( $json =~ /"fileid":"([^"]+)"/ ) if not $fileID;
die "Can't find the encrypted file ID in the video info JSON"
unless $fileID;
debug "Encrypted file ID: $fileID";
# File ID is given in obfuscated form, each entry is an index in a lookup
# table that is generated from the seed value
my @lookup_table = shuffle_table( $shuffle_seed );
$fileID =~ s/(\d+)\*/$lookup_table[$1]/eg;
debug "Decrypted file ID: $fileID (seed is $shuffle_seed)";
# Session ID seems to be just the Unix time + '1' + 7 random digits,
# the _00 part seems to mean something that I can't figure out
my $sID = sprintf "%s1%07d_00", time, rand( 10000000 ) ;
# The request key is key2 followed by (key1 XOR 0xa55aa5a5) in lower-case hex
my ( $keyA ) = ( $json =~ /"key1":"([^"]+)"/ );
my ( $keyB ) = ( $json =~ /"key2":"([^"]+)"/ );
my $key = sprintf "%s%x", $keyB, hex( $keyA ) ^ hex( 'a55aa5a5' );
# Video title is in escaped unicode format
my ( $title ) = ( $json =~ /"title":"([^"]+)"/ );
$title =~ s/\\u([a-f0-9]{4})/chr(hex $1)/egi;
# Use the video title as the filename when available
my $filename = get_video_filename( $stream );
$filename = title_to_filename( $title, $stream ) if $title;
my ( $stream_info ) = ( $json =~ /"segs":{"$stream":\[([^\]]+)\]/ );
my @urls;
my $part_count = 0;
while ($stream_info =~ /\G{"no":"?(\d+)"?,([^}]+)},?/g) {
my ( $segment_number, $segment_info ) = ( $1, $2 );
my ( $segment_duration ) = ( $segment_info =~ /"seconds":"([^"]+)"/ );
my ( $segment_size ) = ( $segment_info =~ /"size":"([^"]+)"/ );
# To download segments other than the first (00), we replace
# the digits at position 8 in the file ID with the segment
# number as a two-digit upper-case hexadecimal
my $segment_number_str = sprintf '%02X', $segment_number;
my $segment_fileID = $fileID;
substr $segment_fileID, 8, 2, $segment_number_str;
# Combine it all for the request to grab the video link for this segment
$browser->get(
sprintf "http://f.youku.com/player/getFlvPath/sid/%s/st/%s/fileid/%s?K=%s&myp=null",
$sID, $stream, $segment_fileID, $key );
# If we're successful, we should get a 302 with the location of the segment
my $url = $browser->response->header( 'Location' );
die "Youku rejected our attempt to get the video, we're probably out of date"
unless $browser->response->code eq 302 and $url;
# Sometimes, for whatever reason, the location we get back is missing
# the file extension
debug "Video location for segment $segment_number is $url";
$url = "$url.$stream" unless $url =~ /$stream$/;
debug sprintf
"%s, segment %d, %s seconds, %s bytes",
$title, $segment_number, $segment_duration, $segment_size
if ( $title and $segment_duration and $segment_size );
push @urls, [$url, ++$part_count, 0, $segment_size];
}
$_->[2] = $part_count for @urls;
return ( \@urls, $filename );
}
# Modified Fisher-Yates shuffle
sub shuffle_table {
my ( $seed ) = @_;
my @lookup
= split //,
q`abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890`;
my @shuffled;
while ( $#lookup > 0 )
( run in 0.645 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )