App-get_flash_videos
view release on metacpan or search on metacpan
lib/FlashVideo/Mechanize.pm view on Meta::CPAN
print STDERR "<- $text\n";
}
return $r;
}
# Override of update_html that tries to fix up the character encoding of the
# page before handing it on to the parent class.
#
# The fix-up only runs for LWP::UserAgent versions older than 5.827, and only
# when the Content-type header carried no charset or the content has not been
# decoded into Perl characters yet.
#
# Parameters:
#   $self - the mechanize object; its current response supplies the header.
#   $html - the page content.
#
# Returns whatever SUPER::update_html returns for the (possibly re-decoded)
# content.
sub update_html {
  my($self, $html) = @_;

  # Read the header in list context so that, as before, only the first value
  # is considered. A response without the header yields undef, which must not
  # be matched against a regex.
  my($content_type) = $self->response->header("Content-type");
  my $charset = defined $content_type ? _parse_charset($content_type) : undef;

  # If we have no character set in the header (therefore it is worth looking
  # for a http-equiv in the body) or the content hasn't been decoded (older
  # versions of Mech).
  if($LWP::UserAgent::VERSION < 5.827
      && (!$charset || !Encode::is_utf8($html))) {

    # HTTP::Message helpfully decodes to iso-8859-1 by default. Therefore we
    # do the inverse to get the original bytes back. This is fragile.
    $html = Encode::encode("iso-8859-1", $html) if Encode::is_utf8($html);

    # Check this doesn't look like a video..
    if(!FlashVideo::Downloader->check_magic($html)) {

      # A charset from the HTTP header always wins, so the body only needs
      # scanning when the header did not provide one.
      if(!$charset) {
        my $p = HTML::TokeParser->new(\$html);

        while(my $token = $p->get_tag("meta")) {
          my($tag, $attr) = @$token;
          next unless $tag eq 'meta';

          # Plenty of <meta> tags have no http-equiv (e.g. name=...) or no
          # content attribute; skip them instead of matching against undef.
          my $http_equiv = $attr->{"http-equiv"};
          next unless defined $http_equiv && $http_equiv =~ /Content-type/i;

          my $content = $attr->{content};
          next unless defined $content;

          # The first declaration that yields a charset is used.
          $charset ||= _parse_charset($content);
          last if $charset;
        }
      }

      if($charset) {
        # An unknown or bogus charset name makes decode() die; report it and
        # carry on with the undecoded content.
        eval { $html = Encode::decode($charset, $html) };
        FlashVideo::Utils::error("Failed decoding as $charset: $@") if $@;
      }
    }
  }

  return $self->SUPER::update_html($html);
}
# Extract the charset parameter from a Content-Type style value, such as an
# HTTP header or the content attribute of a <meta http-equiv> tag.
#
# Parameters:
#   $field - the header/attribute value, e.g. "text/html; charset=utf-8".
#            May be undef when the header or attribute is absent.
#
# Returns the charset name as written in $field, or undef (an empty list in
# list context) when $field is undef or contains no charset parameter.
sub _parse_charset {
  my($field) = @_;

  # Nothing to parse; also avoids an "uninitialized value" warning.
  return unless defined $field;

  # The parameter name is matched case-insensitively. The value may be a bare
  # token or wrapped in quotes (charset="utf-8"), so skip an opening quote.
  return(($field =~ /;\s*charset=["']?([-_.:a-z0-9]+)/i)[0]);
}
sub get_socks_proxy {
my $self = shift;
my $proxy = $self->proxy("http");
if(defined $proxy && $proxy =~ m!^socks://(.*?):(\d+)!) {
return "$1:$2";
}
lib/FlashVideo/Site/Fliqz.pm view on Meta::CPAN
$id = $1;
}
# This can sometimes pull out the incorrect ID, which causes a SOAP fault to
# be returned, so we do the other check first.
if (!$id and $browser->content =~ /\Q$embed_url\E.*?([a-f0-9]{32})/) {
$id = $1;
}
$browser->post("http://services.fliqz.com/mediaassetcomponentservice/20071201/service.svc",
Content_Type => "text/xml; charset=utf-8",
SOAPAction => '"urn:fliqz.s.mac.20071201/IMediaAssetComponentService/ad"',
Referer => $embed_url,
Content => _get_soap_xml($id)
);
my $flv_url = ($browser->content =~ />(http:[^<]+\.flv)</)[0];
# If we can't get the FLV URL, try posting our SOAP request to a different
# URL. Don't know how they decide which server to use - would be good if we could
# find out.
if (!$flv_url) {
# Try posting to a different URL
$browser->post("http://services.fliqz.com/LegacyServices/Services/MediaAsset/Component/R20071201/service.svc",
Content_Type => "text/xml; charset=utf-8",
SOAPAction => '"urn:fliqz.s.mac.20071201/IMediaAssetComponentService/ad"',
# For the record, it seems that Fliqz don't care about the referer
# anyway.
Referer => $embed_url,
Content => _get_soap_xml($id)
);
$flv_url = ($browser->content =~ />(http:[^<]+\.flv)</)[0];
lib/FlashVideo/Site/Itv.pm view on Meta::CPAN
use FlashVideo::Utils;
use HTML::Entities;
sub find_video {
my ($self, $browser, $page_url) = @_;
my($id) = $browser->uri =~ /Filter=(\d+)/;
die "No id (filter) found in URL\n" unless $id;
$browser->post("http://mercury.itv.com/PlaylistService.svc",
Content_Type => "text/xml; charset=utf-8",
Referer => "http://www.itv.com/mercury/Mercury_VideoPlayer.swf?v=1.5.309/[[DYNAMIC]]/2",
SOAPAction => '"http://tempuri.org/PlaylistService/GetPlaylist"',
Content => <<EOF);
<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<SOAP-ENV:Body>
<tem:GetPlaylist xmlns:tem="http://tempuri.org/" xmlns:itv="http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types" xmlns:com="http://schemas.itv.com/2009/05/Common">
<tem:request>
<itv:RequestGuid>FFFFFFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF</itv:RequestGuid>
<itv:Vodcrid>
<com:Id>$id</com:Id>
use Test::More;
use FlashVideo::Utils;
use Encode;
my @tests = (
[ <<EOF, "text/html", "foo bar"
<Title>foo
bar</title>
EOF
],
[ <<EOF, "text/html; charset=iso-8859-1", "café"
<title
>caf\x{e9}</title>
EOF
],
[ <<EOF, "text/html; charset=windows-1251", "РоÑÑийÑÐºÐ°Ñ Ð¤ÐµÐ´ÐµÑаÑиÑ"
<title>\xD0\xEE\xF1\xF1\xE8\xE9\xF1\xEA\xE0\xFF\x20\xD4\xE5\xE4\xE5\xF0\xE0\xF6\xE8\xFF</title>
EOF
],
[ <<EOF, "text/html", "РоÑÑийÑÐºÐ°Ñ Ð¤ÐµÐ´ÐµÑаÑиÑ"
<META http-equiv=content-type content="text/html; CHARSET=windows-1251" />
<title>\xD0\xEE\xF1\xF1\xE8\xE9\xF1\xEA\xE0\xFF\x20\xD4\xE5\xE4\xE5\xF0\xE0\xF6\xE8\xFF</title>
EOF
],
[ <<EOF, "text/html", "NTTãã³ã¢ã®ãªãã£ã·ã£ã«ã¦ã§ããµã¤ãã§ãã"
<title>\x4E\x54\x54\x83\x68\x83\x52\x83\x82\x82\xCC\x83\x49\x83\x74\x83\x42\x83\x56\x83\x83\x83\x8B\x83\x45\x83\x46\x83\x75\x83\x54\x83\x43\x83\x67\x82\xC5\x82\xB7\x81\x42</title>
<meta http-equiv="content-type" content="text/html; charset=shift_jis">
EOF
]
);
# The page bodies in @tests are not actually UTF-8 (the fixtures declare other
# charsets such as iso-8859-1, windows-1251 and shift_jis), so forcibly clear
# Perl's internal UTF-8 flag on the first element of every test entry. Using
# the private Encode::_utf8_off for this is a hack, hence the evilness.
Encode::_utf8_off($_->[0]) for @tests;
{ # Mock version of WWW::Mechanize
package MockMech;
use base "FlashVideo::Mechanize";
( run in 0.312 second using v1.01-cache-2.11-cpan-4d50c553e7e )