App-get_flash_videos

 view release on metacpan or  search on metacpan

lib/FlashVideo/Mechanize.pm  view on Meta::CPAN


    print STDERR "<- $text\n";
  }

  return $r;
}

# Override of the parent class's update_html() which, on old LWP releases,
# attempts to decode the page body into characters before passing it on.
#
# Arguments:
#   $html - the page content as handed over by the caller.
#
# Returns whatever SUPER::update_html() returns for the (possibly re-decoded)
# content.
#
# A failed decode is reported through FlashVideo::Utils::error() and is not
# fatal; the content is then passed on without being decoded.
sub update_html {
  my($self, $html) = @_;

  # The response may carry no Content-type header at all; only parse it when
  # present. List assignment keeps the first header value, as before.
  my($content_type) = $self->response->header("Content-type");
  my $charset = defined $content_type ? _parse_charset($content_type) : undef;

  # If we have no character set in the header (therefore it is worth looking
  # for a http-equiv in the body) or the content hasn't been decoded (older
  # versions of Mech).
  if($LWP::UserAgent::VERSION < 5.827
    && (!$charset || !Encode::is_utf8($html))) {

    # HTTP::Message helpfully decodes to iso-8859-1 by default. Therefore we
    # do the inverse. This is fragile and may well break.
    $html = Encode::encode("iso-8859-1", $html) if Encode::is_utf8($html);

    # Check this doesn't look like a video..
    if(!FlashVideo::Downloader->check_magic($html)) {
      # A charset from the header takes precedence, so the body only needs
      # scanning when the header gave us nothing.
      if(!$charset) {
        my $p = HTML::TokeParser->new(\$html);
        while(my $token = $p->get_tag("meta")) {
          my $attr = $token->[1];

          # Plenty of <meta> tags have no http-equiv (or no content)
          # attribute; skip those instead of matching against undef.
          my $http_equiv = $attr->{"http-equiv"};
          next unless defined $http_equiv && $http_equiv =~ /Content-type/i;
          next unless defined $attr->{content};

          $charset = _parse_charset($attr->{content});
          last if $charset;
        }
      }

      if($charset) {
        # The trailing "1" makes the eval's own value the success indicator,
        # rather than relying solely on $@.
        eval { $html = Encode::decode($charset, $html); 1 }
          or FlashVideo::Utils::error("Failed decoding as $charset: $@");
      }
    }
  }

  return $self->SUPER::update_html($html);
}

# Extract the charset parameter from a Content-Type style value.
#
# Arguments:
#   $field - a string such as "text/html; charset=utf-8"; may be undef.
#
# Returns the charset name exactly as written in $field, or undef (an empty
# list in list context) when $field is undef or has no charset parameter.
sub _parse_charset {
  my($field) = @_;

  # Callers pass header/attribute values straight through, and those can be
  # missing; bail out quietly rather than matching against undef.
  return unless defined $field;

  # Parameter values may be quoted (charset="utf-8"), so allow an optional
  # opening quote before the captured name.
  return(($field =~ /;\s*charset=["']?([-_.:a-z0-9]+)/i)[0]);
}

sub get_socks_proxy {
  my $self = shift;
  my $proxy = $self->proxy("http");

  if(defined $proxy && $proxy =~ m!^socks://(.*?):(\d+)!) {
    return "$1:$2";
  }

lib/FlashVideo/Site/Fliqz.pm  view on Meta::CPAN

    $id = $1;
  }

  # This can sometimes pull out the incorrect ID, which causes a SOAP fault to
  # be returned, so we do the other check first.
  if (!$id and $browser->content =~ /\Q$embed_url\E.*?([a-f0-9]{32})/) {
    $id = $1;
  }

  $browser->post("http://services.fliqz.com/mediaassetcomponentservice/20071201/service.svc",
    Content_Type => "text/xml; charset=utf-8",
    SOAPAction   => '"urn:fliqz.s.mac.20071201/IMediaAssetComponentService/ad"',
    Referer      => $embed_url,
    Content      => _get_soap_xml($id)
  );

  my $flv_url  = ($browser->content =~ />(http:[^<]+\.flv)</)[0];

  # If we can't get the FLV URL, try posting our SOAP request to a different
  # URL. Don't know how they decide which server to use - would be good if we could
  # find out.
  if (!$flv_url) {
    # Try posting to a different URL
    $browser->post("http://services.fliqz.com/LegacyServices/Services/MediaAsset/Component/R20071201/service.svc",
      Content_Type => "text/xml; charset=utf-8",
      SOAPAction   => '"urn:fliqz.s.mac.20071201/IMediaAssetComponentService/ad"',

      # For the record, it seems that Fliqz don't care about the referer
      # anyway.
      Referer      => $embed_url,

      Content      => _get_soap_xml($id)
    );

    $flv_url = ($browser->content =~ />(http:[^<]+\.flv)</)[0];

lib/FlashVideo/Site/Itv.pm  view on Meta::CPAN

use FlashVideo::Utils;
use HTML::Entities;

sub find_video {
  my ($self, $browser, $page_url) = @_;

  my($id) = $browser->uri =~ /Filter=(\d+)/;
  die "No id (filter) found in URL\n" unless $id;

  $browser->post("http://mercury.itv.com/PlaylistService.svc",
    Content_Type => "text/xml; charset=utf-8",
    Referer      => "http://www.itv.com/mercury/Mercury_VideoPlayer.swf?v=1.5.309/[[DYNAMIC]]/2",
    SOAPAction   => '"http://tempuri.org/PlaylistService/GetPlaylist"',
    Content      => <<EOF);
<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <SOAP-ENV:Body>
    <tem:GetPlaylist xmlns:tem="http://tempuri.org/" xmlns:itv="http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types" xmlns:com="http://schemas.itv.com/2009/05/Common">
      <tem:request>
        <itv:RequestGuid>FFFFFFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF</itv:RequestGuid>
        <itv:Vodcrid>
          <com:Id>$id</com:Id>

t/utils.t  view on Meta::CPAN

use Test::More;
use FlashVideo::Utils;
use Encode;

# Fixture table. Each entry is an array ref of three elements:
#   [0] an HTML snippet (interpolating heredoc, so \x.. escapes become raw
#       characters with those code points),
#   [1] a Content-Type header value,
#   [2] a title string - presumably the title expected to be extracted from
#       the snippet; the code consuming this table is not shown here.
my @tests = (
  # No charset anywhere; mixed-case tag names and a newline inside the title.
  [ <<EOF, "text/html", "foo bar"
<Title>foo
bar</title>
EOF
  ],
  # Charset given in the header only; the opening tag is split across lines.
  [ <<EOF, "text/html; charset=iso-8859-1", "café"
<title
>caf\x{e9}</title>
EOF
  ],
  # Charset given in the header only (windows-1251).
  [ <<EOF, "text/html; charset=windows-1251", "Российская Федерация"
<title>\xD0\xEE\xF1\xF1\xE8\xE9\xF1\xEA\xE0\xFF\x20\xD4\xE5\xE4\xE5\xF0\xE0\xF6\xE8\xFF</title>
EOF
  ],
  # Same body as above, but the charset is only declared by an unquoted,
  # upper-case META http-equiv placed before the title.
  [ <<EOF, "text/html", "Российская Федерация"
<META http-equiv=content-type content="text/html; CHARSET=windows-1251" />
<title>\xD0\xEE\xF1\xF1\xE8\xE9\xF1\xEA\xE0\xFF\x20\xD4\xE5\xE4\xE5\xF0\xE0\xF6\xE8\xFF</title>
EOF
  ],
  # Charset (shift_jis) declared by a meta http-equiv that comes after the
  # title.
  [ <<EOF, "text/html", "NTTドコモのオフィシャルウェブサイトです。"
<title>\x4E\x54\x54\x83\x68\x83\x52\x83\x82\x82\xCC\x83\x49\x83\x74\x83\x42\x83\x56\x83\x83\x83\x8B\x83\x45\x83\x46\x83\x75\x83\x54\x83\x43\x83\x67\x82\xC5\x82\xB7\x81\x42</title>
<meta http-equiv="content-type" content="text/html; charset=shift_jis">
EOF
  ]
);

# These aren't actually in UTF-8, hence the evilness.
# Clears Perl's internal UTF-8 flag on every snippet (element [0]) so each is
# treated as a plain byte string.
Encode::_utf8_off($_->[0]) for @tests;

{ # Mock version of WWW::Mechanize
  package MockMech;
  use base "FlashVideo::Mechanize";



( run in 0.312 second using v1.01-cache-2.11-cpan-4d50c553e7e )