App-get_flash_videos

 view release on metacpan or  search on metacpan

lib/FlashVideo/Generic.pm  view on Meta::CPAN


  my @flv_urls = map {
    (m{http://.+?(http://.+?@{[EXTENSIONS]})}i) ? $1 : $_
  } ($browser->content =~ m{($video_re)}gi);
  if (@flv_urls) {
    require LWP::Simple;
    require Memoize;
    Memoize::memoize("LWP::Simple::head");
    @flv_urls = sort { (LWP::Simple::head($a))[1] <=> (LWP::Simple::head($b))[1] } @flv_urls;
    $possible_filename = (split /\//, $flv_urls[-1])[-1];

    # Un-escape URLs if necessary
    if ($flv_urls[-1] =~ /^http%3a%2f%2f/) {
      $flv_urls[-1] = uri_unescape($flv_urls[-1])
    }
    
    $actual_url = url_exists($browser->clone, $flv_urls[-1]);
  }

  my $filename_is_reliable;

  if(!$actual_url) {
    RE: for my $regex(
        qr{(?si)<embed.*?flashvars=["']?([^"'>]+)},
        qr{(?si)<embed.*?src=["']?([^"'>]+)},
        qr{(?si)<a[^>]* href=["']?([^"'>]+?@{[EXTENSIONS]})},
        qr{(?si)<object[^>]*>.*?<param [^>]*value=["']?([^"'>]+)},
        qr{(?si)<object[^>]*>(.*?)</object>},
        # Attempt to handle scripts using flashvars / swfobject
        qr{(?si)<script[^>]*>(.*?)</script>}) {

      for my $param($browser->content =~ /$regex/gi) {
        (my $url, $possible_filename, $filename_is_reliable) = find_file_param($browser->clone, $param, $prefs);

        if($url) {
          my $resolved_url = url_exists($browser->clone, $url);
          if($resolved_url) {
            $actual_url = $resolved_url;
            last RE;
          }
        }
      }
    }

    if(!$actual_url) {
      for my $iframe($browser->content =~ /<iframe[^>]+src=["']?([^"'>]+)/gi) {
        $iframe = URI->new_abs($iframe, $browser->uri);
        debug "Found iframe: $iframe";
        my $sub_browser = $browser->clone;
        $sub_browser->get($iframe);
        ($actual_url) = eval { $self->find_video($sub_browser, undef, $prefs) };
      }
    }
  }

  my @filenames;
  
  return $actual_url, $possible_filename if $filename_is_reliable;

  $possible_filename =~ s/\?.*//;
  # The actual filename, provided it looks like it might be reasonable
  # (not just numbers)..
  push @filenames, $possible_filename if $possible_filename
    && $possible_filename !~ /^[0-9_.]+@{[EXTENSIONS]}$/;

  # The title of the page, if it isn't similar to the filename..
  my $ext = substr(($actual_url =~ /(@{[EXTENSIONS]})$/)[0], 1);
  push @filenames, title_to_filename($title, $ext) if
    $title && $title !~ /\Q$possible_filename\E/i;

  # A title with just the timestamp in it..
  push @filenames, get_video_filename() if !@filenames;
  
  return $actual_url, @filenames if $actual_url;

  # As a last ditch attempt, download the SWF file as in some cases, sites
  # use an SWF movie file for each FLV.

  # Get SWF URL(s)
  my %swf_urls;

  if (eval { require URI::Find }) {
    my $finder = URI::Find->new(
      sub { $swf_urls{$_[1]}++ if $_[1] =~ /\.swf$/i }
    );
    $finder->find(\$browser->content);
  }
  else {
    # Extract URLs in a frail way.
    while ($browser->content =~ m{(http://[^ "']+?\.swf)}ig) {
      $swf_urls{$_[1]}++;
    }
  }

  if (%swf_urls) {
    foreach my $swf_url (keys %swf_urls) {
      if (my ($flv_url, $title) = search_for_flv_in_swf($browser, $swf_url)) {
        return $flv_url, title_to_filename($title);
      }
    }
  }

  die "No URLs found";
}

sub search_for_flv_in_swf {
  my ($browser, $swf_url) = @_;

  $browser = $browser->clone();

  $browser->get($swf_url);

  if (!$browser->success) {
    die "Couldn't download SWF URL $swf_url: " .
      $browser->response->status_line();
  }

  # SWF data might be compressed.
  my $swf_data = $browser->content;

  if ('C' eq substr $swf_data, 0, 1) {
    if (eval { require Compress::Zlib }) {
      $swf_data = Compress::Zlib::uncompress(substr $swf_data, 8);
    }
    else {
      die "Compress::Zlib is required to uncompress compressed SWF files.\n";
    }
  }

  if ($swf_data =~ m{(http://.{10,300}?\.flv)}i) {
    my $flv_url = $1;

    my $filename = uri_unescape(File::Basename::basename(URI->new($flv_url)->path()));
    $filename =~ s/\.flv$//i;

    return ($flv_url, $filename);
  }

  return;
}

sub find_file_param {
  my($browser, $param, $prefs) = @_;

  for my $file($param =~ /(?:video|movie|file|path)_?(?:href|src|url)?['"]?\s*[=:,]\s*['"]?([^&'" ]+)/gi,
      $param =~ /(?:config|playlist|options)['"]?\s*[,:=]\s*['"]?(http[^'"&]+)/gi,
      $param =~ /['"=](.*?@{[EXTENSIONS]})/gi,
      $param =~ /([^ ]+@{[EXTENSIONS]})/gi,
      $param =~ /SWFObject\(["']([^"']+)/) {

    debug "Found $file";

    my ($actual_url, $filename, $filename_is_reliable) = guess_file($browser, $file, '', $prefs);

    if(!$actual_url && $file =~ /\?(.*)/) {
      # Maybe we have query params?
      debug "Trying query param on $1";

      for my $query_param(split /[;&]/, $1) {
        my($query_key, $query_value) = split /=/, $query_param;
        debug "Found $query_value from $query_key";

        ($actual_url, $filename, $filename_is_reliable)
          = guess_file($browser, $query_value, '', $prefs);

        last if $actual_url;
      }
    }

    if($actual_url) {
      my $possible_filename = $filename || (split /\//, $actual_url)[-1];

      return $actual_url, $possible_filename, $filename_is_reliable;
    }
  }

  if($param =~ m{(rtmp://[^ &"']+)}) {
    info "This looks like RTMP ($1), no generic support yet..";
  }
  
  return;
}

sub guess_file {
  my($browser, $file, $once, $prefs) = @_;

  # Contains lots of URI encoding, so try escaping..
  $file = uri_unescape($file) if scalar(() = $file =~ /%[A-F0-9]{2}/gi) > 3;

  my $orig_uri = URI->new_abs($file, $browser->uri);

  info "Guessed $orig_uri trying...";

  if($orig_uri) {
    my $uri = url_exists($browser->clone, $orig_uri);

    if($uri) {
      # Check to see if this URL is for a supported site.
      my ($package, $url) = FlashVideo::URLFinder->find_package($uri,
        $browser->clone);

      if($package && $package ne __PACKAGE__) {
        debug "$uri is supported by $package.";
        (my $browser_on_supported_site = $browser->clone())->get($uri);
        return $package->find_video($browser_on_supported_site, $uri, $prefs), 1;
      }

      my $content_type = $browser->response->header("Content-type");

      if($content_type =~ m!^(text|application/xml)!) {
        # Just in case someone serves the video itself as text/plain.
        $browser->add_header("Range", "bytes=0-10000");
        $browser->get($uri);
        $browser->delete_header("Range");

        if(FlashVideo::Downloader->check_magic($browser->content)
            || $uri =~ m!$video_re!) {
          # It's a video..
          debug "Found a video at $uri";
          return $uri;
        }

        # If this looks like HTML we have no hope of guessing right, so
        # give up now.
        return if $browser->content =~ /<html[^>]*>/i;

        if($browser->content =~ m!($video_re)!) {
          # Found a video URL
          return $1;
        } elsif(!defined $once
            && $browser->content =~ m!(http[-:/a-zA-Z0-9%_.?=&]+)!i) {
          # Try once more, one level deeper..
          return guess_file($browser, $1, 1, $prefs);
        } else {
          info "Tried $uri, but no video URL found";
        }
      } elsif($content_type =~ m!application/! && $uri ne $orig_uri) {
        # We were redirected, maybe something in the new URL?
        return((find_file_param($browser, $uri))[0]);
      } else {
        return $uri->as_string;
      }
    } elsif(not defined $once) {
      # Try using the location of the .swf file as the base, if it's different.
      if($browser->content =~ /["']([^ ]+\.swf)/) {
        my $swf_uri = URI->new_abs($1, $browser->uri);
        if($swf_uri) {
          my $new_uri = URI->new_abs($file, $swf_uri);
          debug "Found SWF: $swf_uri -> $new_uri";
          if($new_uri ne $uri) {
            return guess_file($browser, $new_uri, 1, $prefs);
          }
        }
      }
    }
  }

  return;
}

1;



( run in 0.994 second using v1.01-cache-2.11-cpan-39bf76dae61 )