XML-FeedLite

 view release on metacpan or  search on metacpan

lib/XML/FeedLite.pm  view on Meta::CPAN


    while($attr =~ s{(\S+)\s*=\s*["']([^"']*)["']}{}smx) {
      if($2) {
	$tagdata->{$1} = $2;
      }
    }

    if($content) {
      my $mode = $tagdata->{'mode'} || q();

      if($mode eq 'escaped') {
	$content = decode_entities($content);

      } elsif($mode eq 'base64') {
	$content = decode_base64($content);
      }

      $tagdata->{'content'} = $content;
    }

    if(scalar keys %{$tagdata}) {
      push @{$entry->{$tag}}, $tagdata;
    }
  }

  push @{$results}, $entry;
  return q();
}

sub meta {
  my ($self, $feed) = @_;

  if(!$self->{'_fetched'}) {
    $self->entries($feed);
    $self->{'_fetched'} = 1;
  }

  if($feed) {
    return $self->{feedmeta}->{$feed}||{};
  }

  return $self->{feedmeta}||{};
}

sub title {
  my ($self, $feed) = @_;
  return $self->meta($feed)->{title} || 'Untitled';
}

sub fetch {
  my ($self, $url_ref, $headers) = @_;

  my $ua = WWW::Curl::Simple->new;

  $self->{'statuscodes'} = {};
  if(!$headers) {
    $headers = {};
  }

  if($ENV{HTTP_X_FORWARDED_FOR}) {
    $headers->{'X-Forwarded-For'} ||= $ENV{'HTTP_X_FORWARDED_FOR'};
  }

  for my $url (keys %{$url_ref}) {
    if(ref $url_ref->{$url} ne 'CODE') {
      $DEBUG and print {*STDERR} qq[handler for $url isn't CODE];
      next;
    }
    $DEBUG and print {*STDERR} qq(Building HTTP::Request for $url [timeout=$self->{'timeout'}] via $url_ref->{$url}\n);

    my $http_headers = HTTP::Headers->new(%{$headers});
    $http_headers->user_agent($self->user_agent());

    if($self->proxy_user() && $self->proxy_pass()) {
      $headers->proxy_authorization_basic($self->proxy_user(), $self->proxy_pass());
    }

    $ua->add_request(HTTP::Request->new('GET', $url, $http_headers));
  }

  $DEBUG and print {*STDERR} qq(Requests submitted. Waiting for content\n);
  my $ref = $ua->wait;

  for my $curl_req (values %{$ref}) {
    my $content = $curl_req->body;
    my $uri     = $curl_req->request->uri;
    $self->{statuscodes}->{$uri} = $curl_req->head =~ /HTTP\S+\s+(\d+)/smx;
    $url_ref->{$uri}->($content);
  }

  return;
}

sub statuscodes {
  my ($self, $url)         = @_;
  $self->{statuscodes} ||= {};

  return $url?$self->{statuscodes}->{$url}:$self->{statuscodes};
}

sub max_req {
  my ($self, @args) = @_;
  return $self->_accessor('max_req', @args);
}

1;
__END__

=head1 NAME

XML::FeedLite - Perl extension for fetching Atom and RSS feeds with minimal outlay

=head1 VERSION

$Revision: 1.9 $

=head1 SYNOPSIS

  use XML::FeedLite;

=head1 DESCRIPTION

lib/XML/FeedLite.pm  view on Meta::CPAN

    $xfl->http_proxy("http://user:pass@squid.myco.com:3128/");

=head2 proxy_user - Get/Set proxy username for authenticating forward-proxies

  This is only required if the username wasn't specified when setting http_proxy

    $xfl->proxy_user('myusername');

=head2 proxy_pass - Get/Set proxy password for authenticating forward-proxies

  This is only required if the password wasn't specified when setting http_proxy

    $xfl->proxy_pass('secretpassword');

=head2 user_agent - Get/Set user-agent for request headers

    $xfl->user_agent('Feedtastic/1.0');

=head2 timeout - Get/Set timeout

    $xfl->timeout(30);

=head2 url - Get/Set DSN

  $xfl->url('http://das.ensembl.org/das/ensembl1834/'); # give url (scalar or arrayref) here if not specified in new()

  Or, if you want to add to the existing url list and you're feeling sneaky...

  push @{$xfl->url}, 'http://my.server/das/additionalsource';

=head2 reset - Flush bufers, reset flags etc.

  $xfl->reset();

=head2 entries - Retrieve XML::Simple data structures from feeds

  my $entry_data = $xfl->entries();

=head2 meta - Meta data globally keyed on feed, or for a given feed 

  my $hrMeta     = $xfl->meta();
  my $hrFeedMeta = $xfl->meta('http://mysite.com/feed.xml');

=head2 title - The name/title of a given feed

  my $title = $xfl->title($feed);

=head2 fetch - Performs the HTTP fetch and processing

  $xfl->fetch({
               #########
               # URLs and associated callbacks
               #
               'url1' => sub { ... },
               'url2' => sub { ... },
              },
              {
               #########
               # Optional HTTP headers
               #
               'X-Forwarded-For' => 'a.b.c.d',
              });

=head2 statuscodes - Retrieve HTTP status codes for request URLs

  my $code         = $xfl->statuscodes($url);
  my $code_hashref = $xfl->statuscodes();

=head2 max_req - set number of running concurrent requests

  $xfl->max_req(5);
  print $xfl->max_req();

=head1 DIAGNOSTICS

=head1 CONFIGURATION AND ENVIRONMENT

=head1 DEPENDENCIES

=over

=item strict

=item warnings

=item WWW::Curl::Simple

=item HTTP::Request

=item HTTP::Headers

=item HTML::Entities

=item MIME::Base64

=item English

=item Carp

=item Readonly

=back

=head1 INCOMPATIBILITIES

=head1 BUGS AND LIMITATIONS

=head1 AUTHOR

Roger Pettett, E<lt>rmp@psyphi.netE<gt>

=head1 LICENSE AND COPYRIGHT

Copyright (C) 2010 by Roger Pettett

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.4 or,
at your option, any later version of Perl 5 you may have available.

=cut



( run in 0.753 second using v1.01-cache-2.11-cpan-99c4e6809bf )