Data-Feed
view release on metacpan or search on metacpan
lib/Data/Feed.pm view on Meta::CPAN
# Auto-detect feed type based on first element. This is prone
# to breakage, but then again we don't want to parse the whole
# feed ourselves.
# XXX - Make this extendable!
{
my $tag;
while ($$content_ref =~ /<(\S+)/sg) {
(my $t = $1) =~ tr/a-zA-Z0-9:\-\?!//cd;
my $first = substr $t, 0, 1;
$tag = $t, last unless $first eq '?' || $first eq '!';
}
if (! $tag) {
# confess "Could not find the first XML element";
return ();
}
$tag =~ s/^.*://;
if ($tag =~ /^(?:rss|rdf)$/i) {
return 'RSS';
} elsif ($tag =~ /^feed$/i) {
return 'Atom';
}
}
return ();
}
# Read the raw feed content designated by $stream and return a reference
# to a scalar holding it.
#
# $stream may be one of:
#   * a plain (non-reference) string - treated as a filename and slurped
#   * an object that isa('URI')      - fetched with LWP::UserAgent
#   * a SCALAR reference             - the referent is the content itself
#   * a GLOB reference               - treated as a filehandle and slurped
#
# The returned reference never points at undef: missing content is
# normalised to the empty string.
#
# Confesses when the file cannot be opened, when the HTTP request does not
# succeed (with a dedicated message for "410 Gone"), or when $stream is a
# reference of any other kind.
sub fetch_stream {
    my ( $self, $stream ) = @_;

    my $content;
    my $ref = ref($stream) || '';

    if ( !$ref ) {
        # if given a string, it's a filename
        open( my $fh, '<', $stream )
            or Carp::confess("Could not open file $stream: $!");
        $content = do { local $/; <$fh> };
        close $fh;
    }
    elsif ( Scalar::Util::blessed($stream) && $stream->isa('URI') ) {
        # XXX - Shouldn't using LWP suffice here?
        my $ua = LWP::UserAgent->new();
        $ua->env_proxy;

        my $req = HTTP::Request->new( GET => $stream );
        $req->header( 'Accept-Encoding', 'gzip' );

        # request() hands back a response object even when the fetch
        # failed, so its truthiness says nothing; success has to be read
        # from the response itself.
        my $res = $ua->request($req);

        # Keep the more specific diagnostic for feeds that are gone for good.
        if ( $res->code == 410 ) {
            Carp::confess("This feed has been permanently removed");
        }
        if ( !$res->is_success ) {
            Carp::confess(
                "Failed to fetch URI $stream: " . $res->status_line );
        }

        $content = $res->decoded_content;
    }
    elsif ( $ref eq 'SCALAR' ) {
        $content = $$stream;
    }
    elsif ( $ref eq 'GLOB' ) {
        $content = do { local $/; <$stream> };
    }
    else {
        Carp::confess("Don't know how to fetch '$ref'");
    }

    # A slurp from an already exhausted handle (or an undef referent)
    # yields undef; hand callers an empty string instead so they can
    # dereference and pattern-match without "uninitialized" warnings.
    $content = '' unless defined $content;

    return \$content;
}
# Turn a free-form timestamp string into a date object by trying three
# parsers in order of strictness: DateTime::Format::ISO8601, then
# DateTime::Format::Flexible, then DateTime::Format::Natural.
#
# Returns whatever the first successful parser produced, or undef when $ts
# is false or no parser succeeds. The undef is returned explicitly, so a
# list-context caller receives a one-element list.
sub parse_datetime {
    my ($self, $ts) = @_;

    if (!$ts) {
        return undef;
    }

    # The first two parsers signal failure by dying; trap that and move on.
    my $parsed = eval { DateTime::Format::ISO8601->parse_datetime($ts) };
    return $parsed if $parsed;

    $parsed = eval { DateTime::Format::Flexible->parse_datetime($ts) };
    return $parsed if $parsed;

    # The natural-language parser is not wrapped in eval; its outcome is
    # read from success() rather than from the returned value.
    my $natural = DateTime::Format::Natural->new;
    my $guess   = $natural->parse_datetime($ts);
    return $natural->success ? $guess : undef;
}
# Parse $ts with DateTime::Format::W3CDTF first; if that parser dies or
# returns a false value, hand the string to $self->parse_datetime instead.
#
# Returns undef (explicitly, so one element in list context) when $ts is
# false.
sub parse_w3cdtf_date {
    my ($self, $ts) = @_;

    if (!$ts) {
        return undef;
    }

    my $w3c = eval { DateTime::Format::W3CDTF->parse_datetime($ts) };
    return $w3c || $self->parse_datetime($ts);
}
# Parse $ts with a DateTime::Format::Mail parser constructed with
# loose => 1; if construction or parsing dies, or the result is false,
# hand the string to $self->parse_datetime instead.
#
# Returns undef (explicitly, so one element in list context) when $ts is
# false.
sub parse_mail_date {
    my ($self, $ts) = @_;

    if (!$ts) {
        return undef;
    }

    my $mail = eval {
        my $parser = DateTime::Format::Mail->new(loose => 1);
        $parser->parse_datetime($ts);
    };
    return $mail || $self->parse_datetime($ts);
}
1;
__END__
=head1 NAME
Data::Feed - Extensible Feed Parsing Tool
=head1 SYNOPSIS
use Data::Feed;
# from a file
$feed = Data::Feed->parse( '/path/to/my/feed.xml' );
# from a URI
$feed = Data::Feed->parse( URI->new( 'http://example.com/atom.xml' ) );
# from a string
( run in 0.785 second using v1.01-cache-2.11-cpan-df04353d9ac )