Data-Feed

 view release on metacpan or  search on metacpan

lib/Data/Feed.pm  view on Meta::CPAN

    # Auto-detect feed type based on first element. This is prone
    # to breakage, but then again we don't want to parse the whole
    # feed ourselves.

    # XXX - Make this extendable!

    { 
        my $tag;

        while ($$content_ref =~ /<(\S+)/sg) {
            (my $t = $1) =~ tr/a-zA-Z0-9:\-\?!//cd;
            my $first = substr $t, 0, 1;
            $tag = $t, last unless $first eq '?' || $first eq '!';
        }

        if (! $tag) {
            # confess "Could not find the first XML element";
            return ();
        }

        $tag =~ s/^.*://;

        if ($tag =~ /^(?:rss|rdf)$/i) {
            return 'RSS';
        } elsif ($tag =~ /^feed$/i) {
            return 'Atom';
        }
    }

    return ();
}

# Read the raw feed content from $stream and return a reference to it.
#
# $stream may be one of:
#   - a plain (non-reference) string: treated as a filename and slurped
#   - an object that isa('URI'): fetched with LWP::UserAgent
#   - a SCALAR reference: the referenced string is the content
#   - a GLOB-based filehandle, plain or blessed (e.g. IO::Handle): read to EOF
#
# Returns a reference to a scalar holding the content.
#
# Confesses when the file cannot be opened, when the HTTP request does not
# succeed (with a dedicated message for "410 Gone"), when the response body
# cannot be decoded, or when $stream is of an unsupported type.
sub fetch_stream {
    my ( $self, $stream ) = @_;

    my $content = '';
    my $ref = ref $stream || '';
    if ( !$ref ) {

        # if given a string, it's a filename
        open( my $fh, '<', $stream )
            or Carp::confess("Could not open file $stream: $!");
        $content = do { local $/; <$fh> };
        close $fh;
    }
    elsif ( Scalar::Util::blessed($stream) && $stream->isa('URI') ) {
        my $ua = LWP::UserAgent->new();
        $ua->env_proxy;
        my $req = HTTP::Request->new( GET => $stream );
        $req->header( 'Accept-Encoding', 'gzip' );

        # request() always hands back a response object, even when the
        # request failed, so failure must be detected from the status.
        my $res = $ua->request($req);

        # 410 is checked first so it keeps its more specific message.
        if ( $res->code == 410 ) {
            Carp::confess("This feed has been permanently removed");
        }
        if ( !$res->is_success ) {
            Carp::confess(
                "Failed to fetch URI $stream: " . $res->status_line );
        }

        # decoded_content undoes the gzip Content-Encoding requested above;
        # it yields undef when the body cannot be decoded.
        $content = $res->decoded_content;
        if ( !defined $content ) {
            Carp::confess("Failed to decode content fetched from $stream");
        }
    }
    elsif ( $ref eq 'SCALAR' ) {
        $content = $$stream;
    }
    elsif ( ( Scalar::Util::reftype($stream) || '' ) eq 'GLOB' ) {

        # reftype (rather than ref) also accepts blessed handles such as
        # IO::Handle / IO::File objects.
        $content = do { local $/; <$stream> };
    }
    else {
        Carp::confess("Don't know how to fetch '$ref'");
    }

    return \$content;
}

# Parse the timestamp string $ts by trying progressively more lenient
# parsers: DateTime::Format::ISO8601, then DateTime::Format::Flexible, and
# finally DateTime::Format::Natural. Returns the first true result, or
# undef when $ts is false or the Natural parser reports failure.
sub parse_datetime {
    my ($self, $ts) = @_;
    return undef unless $ts;

    # These two are called as class methods; an exception from either one
    # is swallowed and simply means "try the next parser".
    for my $parser_class (qw(DateTime::Format::ISO8601 DateTime::Format::Flexible)) {
        my $parsed = eval { $parser_class->parse_datetime($ts) };
        return $parsed if $parsed;
    }

    # The Natural parser signals failure through success(), not by dying.
    my $natural = DateTime::Format::Natural->new;
    my $parsed  = $natural->parse_datetime($ts);
    return $natural->success ? $parsed : undef;
}

# Parse $ts with DateTime::Format::W3CDTF; if that dies or yields a false
# value, defer to $self->parse_datetime. Returns undef when $ts is false.
sub parse_w3cdtf_date {
    my ($self, $ts) = @_;
    return undef unless $ts;

    my $parsed = eval { DateTime::Format::W3CDTF->parse_datetime($ts) };
    return $parsed if $parsed;

    return $self->parse_datetime($ts);
}

# Parse $ts with a loose-mode DateTime::Format::Mail parser; if that dies
# or yields a false value, defer to $self->parse_datetime. Returns undef
# when $ts is false.
sub parse_mail_date {
    my ($self, $ts) = @_;
    return undef unless $ts;

    my $parsed = eval {
        my $mail_parser = DateTime::Format::Mail->new(loose => 1);
        $mail_parser->parse_datetime($ts);
    };
    return $parsed if $parsed;

    return $self->parse_datetime($ts);
}

1;

__END__

=head1 NAME

Data::Feed - Extensible Feed Parsing Tool

=head1 SYNOPSIS

  use Data::Feed;

  # from a file
  $feed = Data::Feed->parse( '/path/to/my/feed.xml' );

  # from a URI
  $feed = Data::Feed->parse( URI->new( 'http://example.com/atom.xml' ) );

  # from a string



( run in 0.785 second using v1.01-cache-2.11-cpan-df04353d9ac )