App-scrape

 view release on metacpan or  search on metacpan

bin/scrape2rss.pl  view on Meta::CPAN

) or pod2usage(2);
# Print the usage text and exit when help was requested.
$help and pod2usage(1);

# At least one source has to be named on the command line.
if (!@ARGV) {
    die "No URL given.\n";
}

# Defaults for options that were left empty. The feed URL falls back to the
# output file name first and to 'feed.atom' last.
$feed_url   = $outfile || 'feed.atom' unless $feed_url;
$feed_title = 'Atom feed'             unless $feed_title;
$category   = ''                      unless $category;

# Time of this run in UTC, formatted as YYYY-MM-DDTHH:MM:SSZ.
my $updated = Time::Piece->gmtime()->strftime('%Y-%m-%dT%H:%M:%SZ');

# The feed URL is used three times: as the plain link, as the rel="self"
# link and as the feed id.
my $feed = XML::Atom::SimpleFeed->new(
    title   => $feed_title,
    link    => $feed_url,
    link    => {
        rel  => 'self',
        href => $feed_url,
    },
    author  => 'scrape2rss',
    id      => $feed_url,
    updated => $updated,
);

my %seen;
while (@ARGV) {
    my $url = shift @ARGV;
    next if $seen{ $url }++;
    
    my $html;
    if ($url eq '-') {
        # read from STDIN

bin/scrape2rss.pl  view on Meta::CPAN

            summary => $summary,
            permalink => $permalink,
            title => $title,
            date => $date,
            #category => $category,
        }, {
        base => $url,
    });

    for my $item (@rows) {
        # Work out this entry's "updated" timestamp. $updated (kept in
        # $atom_fmt form throughout) is the fallback whenever the item has
        # no date or its date does not look like $date_fmt.
        my $atom_fmt     = '%Y-%m-%dT%H:%M:%SZ';
        my $item_updated = $updated;
        my $parse_fmt    = $atom_fmt;

        # Now, extract the information, just in case there is "garbage"
        # around the string. Every strptime conversion (%Y, %m, ...) in
        # $date_fmt is replaced by \d+; the remaining text of $date_fmt is
        # used as regex source unchanged.
        (my $extr = $date_fmt) =~ s!%\w!\\d+!g;
        $extr = qr/($extr)/;

        if (my $scraped = $item->{date}) {
            if ($scraped =~ /$extr/) {
                $item_updated = $1;
                $parse_fmt    = $date_fmt;
            } else {
                # Report the string that failed to match, not the fallback.
                warn "Is [$scraped] a valid date?\n";
            };
        };

        # Parse with the format that belongs to the chosen string: the
        # fallback is in $atom_fmt, so parsing it with a different
        # $date_fmt would make strptime fail.
        my $ts = Time::Piece->strptime( $item_updated, $parse_fmt );

        # NOTE(review): as in the original code, $updated is overwritten
        # here, so it becomes the fallback for later items -- confirm that
        # this is intended.
        $updated = $ts->strftime($atom_fmt);

        # Hand the normalised timestamp to the entry instead of the raw
        # scraped text, which is only a valid Atom date when $date_fmt
        # happens to be the Atom format itself.
        $item_updated = $updated;

        # The scraped permalink serves as both the entry's link and its id.
        my $enc_url = $item->{permalink};

        # Fields of the feed entry. An item without a category of its own
        # gets the global $category.
        my %info = (
            ( map { $_ => $item->{ $_ } } qw( title summary ) ),
            link     => $enc_url,
            id       => $enc_url,
            updated  => $item_updated,
            category => $item->{category} || $category,
        );

        # In debug mode, dump the entry fields sorted by field name.
        if ($debug) {
            foreach my $field (sort keys %info) {
                printf "%10s : %s\n", $field, $info{ $field };
            }
        }
            
        # beware. XML::Atom::SimpleFeed uses warnings => fatal,



( run in 0.365 second using v1.01-cache-2.11-cpan-05444aca049 )