App-scrape
view release on metacpan or search on metacpan
bin/scrape2rss.pl view on Meta::CPAN
) or pod2usage(2);
# Show the usage text when help was requested on the command line.
if ($help) {
    pod2usage(1);
}

# Whatever is left in @ARGV is the list of URLs to process; without at
# least one entry there is nothing to do.
@ARGV
    or die "No URL given.\n";

# Defaults for options that were not supplied. The feed's own address
# falls back to the output file name and then to a fixed name.
$feed_url   = $outfile || 'feed.atom' if !$feed_url;
$feed_title = 'Atom feed'             if !$feed_title;
$category   = ''                      if !$category;

# Timestamp of this run in UTC, used as the feed-level "updated" value.
my $updated = do {
    my $now = Time::Piece->gmtime;
    $now->strftime('%Y-%m-%dT%H:%M:%SZ');
};

# The feed object. $feed_url is used three times: as the plain link, as
# the rel="self" link and as the feed id.
my $feed = XML::Atom::SimpleFeed->new(
    title   => $feed_title,
    link    => $feed_url,
    link    => {
        rel  => 'self',
        href => $feed_url,
    },
    author  => 'scrape2rss',
    id      => $feed_url,
    updated => $updated,
);
my %seen;
while (@ARGV) {
my $url = shift @ARGV;
next if $seen{ $url }++;
my $html;
if ($url eq '-') {
# read from STDIN
bin/scrape2rss.pl view on Meta::CPAN
summary => $summary,
permalink => $permalink,
title => $title,
date => $date,
#category => $category,
}, {
base => $url,
});
for my $item (@rows) {
# Work out the timestamp for this entry. Items without a scraped date use
# the feed-level timestamp in $updated.
my $item_updated = $item->{date} || $updated;
# Now, extract the information, just in case there is "garbage"
# around the string. Every %-conversion in $date_fmt becomes a run of
# digits; the literal text in between is quoted so that characters such
# as "." only match themselves instead of acting as regex syntax.
my $extr = join '',
    map { $_ =~ /\A%\w\z/ ? '\d+' : quotemeta($_) }
    split /(%\w)/, $date_fmt;
$extr = qr/($extr)/;
# Layout used for $updated and for the timestamp handed to the entry.
my $atom_fmt = '%Y-%m-%dT%H:%M:%SZ';
my $ts;
if ($item->{date} and $item_updated =~ /$extr/) {
    $item_updated = $1;
    $ts = Time::Piece->strptime( $item_updated, $date_fmt );
} else {
    # Only complain when the item carried a date that could not be used,
    # and show that string rather than the fallback value.
    warn "Is [$item_updated] a valid date?\n"
        if $item->{date};
    # $updated is already in $atom_fmt, so it has to be parsed with that
    # layout and not with the user-supplied $date_fmt.
    # NOTE(review): assumes nothing outside this excerpt stores a
    # differently formatted value in $updated - confirm.
    $item_updated = $updated;
    $ts = Time::Piece->strptime( $item_updated, $atom_fmt );
};
# Carry the normalized timestamp forward in $updated, as before, and give
# the entry the same normalized string instead of the page's own layout.
$updated = $ts->strftime($atom_fmt);
$item_updated = $updated;
# The scraped permalink serves as both the entry's link and its id.
my $enc_url = $item->{permalink};
# Fields for this feed entry; the category falls back to the global
# $category when the item has none of its own.
my %info = (
    title    => $item->{title},
    link     => $enc_url,
    id       => $enc_url,
    summary  => $item->{summary},
    updated  => $item_updated,
    category => $item->{category} || $category,
);
# In debug mode, print every field of the entry, sorted by field name.
if ($debug) {
    printf "%10s : %s\n", $_, $info{ $_ }
        for sort keys %info;
}
# beware. XML::Atom::SimpleFeed uses warnings => fatal,
( run in 0.312 second using v1.01-cache-2.11-cpan-2b0bae70ee8 )