WWW-Scrape-Mailman-RSS
view release on metacpan or search on metacpan
lib/WWW/Scrape/Mailman/RSS.pm view on Meta::CPAN
package WWW::Scrape::Mailman::RSS;
use warnings;
use strict;
use WWW::Mechanize;
use HTML::TableExtract;
use XML::Twig;
use XML::RSS;
use HTML::TokeParser::Simple;
use Data::Dumper;
=head1 NAME
WWW::Scrape::Mailman::RSS - Parse Mailman mailing list archives and format them as an RSS feed
=head1 VERSION
Version 0.12
=cut
# Module version; the same number is stated in the POD VERSION section above.
our $VERSION = '0.12';
=head1 SYNOPSIS
On some convenient server to host your rss feeds, schedule
the following script as a cron job at some appropriate interval:
#!/usr/bin/perl
use strict;
use warnings;
use WWW::Scrape::Mailman::RSS;
my $feed = WWW::Scrape::Mailman::RSS->new({
'rss_version' => '0.91',
'debug' => 0, # try values from 1 to 5 for noisier output
});
my %args = (
'info_url' => 'http://ga.greens.org/mailman/listinfo/gpga-news',
'base_url' => 'http://ga.greens.org/pipermail/gpga-news',
'list_name' => 'gpga-news',
'audience' => 'Greens',
'description' => 'News by, about and for Greens',
'cycles' => 2,
'output_file' => '/home/hesco/sites/news.tns.campaignfoundations.com/gpga_news_feed.html',
'rss_output' => '/home/hesco/sites/news.tns.campaignfoundations.com/gpga_news_feed.rss',
);
$feed->render_feed(\%args);
# create additional feeds for other lists here
1;
Then on your site, set your feed aggregator to point to:
http://news.tns.campaignfoundations.com/gpga_news_feed.rss
=head1 METHODS
=head2 WWW::Scrape::Mailman::RSS->new( \%defaults )
Given a hashref of defaults (which may include the key
'rss_version'; it falls back to '0.91' when absent), constructs
and returns a $feed object, including embedded objects for
WWW::Mechanize, HTML::TableExtract, XML::Twig and XML::RSS.
If $defaults->{'debug'} is set, you can see debugging output,
with the noise level increasing as you increment it from 1 to 5.
=cut
# Constructor: builds a feed object holding the caller's settings plus the
# helper objects used for scraping and feed generation.
#
# Arguments after the class name may be given in either form:
#   ->new( { rss_version => '0.91', debug => 0 } )   # single hash reference
#   ->new(   rss_version => '0.91', debug => 0   )   # flat key/value list
# Calling with no arguments (or with undef) uses the built-in defaults only.
#
# Keys that are missing or undefined receive these defaults:
#   debug => 0, rss_version => '0.91', feed_format => 'html',
#   audience => 'readers', feed_type => 'updates', server => 'default'
# Every other key supplied by the caller is copied onto the object unchanged.
# The caller's own hash is never modified.
#
# Returns the blessed object. Croaks if a flat list has an odd number of
# elements (which includes a lone non-reference scalar).
sub new {
    my ( $class, @args ) = @_;

    my %config;
    if ( ref $args[0] ) {
        # Hash reference form; any further arguments are ignored.
        # Work on a copy so the defaults applied below are not written
        # back into the caller's hash.
        %config = %{ $args[0] };
    }
    elsif ( defined $args[0] ) {
        # Flat key/value list form, as shown in the SYNOPSIS.
        if ( @args % 2 ) {
            require Carp;
            Carp::croak( 'new() expects a hash reference or a list of '
                  . 'key/value pairs' );
        }
        %config = @args;
    }

    # Fallback values for settings the caller left out or set to undef.
    my %fallback = (
        'debug'       => 0,
        'rss_version' => '0.91',
        'feed_format' => 'html',
        'audience'    => 'readers',
        'feed_type'   => 'updates',
        'server'      => 'default',
    );
    foreach my $key ( keys %fallback ) {
        $config{$key} = $fallback{$key} if !defined $config{$key};
    }

    my $self = { %config };

    # Helper objects are assigned after the settings are copied, so they
    # take precedence over any caller-supplied keys of the same name.
    $self->{'agent'} = WWW::Mechanize->new();
    $self->{'te'}    = HTML::TableExtract->new(
        headers => [ 'Archive', 'View by:', 'Downloadable version' ] );
    $self->{'twig'} = XML::Twig->new();
    $self->{'rss'}  = XML::RSS->new( version => $config{'rss_version'} );

    return bless $self, $class;
}
=head2 $self->render_feed ( \%args )
Given a $feed object and a hashref of arguments (including
list_name, info_url, description, base_url, cycles and
rss_output), downloads and processes the most recent
$args->{'cycles'} cycles of a Mailman list's public archives
and renders them as an RSS feed.
=cut
( run in 1.445 second using v1.01-cache-2.11-cpan-df04353d9ac )