WWW-Scrape-Mailman-RSS

 view release on metacpan or  search on metacpan

lib/WWW/Scrape/Mailman/RSS.pm  view on Meta::CPAN

package WWW::Scrape::Mailman::RSS;

use warnings;
use strict;
use WWW::Mechanize;
use HTML::TableExtract;
use XML::Twig;
use XML::RSS;
use HTML::TokeParser::Simple;
use Data::Dumper;

=head1 NAME

WWW::Scrape::Mailman::RSS - Parse mailman listserve archives, format as an rss feed

=head1 VERSION

Version 0.12

=cut

our $VERSION = '0.12';

=head1 SYNOPSIS

On some convenient server to host your rss feeds, schedule
the following script as a cron job at some appropriate interval:

    #!/usr/bin/perl
    use strict;
    use warnings;
    use WWW::Scrape::Mailman::RSS;
    my $feed = WWW::Scrape::Mailman::RSS->new(
       'rss_version' => '0.91',
             'debug' => 0, # try values from 1 to 5 for noisier output
       );

    my %args = (
         'info_url' => 'http://ga.greens.org/mailman/listinfo/gpga-news',
         'base_url' => 'http://ga.greens.org/pipermail/gpga-news',
        'list_name' => 'gpga-news',
         'audience' => 'Greens',
      'description' => 'News by, about and for Greens',
           'cycles' => 2,
      'output_file' => '/home/hesco/sites/news.tns.campaignfoundations.com/gpga_news_feed.html',
       'rss_output' => '/home/hesco/sites/news.tns.campaignfoundations.com/gpga_news_feed.rss',
      );

    $feed->render_feed(\%args);

    # create additional feeds for other lists here

    1;

Then on your site, set your feed aggregator to point to:
	http://news.tns.campaignfoundations.com/gpga_news_feed.rss

=head1 METHODS 

=head2 WWW::Scrape::Mailman::RSS->new( \%defaults )

Given a hashref of defaults, constructs and returns a $feed
object, including embedded objects for WWW::Mechanize,
HTML::TableExtract, XML::Twig and XML::RSS.  The hashref may
include the key 'rss_version', which is passed to XML::RSS and
falls back to '0.91' when it is not supplied.  If
$defaults->{'debug'} is set, you can see debugging output, with
the noise level increasing as you increment it from 1 to 5.

=cut

sub new {
  # Constructor.  Takes the class name and a hashref of settings, and
  # returns a blessed hashref holding those settings, a fallback value
  # for each standard setting that is missing or undefined, and helper
  # objects stored under the keys 'agent', 'te', 'twig' and 'rss'.
  my $class = shift;
  my $defaults = shift;

  # Work on a shallow copy so the caller's hashref is never modified;
  # the fallback values used to be written back into the caller's hash.
  my $given = defined($defaults) ? $defaults : {};
  my $self = { %{$given} };

  # Values applied when a setting is absent or explicitly undef.
  my %fallback_for = (
    'debug'       => 0,
    'rss_version' => '0.91',
    'feed_format' => 'html',
    'audience'    => 'readers',
    'feed_type'   => 'updates',
    'server'      => 'default',
  );

  foreach my $key (keys %fallback_for){
    if(!defined($self->{$key})){
      $self->{$key} = $fallback_for{$key};
    }
  }

  # The helper objects are assigned after the settings are copied, so
  # they replace any caller-supplied values stored under the same keys.
  $self->{'agent'} = WWW::Mechanize->new();
  # Only tables carrying these three column headers are extracted.
  $self->{'te'} = HTML::TableExtract->new( headers => [ 'Archive', 'View by:', 'Downloadable version'] );
  $self->{'twig'} = XML::Twig->new( );
  $self->{'rss'} = XML::RSS->new( version => $self->{'rss_version'} );

  bless $self, $class;
  return $self;
}

=head2 $self->render_feed ( \%args )

Given a $feed object and a hashref of arguments, including
list_name, info_url, description, base_url, cycles and
rss_output, download, process and render as an rss feed the
most recent $args->{'cycles'} cycles of a mailman list's
public archives.

=cut



( run in 1.445 second using v1.01-cache-2.11-cpan-df04353d9ac )