WWW-BBSWatch
view release on metacpan or search on metacpan
BBSWatch.pm view on Meta::CPAN
C<-VERBOSE>: Controls the amount of informative output. Useful values are 0, 1,
2. Default is 0 (completely silent).
=cut
sub new {
my $class = shift;
my %args = @_;
# Normalize args
foreach (keys %args) {
my $new = uc($_);
$new = "-$new" unless $new =~ /^-/;
unless ($new eq $_) {
$args{$new} = $args{$_};
delete $args{$_};
}
}
if ($args{-MDA}) {
if (ref $args{-MDA}) {
MIME::Lite::send(@{$args{-MDA}});
} else {
MIME::Lite::send("sendmail", $args{-MDA});
}
}
my $self = {
addr => $args{-MAIL},
warn_timeout => $args{-WARN_TIMEOUT} || (3600 * 3),
db => $args{-DB} || 'BBSWatch',
bbs_url => $args{-BBS_URL},
max_articles => $args{-MAX_ARTICLES} || 999999999,
verbose => $args{-VERBOSE} || 0,
};
die "Must supply -BBS_URL" unless $self->{bbs_url};
return bless $self, $class;
}
=pod
=item $b->retrieve([$catchup])
This method emails new bulletin board messages. If the optional parameter
I<catchup> is true, messages will be marked as read without being
emailed. Nothing useful will happen unless the C<article_list> method is
defined to return the list of articles from the BBS's index page.
B<WWW::BBSWatch> uses the B<LWP::UserAgent> module to retrieve the index and
articles. It honors firewall proxies by calling the
C<LWP::UserAgent::env_proxy> method. So if you are behind a firewall, define
the environment variable I<http_proxy> and your firewall will be handled
correctly.
=back
=cut
# In hindsight this is embarrassingly monolithic.
sub retrieve {
my $self = shift;
my $catchup = shift || 0;
my %msgs = ();
my $lock_file = $self->{db}."_lock";
open(LOCK, ">".$lock_file) or die "Can't open lock file, '$lock_file': $!";
flock(LOCK, LOCK_EX|LOCK_NB) or exit;
tie %msgs, 'SDBM_File', $self->{db}, O_CREAT|O_RDWR, 0644;
my $ua = LWP::UserAgent->new;
$ua->env_proxy();
my $res = $ua->request(HTTP::Request->new('GET', $self->{bbs_url}));
if ($res->is_error) {
my $now = time;
if (defined($msgs{ERROR_TIME})) {
if ($now - $msgs{ERROR_TIME} > $self->{warn_timeout}) {
$self->_mail_error("Unable to retrieve the page\n",
$self->{bbs_url},
"\nfor over ${\($self->{warn_timeout}/3600.0)} hours. Will keep trying.\n",
" ---- Server Error Response ----\n",
$res->error_as_HTML,);
$msgs{ERROR_TIME} = $now;
}
} else {
$msgs{ERROR_TIME} = $now;
}
} else {
my $err = '';
my $content = $res->content;
print STDERR "Retrieved index successfully.\n" if $self->{verbose} > 1;
my @articles = $self->article_list(\$content);
print STDERR "Found ", scalar(@articles), " articles.\n"
if $self->{verbose} > 1;
my $ct = 0;
foreach my $art_url (sort @articles) {
next if defined $msgs{$art_url} and $msgs{$art_url} > 0;
if ($catchup) {
print STDERR "Marking $art_url as read\n" if $self->{verbose};
$msgs{$art_url} = time;
exit if defined $self->{max_articles} and
++$ct >= $self->{max_articles};
next;
}
$res = $ua->request(HTTP::Request->new('GET', $art_url));
if ($res->is_success) {
print STDERR "Sending $art_url\n" if $self->{verbose};
my $content = $res->content;
my ($type, $data) = $self->process_article(\$content);
my %opts = (To => $self->{addr},
Subject => $art_url,
Type => $type,
Data => $$data,
'Message-Id' => "<$art_url>");
{
# There is a very real and legitimate possibility of unitialized
# values in this block. Turn off warnings.
local $ = 0;
my ($from, $name, $subj, $timestamp, $reference) =
( run in 0.871 second using v1.01-cache-2.11-cpan-71847e10f99 )