WWW-BBSWatch

 view release on metacpan or  search on metacpan

BBSWatch.pm  view on Meta::CPAN

C<-VERBOSE>: Controls the amount of informative output. Useful values are 0, 1,
2. Default is 0 (completely silent).

=cut

sub new {
  my $class = shift;
  my %args = @_;

  # Normalize args
  foreach (keys %args) {
    my $new = uc($_);
    $new = "-$new" unless $new =~ /^-/;
    unless ($new eq $_) {
      $args{$new} = $args{$_};
      delete $args{$_};
    }
  }

  if ($args{-MDA}) {
    if (ref $args{-MDA}) {
      MIME::Lite::send(@{$args{-MDA}});
    } else {
      MIME::Lite::send("sendmail", $args{-MDA});
    }
  }

  my $self = {
    addr            => $args{-MAIL},
    warn_timeout    => $args{-WARN_TIMEOUT} || (3600 * 3),
    db              => $args{-DB} || 'BBSWatch',
    bbs_url         => $args{-BBS_URL},
    max_articles    => $args{-MAX_ARTICLES} || 999999999,
    verbose         => $args{-VERBOSE} || 0,
  };

  die "Must supply -BBS_URL" unless $self->{bbs_url};

  return bless $self, $class;
}

=pod

=item $b->retrieve([$catchup])

This method emails new bulletin board messages. If the optional parameter
I<catchup> is true, messages will be marked as read without being
emailed. Nothing useful will happen unless the C<article_list> method is
defined to return the list of articles from the BBS's index page.

B<WWW::BBSWatch> uses the B<LWP::UserAgent> module to retrieve the index and
articles. It honors firewall proxies by calling the
C<LWP::UserAgent::env_proxy> method. So if you are behind a firewall, define
the environment variable I<http_proxy> and your firewall will be handled
correctly.

=back

=cut

# In hindsight this is embarrassingly monolithic.
sub retrieve {
  my $self = shift;
  my $catchup = shift || 0;
  my %msgs = ();
  my $lock_file = $self->{db}."_lock";
  open(LOCK, ">".$lock_file) or die "Can't open lock file, '$lock_file': $!";
  flock(LOCK, LOCK_EX|LOCK_NB) or exit;

  tie %msgs, 'SDBM_File', $self->{db}, O_CREAT|O_RDWR, 0644;

  my $ua = LWP::UserAgent->new;
  $ua->env_proxy();

  my $res = $ua->request(HTTP::Request->new('GET', $self->{bbs_url}));
  if ($res->is_error) {
    my $now = time;
    if (defined($msgs{ERROR_TIME})) {
      if ($now - $msgs{ERROR_TIME} > $self->{warn_timeout}) {
        $self->_mail_error("Unable to retrieve the page\n",
           $self->{bbs_url},
           "\nfor over ${\($self->{warn_timeout}/3600.0)} hours. Will keep trying.\n",
           " ---- Server Error Response ----\n",
           $res->error_as_HTML,);
        $msgs{ERROR_TIME} = $now;
      }
    } else {
      $msgs{ERROR_TIME} = $now;
    }
  } else {
    my $err = '';
    my $content = $res->content;
    print STDERR "Retrieved index successfully.\n" if $self->{verbose} > 1;
    my @articles = $self->article_list(\$content);
    print STDERR "Found ", scalar(@articles), " articles.\n"
      if $self->{verbose} > 1;
    my $ct = 0;
    foreach my $art_url (sort @articles) {
      next if defined $msgs{$art_url} and $msgs{$art_url} > 0;
      if ($catchup) {
        print STDERR "Marking $art_url as read\n" if $self->{verbose};
        $msgs{$art_url} = time;
        exit if defined $self->{max_articles} and
          ++$ct >= $self->{max_articles};
        next;
      }
      $res = $ua->request(HTTP::Request->new('GET', $art_url));
      if ($res->is_success) {
        print STDERR "Sending $art_url\n" if $self->{verbose};
        my $content = $res->content;
        my ($type, $data) = $self->process_article(\$content);
        my %opts = (To      => $self->{addr},
                    Subject => $art_url,
                    Type    => $type,
                    Data    => $$data,
                    'Message-Id' => "<$art_url>");
        {
          # There is a very real and legitimate possibility of unitialized
          # values in this block. Turn off warnings.
          local $ = 0;
          my ($from, $name, $subj, $timestamp, $reference) =



( run in 0.871 second using v1.01-cache-2.11-cpan-71847e10f99 )