Mail-Digest-Tools
view release on metacpan or search on metacpan
: "${$config_out_ref}{'dir_digest'}/archived_today.txt";
my $dir_archive_top = ${$config_out_ref}{'dir_archive_top'};
die "Missing top archive directory: $!" unless (-d $dir_archive_top);
foreach ('a'..'z') {
die "Missing archive subdirectory $_: $!" unless (-d "$dir_archive_top/$_");
}
die "Missing archive subdirectory 'other': $!" unless (-d "$dir_archive_top/other");
open ARCH, ">$archfile" or die "Couldn't open $archfile for writing: $!";
print ARCH 'Archived today (', scalar(localtime), "):\n";
print ARCH '-' x 41, "\n";
my ($thr, $archstr);
my $toarchive = 0;
foreach $thr (sort keys %{$nonrecentref}) {
my $initial = lc(substr $thr, 0, 1);
print "Archiving: $thr\n";
$archstr .= $thr . "\n";
if ($initial =~ /[a-zA-Z]/) {
rename($thr, "$dir_archive_top/$initial/$thr") or die "Couldn't move $thr: $!";
} else {
rename($thr, "$dir_archive_top/other/$thr") or die "Couldn't move $thr: $!";
}
$toarchive++;
print "$toarchive files archived\n\n" if ($toarchive % 100 == 0);
}
print "$toarchive files archived\n\n";
$toarchive ? print ARCH $archstr : print ARCH "[None.]\n";
close ARCH or die "Couldn't close $archfile after writing: $!";
}
# _kill_old_files(\%config_out, \%nonrecent)
#
# Deletes every file named by a key of %nonrecent and records what was
# deleted in the "deleted today" log.
#
# Arguments:
#   $config_out_ref - hash ref.  'deleted_today' names the log file; when
#                     that key is undefined the log is written to
#                     "<dir_digest>/deleted_today.txt".
#   $nonrecentref   - hash ref whose keys are the paths handed to unlink();
#                     the values are not consulted here.
#
# Side effects: prints progress to STDOUT; overwrites the log file.
# Dies if the log cannot be opened or closed, or as soon as one unlink()
# fails (files later in sort order are then left untouched).
sub _kill_old_files {
    my ($config_out_ref, $nonrecentref) = @_;
    my $killfile = defined ${$config_out_ref}{'deleted_today'}
        ? ${$config_out_ref}{'deleted_today'}
        : "${$config_out_ref}{'dir_digest'}/deleted_today.txt";    # v1.95

    # Three-argument open: the configured name is used literally, so mode
    # characters or surrounding whitespace in it cannot change what is
    # opened.  The lexical handle cannot collide with a caller's handle.
    open(my $kill_fh, '>', $killfile)
        or die "Couldn't open $killfile for writing: $!";
    print {$kill_fh} 'Deleted today (', scalar(localtime), "):\n";
    print {$kill_fh} '-' x 40, "\n";

    my $killstr = '';
    my $tokill  = 0;
    foreach my $thr (sort keys %{$nonrecentref}) {
        print "Unlinking: $thr\n";
        $killstr .= $thr . "\n";
        unlink $thr or die "Couldn't unlink $thr: $!";
        $tokill++;
        # Periodic progress report for large clean-ups.
        print "$tokill files deleted\n" if ($tokill % 100 == 0);
    }
    print "$tokill files deleted\n";

    # The log always has a body: the list of names, or an explicit marker.
    print {$kill_fh} ($tokill ? $killstr : "[None.]\n");
    close $kill_fh or die "Couldn't close $killfile after writing: $!";
}
# _get_digest_list(\%config_in, \%config_out)
#
# Returns a reference to the list of entries in the digest directory whose
# names match the configured 'grep_formula', sorted case-insensitively.
#
# Arguments:
#   $config_in_ref  - hash ref; 'grep_formula' is interpolated into a regex
#                     that each directory entry name is tested against.
#   $config_out_ref - hash ref; 'dir_digest' is the directory to read.
#
# Dies if the directory cannot be opened or closed.
sub _get_digest_list {
    my ($config_in_ref, $config_out_ref) = @_;
    my $dir_digest = ${$config_out_ref}{'dir_digest'};
    my $formula    = ${$config_in_ref}{'grep_formula'};

    # Lexical dirhandle: a package-global bareword handle would silently
    # re-open (and then close) any handle of the same name the caller holds.
    opendir(my $dh, $dir_digest) or die "no $dir_digest?: $!";
    my @digests =
        sort { lc($a) cmp lc($b) }
        grep { /$formula/ }
        readdir($dh);
    closedir($dh) or die "Could not close $dir_digest: $!";
    return \@digests;
}
# _prep_source_file(\%config_in, \%config_out, \@digests)
#
# Builds and returns a reference to %in_out: one entry per digest filename,
# keyed by the ID computed for that file, whose value is an array ref that
# starts out holding just the filename.
#
# Arguments:
#   $config_in_ref  - hash ref; 'pattern_target' is the regex whose capture
#                     groups extract the digest number(s) from a filename.
#   $config_out_ref - hash ref; 'id_format' is a string of Perl code that is
#                     eval'ed right after the match to turn the captures
#                     ($1, $2) into the ID.
#   $digests_ref    - array ref of digest filenames.
#
# Dies if a filename does not match 'pattern_target', or if 'id_format'
# fails to evaluate or yields no value.  Without these checks an ID would
# be built from undefined or left-over captures and could silently replace
# another digest's entry.
sub _prep_source_file {
    my ($config_in_ref, $config_out_ref, $digests_ref) = @_;    # v1.94
    my $pattern   = ${$config_in_ref}{'pattern_target'};
    my $id_format = ${$config_out_ref}{'id_format'};

    # %in_out: hash of all instances in directory of a given digest,
    # value refers to digest's title and its message topics
    my (%in_out, $id);

    # $_ is deliberately kept as the loop variable so that it is visible to
    # the eval'ed id_format, exactly like the capture variables.
    foreach (@{$digests_ref}) {
        $_ =~ m/$pattern/
            or die "Digest filename '$_' does not match pattern_target '$pattern'";

        # NOTE: string eval of configuration data -- 'id_format' must come
        # from a trusted source, since it is executed as Perl code.
        $id = eval($id_format);    # v1.94
        die "Could not evaluate id_format for '$_': $@" if $@;
        die "id_format produced no ID for '$_'" unless defined $id;

        $in_out{$id} = [ $_ ];
    }
    return \%in_out;
}
# _identify_target_digest(\%config_in, \%config_out,
#                         $dig_number, $dig_entry, \@digests)
#
# Returns the first filename in @digests whose digest number equals
# $dig_number.
#
# The number is taken from the captures of 'pattern_target': the second
# capture when it is defined (volume + issue style names), otherwise the
# first (issue-only style names).
#
# Arguments:
#   $config_in_ref  - hash ref; supplies 'pattern_target'.
#   $config_out_ref - hash ref; 'title' and 'dir_digest' are used only in
#                     the "not found" message.
#   $dig_number     - digest number being looked for (compared numerically).
#   $dig_entry      - accepted for interface compatibility; not used here.
#   $digests_ref    - array ref of candidate filenames.
#
# Dies if a filename does not match 'pattern_target' or the match yields no
# capture.  If no file carries the requested number, a message is printed
# to STDERR and the program exits with status 0.
sub _identify_target_digest {
    my ($config_in_ref, $config_out_ref,
        $dig_number, $dig_entry, $digests_ref) = @_;
    my $pattern = ${$config_in_ref}{'pattern_target'};

    foreach my $digfile (@{$digests_ref}) {
        # The match result must be checked: after a failed match $1/$2 may
        # still hold values from an earlier successful match, which would
        # otherwise be mistaken for this file's digest number.
        $digfile =~ m/$pattern/
            or die "Couldn't process digest filename '$digfile' to identify target digest";

        my $number = defined $2 ? $2 : $1;
        die "Couldn't process digest filename '$digfile' to identify target digest"
            unless defined $number;

        return $digfile if ($number == $dig_number);
    }

    print STDERR "No ${$config_out_ref}{'title'} digest numbered $dig_number could be found in directory\n";
    print STDERR "  ${$config_out_ref}{'dir_digest'}\n";
    # Exit status 0 is long-standing behaviour that calling scripts may
    # depend on, so it is left as is.
    exit 0;
}
sub _get_log_data {
my ($config_out_ref, $choice, $in_out_ref) = @_;
my $dir_digest = ${$config_out_ref}{'dir_digest'};
my $dir_threads = ${$config_out_ref}{'dir_threads'};
my $logfile = ${$config_out_ref}{'digests_log'};
my $readfile = defined ${$config_out_ref}{'digests_read'} # new in 1.95
? ${$config_out_ref}{'digests_read'}
: "$dir_digest/digests_read.txt";
# hash which pulls in data from an external log file that
# records which digests have been previously processed
my (%hashlog);
open(LOG, $logfile) || die "cannot open $logfile for reading: $!";
while (<LOG>) {
chomp;
questions:
=over 4
=item 1
What internal structure has the mailing list sponsor provided for a given
digest?
=item 2
How do I want to structure the results of applying Mail::Digest::Tools to a
particular digest on my system?
=back
Each of these two questions breaks down into sub-parts. Their answers
supply you with the information with which you will construct the two
configuration hashes passed to most Mail::Digest::Tools functions.
Let us take each in turn.
=head1 C<%config_in>: THE INTERNAL STRUCTURE OF A DIGEST
The best way to learn about the internal structure of a mailing list digest
(other than to study the application which created the digest in the first
place) is to accumulate several instances of the digest on your system in a
directory devoted to that purpose. Examine the way the digest's filename is
formed. Then examine the digest file itself. You will soon pick up a feel
for the structure of the digest, which will guide you in configuring
Mail::Digest::Tools for your system. That configuration will take the form
of a Perl hash which, for illustrative purposes, we shall here call
C<%xxx_config_in> where C<xxx> is a short-hand title for a particular digest.
For heuristic purposes we will examine the characteristics of two mailing
list digests which the author has been following and archiving for several
years: ActiveState's 'Perl-Win32-Users' digest and Yahoo! Groups' Perl
Beginners group digest.
=head2 Analysis of Digest's File Name
We must study a digest's file name in order to be able to write a pattern
with which we will be able to distinguish a digest file from any non-digest
file sitting in the same directory, as well as to be able to extract the
digest number from that file name.
Once saved as plain-text files, Perl-Win32-Users digest files typically look
like this in a directory:
Perl-Win32-Users Digest, Vol 1, Issue 1771.txt
Perl-Win32-Users Digest, Vol 1, Issue 1772.txt
Similarly, the Perl Beginner digest files look like this:
[PBML] Digest Number 1491.txt
[PBML] Digest Number 1492.txt
To correctly identify Perl-Win32-Users digest files from any other files in
the same directory, we compose a string which would form the core of a Perl
regular expression, I<i.e.,> everything in a pattern except the outer
delimiters. Internally, Mail::Digest::Tools passes the file name through a
C<grep { /regexp/ }> pattern, so the first key is called C<grep_formula>.
%pw32u_config_in = (
grep_formula => 'Perl-Win32-Users Digest',
...
);
The equivalent pattern for the Perl Beginners digest would be:
%pbml_config_in = (
grep_formula => '\[PBML\]',
...
);
Note that the C<[> and C<]> characters have to be escaped with a C<\>
backslash because they are normally metacharacters inside Perl regular
expressions.
We next have to extract the digest number from the digest's file name.
Certain mailing list programs give individual digests both a 'Volume' number
as well as an individual digest number. Perl-Win32-Users typifies this. In
the example above we need to capture both the C<1> as volume number and C<1771>
as digest number. The next key in our configuration hash is called
C<pattern_target>:
%pw32u_config_in = (
grep_formula => 'Perl-Win32-Users Digest',
pattern_target => '.*Vol\s(\d+),\sIssue\s(\d+)\.txt',
...
);
Note the two sets of capturing parentheses.
Other digests, such as those at Yahoo! Groups, dispense with a volume number
and simply increment each digest number:
%pbml_config_in = (
grep_formula => '\[PBML\]',
pattern_target => '.*\s(\d+)\.txt$',
...
);
Note that this C<pattern_target> contains only one pair of capturing
parentheses.
=head2 Analysis of Digest's Internal Structure
A digest's internal structure is discussed in detail above (see
'A TYPICAL MAILING LIST DIGEST'). Here we need to identify two
characteristics: the way the digest introduces its list of today's topics
and the string it uses to delimit the list of today's topics from the first
individual message in the digest and all subsequent messages from one another.
Continuing with our two examples from above, we provide values for keys
C<topics_intro> and C<source_msg_delimiter>:
%pw32u_config_in = (
grep_formula => 'Perl-Win32-Users digest',
pattern_target => '.*Vol\s(\d+),\sIssue\s(\d+)\.txt',
topics_intro => 'Today\'s Topics:',
source_msg_delimiter => "--__--__--\n\n",
...
);
Note the escaped C<'> apostrophe character in the value for key
C<topics_intro>.
%pbml_config_in = (
grep_formula => '\[PBML\]',
pattern_target => '.*\s(\d+)\.txt$',
topics_intro => 'Topics in this digest:',
source_msg_delimiter => "________________________________________________________________________\n________________________________________________________________________\n\n",
...
);
Note that the values provided for the respective C<source_msg_delimiter> keys
had to be double-quoted strings. That's because all such delimiters include
two or more C<\n> newline characters so that they form paragraphs unto
themselves. Unless indicated otherwise, the values for all other keys in
the configuration hash are single-quoted strings.
Note: In early 2004, while Mail::Digest::Tools was being prepared for its
initial distribution on CPAN, ActiveState changed certain features in the
daily digest versions of its mailing lists. Hence, the code example presented
above should not be 'copied-and-pasted' into a configuration hash with which
you, the user, might follow the current Perl-Win32-Users digest. In
particular, the source message delimiter was changed to a string of 30
hyphens followed by 2 C<\n> newline characters:
"------------------------------\n\n"
However, since it is not unheard of for contributors to a mailing list to use
such a string of hyphens within their postings or signatures, using a string
of hyphens is not a particularly apt choice for a source message delimiter.
In this particular case, the author is getting better (but not fully tested)
results by including an additional newline I<before> the hyphen string in
order to more uniquely identify the source message delimiter:
"\n------------------------------\n\n"
=head2 Analysis of Individual Messages
The internal structure of an individual message within a digest is also
discussed in detail above. Here we need to identify patterns with which we
can extract the content of the message's headers.
Certain mailing list digest programs allow a wide variety of headers to appear
in digested messages. The Perl-Win32-Users digest typifies this. Each
message in a Perl-Win32-Users digest I<must> have a message number and headers
for the message's author, recipients, subject and date.
Message: 1
From: Chris Smithson <ChrisSmithson@some.web.address.com>
To: "'Carter Kraus'" <carter@some.web.address.com>,
"Perl-Win32-Users (E-mail)" <perl-win32-users@activestate.com>
Subject: RE: OO Perl Issue.
Date: Wed, 4 Feb 2004 14:17:24 -0600
But a message in this digest may have additional headers for the author's
organization, reply address and/or carbon-copy recipients.
Message: 5
Date: Wed, 4 Feb 2004 15:15:44 -0800
From: Sam Spade <sspade@some.web.address.com>
Organization: Some Web Address
Reply-To: Sam Spade <sspade@some.web.address.com>
To: "Time" <summers@some.web.address.com>
CC: "Perl List" <perl-win32-users@listserv.activestate.com>
Subject: Re: New IE Update causes script problems
Patterns are easily developed to capture this information and store it in the
configuration hash:
%pw32u_config_in = (
grep_formula => 'Perl-Win32-Users digest',
pattern_target => '.*Vol\s(\d+),\sIssue\s(\d+)\.txt',
topics_intro => 'Today\'s Topics:',
source_msg_delimiter => "--__--__--\n\n",
message_style_flag => '^Message:\s+(\d+)$',
from_style_flag => '^From:\s+(.+)$',
org_style_flag => '^Organization:\s+(.+)$',
to_style_flag => '^To:\s+(.+)$',
cc_style_flag => '^CC:\s+(.+)$',
subject_style_flag => '^Subject:\s+(.+)$',
date_style_flag => '^Date:\s+(.+)$',
reply_to_style_flag => '^Reply-To:\s+(.+)$',
...
);
Other mailing list digest programs allow far fewer headers in digested
messages. The Yahoo! Groups digests such as Perl Beginner typify this.
Message: 4
Date: Sun, 7 Dec 2003 19:24:03 +1100
From: Philip Streets <phil@some.web.address.com.au>
Subject: RH9.0, perl 5.8.2 and qmail-localfilter question
The patterns developed to capture this information and store it in the
configuration hash would be as follows:
%pbml_config_in = (
grep_formula => '\[PBML\]',
pattern_target => '.*\s(\d+)\.txt$',
topics_intro => 'Topics in this digest:',
source_msg_delimiter => "________________________________________________________________________\n________________________________________________________________________\n\n",
message_style_flag => '^Message:\s+(\d+)$',
from_style_flag => '^\s+From:\s+(.+)$',
subject_style_flag => '^Subject:\s+(.+)$',
date_style_flag => '^\s+Date:\s+(.+)$',
...
);
Note that these patterns are written to expect one or more whitespace
characters at the beginning of the C<from_style_flag> and the
C<date_style_flag>.
We could -- but do not need to -- add the following key-value pairs to the
C<%pbml_config_in> hash.
org_style_flag => undef,
to_style_flag => undef,
cc_style_flag => undef,
reply_to_style_flag => undef,
=head2 Inspection of Messages for Multipart MIME Content
Certain mailing lists allow subscribers to post messages in either plain-text
or HTML. Certain lists allow subscribers to post attachments; others do not.
When it comes to preparing digests of these messages, the programs which
different lists use lead to different results. The most annoying situation
occurs when a list allows a subscriber to post in 'multipart MIME format' and
then fails to strip out the redundant HTML part after printing the needed
plain-text part.
I<Example:> An all too typical example from an older version of an ActiveState
list digest. (ActiveState changed the format of its digests in early 2004 to
strip out HTML attachments. Hence, the following code no longer accurately
represents what a subscriber to an ActiveState digest will see. Other mailing
lists still suffer from MIME bloat, however, so treat the following code as
illustrative.) The message begins:
Message: 1
To: Perl-Win32-Users@activestate.com
Subject: Can not tie STDOUT to scolled Tk widget
From: John_Wonderman@some.web.address.ca
Date: Thu, 15 Jan 2004 16:25:17 -0500
This is a multipart message in MIME format.
--=_alternative 00750F0485256E1C_=
Content-Type: text/plain; charset="US-ASCII"
Hi;
I am trying to implement a scrolling text widget to capture output for for
at tk app. Without scrolling:
my $text = $mw->Text(-width => 78,
-height => 32,
-wrap => 'word',
-font => ['Courier New','11']
)->pack(-side => 'bottom',
-expand => 1,
-fill => 'both',
);
...
When the plain-text part of the message is finished, it is then repeated in
HTML:
--=_alternative 00750F0485256E1C_=
Content-Type: text/html; charset="US-ASCII"
<br><font size=2 face="Tahoma">Hi;</font>
<p><font size=2 face="Tahoma">I am trying to implement a scrolling text
widget to capture output for for at tk app. Without scrolling:</font>
<p><font size=2 face="Bitstream Vera Sans Mono">my $text = $mw->Text(-width
=> 78,</font>
<br><font size=2 face="Bitstream Vera Sans Mono">
-height => 32,</font>
<br><font size=2 face="Bitstream Vera Sans Mono">
-wrap => 'word',</font>
<br><font size=2 face="Bitstream Vera Sans Mono">
-font => ['Courier New','11']</font>
<br><font size=2 face="Bitstream Vera Sans Mono">)->pack(-side =>
'bottom',</font>
<br><font size=2 face="Bitstream Vera Sans Mono">
-expand => 1,</font>
<br><font size=2 face="Bitstream Vera Sans Mono">
-fill => 'both',</font>
There is no reason to retain this bloat in your thread file. The digest
providers should have stripped it out, but the program they were using failed
to do so. Other digests, such as those at Yahoo! Groups, eliminate all this
blather.
Now, with Mail::Digest::Tools, you can eliminate much of the bloat yourself.
After examining 6-10 instances of a particular mailing list digest, you should
be able to determine whether the digest needs a dose of digital castor oil or
not, and you set key C<MIME_cleanup_flag> accordingly. If the digest contains
unnecessary multipart MIME content, you set this flag to C<1>; otherwise, to
C<0>.
And with that you have completed your analysis of the internal structure of a
given digest and entered the relevant information into the first configuration
hash:
%pw32u_config_in = (
grep_formula => 'Perl-Win32-Users digest',
pattern_target => '.*Vol\s(\d+),\sIssue\s(\d+)\.txt',
topics_intro => 'Today\'s Topics:',
source_msg_delimiter => "--__--__--\n\n",
message_style_flag => '^Message:\s+(\d+)$',
from_style_flag => '^From:\s+(.+)$',
org_style_flag => '^Organization:\s+(.+)$',
to_style_flag => '^To:\s+(.+)$',
cc_style_flag => '^CC:\s+(.+)$',
subject_style_flag => '^Subject:\s+(.+)$',
date_style_flag => '^Date:\s+(.+)$',
reply_to_style_flag => '^Reply-To:\s+(.+)$',
MIME_cleanup_flag => 1,
);
%pbml_config_in = (
grep_formula => '\[PBML\]',
pattern_target => '.*\s(\d+)\.txt$',
topics_intro => 'Topics in this digest:',
source_msg_delimiter => "________________________________________________________________________\n________________________________________________________________________\n\n",
message_style_flag => '^Message:\s+(\d+)$',
from_style_flag => '^\s+From:\s+(.+)$',
subject_style_flag => '^Subject:\s+(.+)$',
date_style_flag => '^\s+Date:\s+(.+)$',
MIME_cleanup_flag => 0,
);
=head1 C<%config_out>: HOW TO PROCESS A DIGEST ON YOUR SYSTEM
C<%config_in> holds the answers to the question: What internal structure has
the mailing list sponsor provided for a given digest? In contrast,
C<%config_out> will hold the answer to this question: How do I want to
structure the results of applying Mail::Digest::Tools to a particular digest
on my system?
For purpose of illustration, we will continue to assume that we are processing
digest files received from the Perl-Win32-Users and Perl Beginner lists. We
will make slightly different choices as to how we process those digest files
so as to illustrate different options available from Mail::Digest::Tools.
We shall also assume that we are going to place the scripts from which we call
Mail::Digest::Tools functions in the directory I<above> the directories in
which we store the digest files once they have been saved as plain-text files.
If we call this directory C<digest> and place the scripts in that directory,
then we will have a directory structure that starts out like this:
digest/
process_new.pl
process_ALL.pl
reply_digest_message.pl
repair_digest_order.pl
consolidate_threads.pl
deletables.pl
pw32u/
Perl-Win32-Users Digest, Vol 1, Issue 1771.txt
Perl-Win32-Users Digest, Vol 1, Issue 1772.txt
pbml/
[PBML] Digest Number 1491.txt
[PBML] Digest Number 1492.txt
=head2 Required C<%config_out> Keys
There are 9 keys which are required in C<%config_out> in order for
Mail::Digest::Tools to function properly. They correspond to 9 decisions
which you must make in setting up a Mail::Digest::Tools configuration on
your system.
=over 4
=item 1 Title
Each digest must be given a title which is used whenever Mail::Digest::Tools
needs to prompt or warn you on standard output. The key which holds this
information in C<%config_out> must be called C<title>; the value for this
element should be sensible.
%pw32u_config_out = (
=back
=head1 HELPFUL HINTS
... in which the module author shares what he has learned using
Mail::Digest::Tools and its predecessors since August 2000.
=head2 Initial Configuration and Testing
As mentioned above, if you are considering creating a local archive of threads
originating in daily digest versions of a mailing list, you should first
accumulate 6-10 instances of such digests and both:
=over 4
=item 1
study the internal structure of the digest -- needed to develop a
C<%config_in> for the digest; and
=item 2
carefully consider how you wish to structure the output from the module's
use on your system -- needed to develop C<%config_out> for the digest
=back
Once you have developed the initial configuration, you should call
C<reprocess_ALL_digests()> on the digests, then open the files created to see
if the results are what you want. If they are I<not> what you want, then you
need to think about what you should change in C<%config_in> and/or
C<%config_out>. Make those changes, then call C<reprocess_ALL_digests()>
again. Repeat as needed, making sure not to delete any of the digest files
you are using as sources until you are completely satisfied with your
configuration.
Once, however, you I<are> satisfied with your configuration, you should call
C<process_new_digests()> on new instances of digests and I<never> call
C<reprocess_ALL_digests()> for that digest again (lest you not be able to
regenerate threads containing messages from digests you have deleted over
time).
=head2 Where to Store the Configuration Hashes
As mentioned above, you will probably find it convenient to write separate
Perl scripts to call each one of Mail::Digest::Tools' public functions. You
could code C<%config_in> and C<%config_out> in each of those scripts just
before the respective function calls. But that would violate the principle of
'Repeated Code Is a Mistake' and multiply maintenance problems. It's far
better to code the two configuration hashes in a separate plain-text file and
'require' that file into your script. That way, any changes you make in the
configuration will be automatically picked up by each script that calls a
Mail::Digest::Tools function.
Here is an example of such a file holding the configuration hashes governing
use of the Perl-Win32-Users digest, along with a script making use of that file.
# file: pw32u.digest.data
$topdir = "E:/Digest/pw32u";
%config_in = (
grep_formula => 'Perl-Win32-Users digest',
pattern_target => '.*Vol\s(\d+),\sIssue\s(\d+)\.txt',
# next element's value must be double-quoted
source_msg_delimiter => "--__--__--\n\n",
topics_intro => 'Today\'s Topics:',
message_style_flag => '^Message:\s+(\d+)$',
from_style_flag => '^From:\s+(.+)$',
org_style_flag => '^Organization:\s+(.+)$',
to_style_flag => '^To:\s+(.+)$',
cc_style_flag => '^CC:\s+(.+)$',
subject_style_flag => '^Subject:\s+(.+)$',
date_style_flag => '^Date:\s+(.+)$',
reply_to_style_flag => '^Reply-To:\s+(.+)$',
MIME_cleanup_flag => 1,
);
%config_out = (
title => 'Perl-Win32-Users',
dir_digest => $topdir,
dir_threads => "$topdir/Threads",
dir_archive_top => "$topdir/Threads/archive",
archived_today => "$topdir/archived_today.txt",
de_archived_today => "$topdir/de_archived_today.txt",
deleted_today => "$topdir/deleted_today.txt",
digests_log => "$topdir/digests_log.txt",
digests_read => "$topdir/digests_read.txt",
todays_topics => "$topdir/todays_topics.txt",
mimelog => "$topdir/mimelog.txt",
id_format => 'sprintf("%03d",$1) . \'_\' .
sprintf("%04d",$2)',
output_id_format => 'sprintf("%04d",$1)',
MIME_cleanup_log_flag => 1,
# next element's value must be double-quoted
thread_msg_delimiter => "--__--__--\n\n",
archive_kill_trigger => 1,
archive_kill_days => 14,
digests_read_flag => 1,
archive_config => 0,
);
# script: dig.pl
# USAGE: perl dig.pl
#!/usr/bin/perl
use strict;
use warnings;
use Mail::Digest::Tools qw( process_new_digests );
our (%config_in, %config_out);
my $data_file = 'pw32u.digest.data';
require $data_file;
process_new_digests(\%config_in, \%config_out);
print "\nFinished\n";
=head2 Maintaining Local Archives of More than One Digest
The module author has maintained local archives of more than a half dozen
different mailing list digests over the past several years. He has found it
convenient to maintain the configuration information for I<all> the digests
he is following at a given time in a I<single> configuration file. The
advantage to this approach is that if two digests share a similar internal
structure (perhaps due to being generated by the same mailing list program or
list provider) and if the user chooses to structure the output from the two
digests in similar or identical ways, then getting the configuration hashes
right becomes much easier and the potential for error is reduced.
Here is a sample directory and file structure for maintaining archives of
two different digests on a Win32 system:
digest/
digest.data
process_new.pl
process_ALL.pl
reply_digest_message.pl
repair_digest_order.pl
consolidate_threads.pl
deletables.pl
pw32u/
Perl-Win32-Users Digest, Vol 1, Issue 1771.txt
Perl-Win32-Users Digest, Vol 1, Issue 1772.txt
digests_log.txt
digests_read.txt
mimelog.txt
Threads/
pbml/
[PBML] Digest Number 1491.txt
[PBML] Digest Number 1492.txt
digests_log.txt
Threads/
File F<digest.data> would look like this:
# digest.data
$topdir = "E:/Digest";
%digest_structure = (
pbml => {
grep_formula => '\[PBML\]',
pattern_target => '.*\s(\d+)\.txt$',
...
},
pw32u => {
grep_formula => 'Perl-Win32-Users digest',
pattern_target => '.*Vol\s(\d+),\sIssue\s(\d+)\.txt',
...
},
);
%digest_output_format = (
pbml => {
title => 'Perl Beginner',
dir_digest => "$topdir/pbml",
dir_threads => "$topdir/pbml/Threads",
...
},
pw32u => {
title => 'Perl-Win32-Users',
dir_digest => "$topdir/pw32u",
dir_threads => "$topdir/pw32u/Threads",
...
},
);
To accommodate this slightly more complex structure in the configuration file,
the calling script might be modified as follows:
# script: dig.pl
# USAGE: perl dig.pl [short-name for digest]
#!/usr/bin/perl
use Mail::Digest::Tools qw( process_new_digests );
my ($this_key, %config_in, %config_out);
# variables imported from $data_file
our (%digest_structure, %digest_output_format);
my $data_file = 'digest.data';
require $data_file;
$this_key = shift @ARGV;
die "\n The command-line argument you typed: $this_key\n does not call an accessible digest$!"
unless (defined $digest_structure{$this_key}
and defined $digest_output_format{$this_key});
my ($k,$v);
while ( ($k, $v) = each %{$digest_structure{$this_key}} ) {
$config_in{$k} = $v;
}
while ( ($k, $v) = each %{$digest_output_format{$this_key}} ) {
$config_out{$k} = $v;
}
process_new_digests(\%config_in, \%config_out);
print "\nFinished\n";
=head2 Getting Your Mail to the Right Place on Your System
For several years the module author used the scripts which were predecessors
to Mail::Digest::Tools on a Win32 system where mail was read with Microsoft
Outlook Express. He would do a "File/Save as.." on an instance of a digest,
select text format (*.txt) and save it to an appropriate directory. Later,
the author used the shareware e-mail client Poco, in which the same operation
was accomplished by highlighting a file and keying "Ctrl+S".
( run in 0.515 second using v1.01-cache-2.11-cpan-39bf76dae61 )