CPAN-Testers-Data-Addresses

 view release on metacpan or  search on metacpan

lib/CPAN/Testers/Data/Addresses.pm  view on Meta::CPAN

package CPAN::Testers::Data::Addresses;

use 5.006;
use strict;
use warnings;

use vars qw($VERSION);
$VERSION = '0.17';
$|++;

#----------------------------------------------------------------------------
# Library Modules

use base qw(Class::Accessor::Fast);

use CPAN::Testers::Common::DBUtils;
use Config::IniFiles;
use DBI;
use File::Basename;
use File::Path;
use File::Slurp;
use Getopt::Long;
use IO::File;

#----------------------------------------------------------------------------
# Variables

my (%backups);

my %phrasebook = (
    'AllAddresses'          => q{SELECT * FROM tester_address},
    'AllAddressesFull'      => q{SELECT a.*,p.name,p.pause FROM tester_address AS a INNER JOIN tester_profile AS p ON p.testerid=a.testerid},
    'UpdateAddressIndex'    => q{REPLACE INTO ixaddress (id,guid,addressid,fulldate) VALUES (?,?,?,?)},

    'InsertAddress'         => q{INSERT INTO tester_address (testerid,address,email) VALUES (?,?,?)},
    'GetAddressByText'      => q{SELECT * FROM tester_address WHERE address = ?},
    'LinkAddress'           => q{UPDATE tester_address SET testerid=? WHERE addressid=?},

    'GetTesterByPause'      => q{SELECT testerid FROM tester_profile WHERE pause = ?},
    'GetTesterByName'       => q{SELECT testerid FROM tester_profile WHERE name = ?},
    'InsertTester'          => q{INSERT INTO tester_profile (name,pause) VALUES (?,?)},

    'AllReports'            => q{SELECT id,guid,tester,fulldate FROM cpanstats WHERE type=2 AND id > ? ORDER BY id LIMIT 1000000},
    'GetTestersByMonth'     => q{SELECT DISTINCT c.id,c.guid,c.tester,c.fulldate FROM cpanstats c LEFT JOIN ixaddress x ON x.id=c.id LEFT JOIN tester_address a ON a.addressid=x.addressid WHERE a.address IS NULL AND c.postdate >= '%s' AND c.state IN (...
    'GetTesters'            => q{SELECT DISTINCT c.id,c.guid,c.tester,c.fulldate FROM cpanstats c LEFT JOIN ixaddress x ON x.id=c.id LEFT JOIN tester_address a ON a.addressid=x.addressid WHERE a.address IS NULL AND c.state IN ('pass','fail','na','unk...

    # Database backup requests
    'DeleteBackup'  => 'DELETE FROM addresses',
    'CreateBackup'  => 'CREATE TABLE addresses (testerid int, name text, pause text, PRIMARY KEY (testerid))',
    'SelectBackup'  => 'SELECT * FROM tester_profile',
    'InsertBackup'  => 'INSERT INTO addresses (testerid,name,pause) VALUES (?,?,?)',

    # Consolidations
    'DuplicateAddresses'    => q{SELECT address,count(*) AS count FROM tester_address GROUP BY address ORDER BY count DESC},
    'UpdateAddress'         => q{UPDATE ixaddress SET addressid=? WHERE addressid=?},
    'DeleteAddress'         => q{DELETE FROM tester_address WHERE addressid=?},
);

my %defaults = (
    'address'   => 'data/addresses.txt',
    'mailrc'    => 'data/01mailrc.txt',
    'lastfile'  => 'data/_lastid',
    'month'     => 199000,
    'match'     => 0,
    'logclean'  => 0
);

#----------------------------------------------------------------------------
# The Application Programming Interface

sub new {
    my $class = shift;

    my $self = {};
    bless $self, $class;

    $self->_init_options(@_);
    return $self;
}

sub DESTROY {
    my $self = shift;
    $self->{fh}->close  if($self->{fh});
}

__PACKAGE__->mk_accessors(qw( lastfile logfile logclean dbh ));

sub process {
    my $self = shift;

    if($self->{options}{update}) {
        $self->update();

    } elsif($self->{options}{reindex}) {
        $self->reindex();

    } elsif($self->{options}{backup}) {
        $self->backup();

    } elsif($self->{options}{clean}) {
        $self->clean();

    } else {
        $self->search();
    }
}

sub search {
    my $self = shift;
    $self->_log("starting search");

    $self->load_addresses();
    $self->match_addresses();
    $self->print_addresses();

    $self->_log("stopping search");
}

sub update {
    my $self = shift;
    my ($new,$all,$add,$err) = (0,0,0,0);
    $self->_log("starting update");

    my $fh = IO::File->new($self->{options}{update})    or die "Cannot open mailrc file [$self->{options}{update}]: $!";
    while(<$fh>) {
        s/\s+$//;
        next unless($_);

        my ($reportid,$guid,$fulldate,$addressid,$testerid,$address,$name,$pause,$display,$match) = split(',');
        unless($address && ($name || $display)) {
            $self->_log("... bogus line: $_");
            $err++;
            next;
        }

        $addressid  ||= 0;
        $testerid   ||= 0;
        $name       ||= $display;
        $pause      ||= '';

        $all++;
        if($testerid == 0) {
            my @rows;
            @rows = $self->dbh->get_query('hash',$phrasebook{'GetTesterByPause'},$pause)    if($pause);
            @rows = $self->dbh->get_query('hash',$phrasebook{'GetTesterByName'},$name)      unless(@rows);

            if(@rows) {
                $testerid = $rows[0]->{testerid};
            } else {
                $testerid = $self->dbh->id_query($phrasebook{'InsertTester'},$name,$pause);
                $new++;
            }
        }

        if($addressid == 0) {
            my @rows = $self->dbh->get_query('hash',$phrasebook{'GetAddressByText'},$address);
            if(@rows) {
                $addressid = $rows[0]->{addressid};

lib/CPAN/Testers/Data/Addresses.pm  view on Meta::CPAN

    my $self = shift;

    $self->_log("starting reindex");

    # load known addresses
    my %address;
    my $next = $self->dbh->iterator('hash',$phrasebook{'AllAddresses'});
    while( my $row = $next->() ) {
        $address{$row->{address}} = $row->{addressid};
    }

    # search through reports updating the index
    my $lastid = defined $self->{options}{lastid} ? $self->{options}{lastid} : $self->_lastid();
    $next = $self->dbh->iterator('hash',$phrasebook{'AllReports'},($lastid || 0));
    while( my $row = $next->() ) {
        #print STDERR "row: $row->{id} $row->{tester}\n";
        if($address{$row->{tester}}) {
            $self->_log("FOUND - row: $row->{id} $row->{tester}");
            $self->dbh->do_query($phrasebook{'UpdateAddressIndex'},$row->{id},$row->{guid},$address{$row->{tester}},$row->{fulldate});
        } else {
            $self->_log("NEW   - row: $row->{id} $row->{tester}");
            $address{$row->{tester}} = $self->dbh->id_query($phrasebook{'InsertAddress'},0,$row->{tester},_extract_email($row->{tester}));
            $self->dbh->do_query($phrasebook{'UpdateAddressIndex'},$row->{id},$row->{guid},$address{$row->{tester}},$row->{fulldate});
        }

        $lastid = $row->{id};
    }
    $self->_lastid($lastid);
    $self->_log("stopping reindex");
}

sub clean {
    my $self = shift;
    $self->_log("starting clean");

    my $next = $self->dbh->iterator('hash',$phrasebook{'DuplicateAddresses'});
    while( my $row = $next->() ) {
        $self->_log("count=$row->{count}, address=$row->{address}");

        last    if($row->{count} <= 1);

        my %addr;
        my @rows = $self->dbh->get_query('hash',$phrasebook{'GetAddressByText'},$row->{address});
        $self->_log("- count=".scalar(@rows).", address=$row->{address}");
        for my $addr (@rows) {
            $self->_log("id=$addr->{addressid}, address=$addr->{address}");
            if($addr{$addr->{address}}) {
                $self->_log("replace $addr->{addressid},'$addr->{address}' => $addr{$addr->{address}}->{addressid},'$addr{$addr->{address}}->{address}'");
                $self->dbh->do_query($phrasebook{'UpdateAddress'},$addr{$addr->{address}}->{addressid},$addr->{addressid});
                $self->dbh->do_query($phrasebook{'DeleteAddress'},$addr->{addressid});

            } else {
                $addr{$addr->{address}} = $addr;
            }
        }
    }

    $self->_log("stopping clean");
}

sub backup {
    my $self = shift;

    for my $driver (keys %{$self->{backups}}) {
        if($self->{backups}{$driver}{'exists'}) {
            $self->{backups}{$driver}{db}->do_query($phrasebook{'DeleteBackup'});
        } elsif($driver =~ /(CSV|SQLite)/i) {
            $self->{backups}{$driver}{db}->do_query($phrasebook{'CreateBackup'});
        }
    }

    $self->_log("Backup via DBD drivers");

    my $rows = $self->dbh->iterator('array',$phrasebook{'SelectBackup'});
    while(my $row = $rows->()) {
        for my $driver (keys %{$self->{backups}}) {
            $self->{backups}{$driver}{db}->do_query($phrasebook{'InsertBackup'},@$row);
        }
    }

    # handle the CSV exception
    if($self->{backups}{CSV}) {
        $self->_log("Backup to CSV file");
        $self->{backups}{CSV}{db} = undef;  # close db handle
        my $fh1 = IO::File->new('addresses','r') or die "Cannot read temporary database file 'addresses'\n";
        my $fh2 = IO::File->new($self->{backups}{CSV}{dbfile},'w+') or die "Cannot write to CSV database file $self->{backups}{CSV}{dbfile}\n";
        while(<$fh1>) { print $fh2 $_ }
        $fh1->close;
        $fh2->close;
        unlink('addresses');
    }
}

sub load_addresses {
    my $self = shift;
    
    my $next = $self->dbh->iterator('hash',$phrasebook{'AllAddressesFull'});
    while( my $row = $next->() ) {
        $self->{named_map}{$row->{name}}     = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED NAME' }  if($row->{name});
        $self->{paused_map}{$row->{pause}}   = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED PAUSE' }  if($row->{pause});
        $self->{parsed_map}{$row->{address}} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED ADDRESS' };
        next    unless($row->{email});
        $self->{address_map}{$row->{email}}  = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED EMAIL' };

        my ($local,$domain) = split(/\@/,$row->{email});
        next    unless($domain);
        $self->{domain_map}{$domain}         = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED DOMAIN' };
    }

    if($self->{options}{verbose}) {
        $self->_log( "named entries   = " . scalar(keys %{ $self->{named_map}   }) . "\n" );
        $self->_log( "paused entries  = " . scalar(keys %{ $self->{paused_map}  }) . "\n" );
        $self->_log( "parsed entries  = " . scalar(keys %{ $self->{parsed_map}  }) . "\n" );
        $self->_log( "address entries = " . scalar(keys %{ $self->{address_map} }) . "\n" );
        $self->_log( "domain entries  = " . scalar(keys %{ $self->{domain_map}  }) . "\n" );
    }

    $next = $self->dbh->iterator('hash',$phrasebook{'AllAddresses'});
    while( my $row = $next->() ) {
        next    if($self->{parsed_map}{$row->{address}});
        $self->{stored_map}{$row->{address}} = { name => '', pause => '', addressid => $row->{addressid}, testerid => 0, match => '# STORED ADDRESS' };
    }

    my $fh = IO::File->new($self->{options}{mailrc})    or die "Cannot open mailrc file [$self->{options}{mailrc}]: $!";
    while(<$fh>) {
        s/\s+$//;
        next    if(/^$/);

        my ($alias,$name,$email) = (/alias\s+([A-Z]+)\s+"([^<]+) <([^>]+)>"/);
        next    unless($alias);

        my $testerid  = $self->{address_map}{$email} ? $self->{address_map}{$email}->{testerid}  : 0;
        my $addressid = $self->{address_map}{$email} ? $self->{address_map}{$email}->{addressid} : 0;

        $self->{pause_map}{lc($alias)} = { name => $name, pause => $alias, testerid => $testerid, addressid => $addressid, match => '# PAUSE ID' };
        $self->{cpan_map}{lc($email)}  = { name => $name, pause => $alias, testerid => $testerid, addressid => $addressid, match => '# CPAN EMAIL' };
    }
    $fh->close;

    if($self->{options}{verbose}) {
        $self->_log( "pause entries   = " . scalar(keys %{ $self->{pause_map} }) . "\n" );
        $self->_log( "cpan entries    = " . scalar(keys %{ $self->{cpan_map}  }) . "\n" );
    }

    # grab all records for the month
    my $sql = $self->{options}{month}

lib/CPAN/Testers/Data/Addresses.pm  view on Meta::CPAN

            $self->{unparsed_map}{$key}->{match} .= " - $domain - $map";
            return 1;
        }
    }

    return 0;
}

sub map_name {
    my ($self,$key) = @_;
    my ($name) = $key =~ /\(+"?([^"\)]+)"?\)+/;
    ($name) = $key =~ /^\s*"?([^"<]+)"?\s+</    unless($name);
    return 0 unless($name);

    if($self->{named_map}{$name}) {
        $self->{unparsed_map}{$key}->{$_} = $self->{named_map}{$name}->{$_}     for(qw(testerid name pause match));
        return 1;
    }

    if($self->{paused_map}{$name}) {
        $self->{unparsed_map}{$key}->{$_} = $self->{paused_map}{$name}->{$_}    for(qw(testerid name pause match));
        return 1;
    }

    $self->{unparsed_map}{$key}{display} = $name;
    $self->{unparsed_map}{$key}{match}   = '# SUGGESTION';

    return 0;
}

#----------------------------------------------------------------------------
# Private Methods

sub _lastid {
    my ($self,$id) = @_;
    my $f = $self->lastfile();

    unless( -f $f) {
        mkpath(dirname($f));
        overwrite_file( $f, 0 );
        $id ||= 0;
    }

    if($id) { overwrite_file( $f, $id ); }
    else    { $id = read_file($f); }

    return $id;
}

sub _extract_email {
    my $str = shift;
    #my ($email) = $str =~ /([-+=\w.]+\@(?:[-\w]+\.)+(?:com|net|org|info|biz|edu|museum|mil|gov|[a-z]{2,2}))/i;
    my ($email) = $str =~ /([-+=\w.]+\@[-\w\.]+)/i;
    return $email || '';
}

sub _init_options {
    my $self = shift;
    my %hash = @_;
    $self->{options} = {};
    my @options = qw(mailrc update clean reindex lastid backup month match verbose lastfile logfile logclean output);
    my %options;

    GetOptions( \%options,

        # mandatory options
        'config|c=s',

        # update mode options
        'update|u=s',

        # clean mode options
        'clean',

        # reindex mode options
        'reindex|r',
        'lastid|l=i',

        # backup mode options
        'backup|b',

        # search mode options
        'mailrc|m=s',
        'month=s',
        'match',

        # other options
        'output=s',
        'lastfile=s',
        'logfile=s',
        'logclean=i',
        'verbose|v',
        'version',
        'help|h'
    ) or $self->_help();

    for my $opt (qw(config help version),@options) {
        $self->{options}{$opt} ||= $hash{$opt}      if($hash{$opt});
        $self->{options}{$opt} ||= $options{$opt}   if($options{$opt});
    }

    $self->_help(1) if($self->{options}{help});
    $self->_help(0) if($self->{options}{version});

    $self->_help(1,"Must specify the configuration file")                       unless(   $self->{options}{config});
    $self->_help(1,"Configuration file [$self->{options}{config}] not found")   unless(-f $self->{options}{config});

    # load configuration
    my $cfg = Config::IniFiles->new( -file => $self->{options}{config} );

    # configure databases
    my %opts;
    my $db = 'CPANSTATS';
    die "No configuration for $db database\n"   unless($cfg->SectionExists($db));
    $opts{$_} = $cfg->val($db,$_)   for(qw(driver database dbfile dbhost dbport dbuser dbpass));
    $self->dbh( CPAN::Testers::Common::DBUtils->new(%opts) );
    die "Cannot configure $db database\n" unless($self->dbh);

    # use configuration settings or defaults if none provided
    for my $opt (@options) {
        $self->{options}{$opt} ||= $cfg->val('MASTER',$opt) || $defaults{$opt};
    }

    # extract filters
    my $filters = $cfg->val('DOMAINS','filters');
    my @filters = split("\n", $filters) if($filters);
    $self->{filters} = \@filters        if(@filters);

    # mandatory options
    #for my $opt (qw()) {
    #    $self->_help(1,"No $opt configuration setting given, see help below.")                          unless(   $self->{options}{$opt});
    #    $self->_help(1,"Given $opt file [$self->{options}{$opt}] not a valid file, see help below.")    unless(-f $self->{options}{$opt});
    #}

    # options to check if provided
    for my $opt (qw(update mailrc)) {
        next                                                                                            unless(   $self->{options}{$opt});
        $self->_help(1,"Given $opt file [$self->{options}{$opt}] not a valid file, see help below.")    unless(-f $self->{options}{$opt});
    }

    # clean up potential rogue characters
    $self->{options}{lastid} =~ s/\D+//g    if($self->{options}{lastid});

    # prime accessors
    $self->lastfile($self->{options}{lastfile});
    $self->logfile($self->{options}{logfile});
    $self->logclean($self->{options}{logclean});

    # configure backup DBs
    if($self->{options}{backup}) {
        $self->help(1,"No configuration for BACKUPS with backup option")    unless($cfg->SectionExists('BACKUPS'));

        # available DBI drivers
        my %DRIVERS_DBI = map { $_ => 1 } DBI->available_drivers();

        my @drivers = $cfg->val('BACKUPS','drivers');
        for my $driver (@drivers) {
            $self->help(1,"No configuration for backup option '$driver'")   unless($cfg->SectionExists($driver));

            # ignore drivers that are unavailable
            unless($DRIVERS_DBI{$driver}) {
                $self->_log("Backup DBD driver '$driver' is not available");
                next;
            }

            %opts = ();
            $opts{$_} = $cfg->val($driver,$_)   for(qw(driver database dbfile dbhost dbport dbuser dbpass));
            $self->{backups}{$driver}{'exists'} = $driver =~ /SQLite/i ? -f $opts{database} : 1;

            # CSV is a bit of an oddity!
            if($driver =~ /CSV/i) {
                $self->{backups}{$driver}{'exists'} = 0;
                $self->{backups}{$driver}{'dbfile'} = $opts{dbfile};
                $opts{dbfile} = 'uploads';
                unlink($opts{dbfile});
            }

            $self->{backups}{$driver}{db} = CPAN::Testers::Common::DBUtils->new(%opts);
            $self->help(1,"Cannot configure BACKUPS database for '$driver'")   unless($self->{backups}{$driver}{db});
        }
    }

    # set output 
    if($self->{options}{output}) {
        if(my $fh = IO::File->new($self->{options}{output}, 'w+')) {
            $self->{fh} = $fh;
        }
    }

    return  unless($self->{options}{verbose});
    print STDERR "config: $_ = ".($self->{options}{$_}||'')."\n"  for(@options);
}

sub _help {
    my ($self,$full,$mess) = @_;

    print "\n$mess\n\n" if($mess);

    if($full) {
        print "\n";
        print "Usage:$0 ( [--help|h] | --version | --config|c=<file> \\\n";
        print "           ( [--update=<file>] \\\n";
        print "           | [--reindex] [--lastid=<num>] \\\n";
        print "           | [--clean] \\\n";
        print "           | [--backup] \\\n";
        print "           | [--mailrc|m=<file>] [--month=<string>] [--match] ) \\\n";
        print "           [--output=<file>] \\\n";
        print "           [--logfile=<file>] [--logclean=(0|1)] \\\n";
        print "           [--verbose|v] ) \n\n";

#              12345678901234567890123456789012345678901234567890123456789012345678901234567890
        print "This program manages the cpan-tester addresses.\n";

        print "\nFunctional Options:\n";
        print "   --config=<file>           # path/file to configuration file\n";
        print "  [--mailrc=<file>]          # path/file to mailrc file\n";
        print "  [--output=<file>]          # path/file to output file (defaults to STDOUT)\n";

        print "\nUpdate Options:\n";
        print "  [--update=<file>]          # run in update mode\n";

        print "\nReindex Options:\n";
        print "  [--reindex]                # run in reindex mode\n";
        print "  [--lastid=<num>]           # id to start reindex from\n";

        print "\nClean Options:\n";
        print "  [--clean]                  # run in clean mode (de-duplication)\n";

        print "\nBackup Options:\n";
        print "  [--backup]                 # run in backup mode\n";

        print "\nSearch Options:\n";
        print "  [--month=<string>]         # YYYYMM string to match from\n";
        print "  [--match]                  # display matches only\n";

        print "\nOther Options:\n";
        print "  [--verbose]                # turn on verbose messages\n";
        print "  [--version]                # display current version\n";
        print "  [--help]                   # this screen\n";

        print "\nFor further information type 'perldoc $0'\n";
    }

    print "$0 v$VERSION\n";
    exit(0);
}

sub _printout {
    my $self = shift;
    if(my $fh = $self->{fh}) {
        print $fh "@_\n";
    } else {
        print STDOUT "@_\n";
    }
}

sub _log {
    my $self = shift;
    my $log = $self->logfile or return;
    mkpath(dirname($log))   unless(-f $log);

    my $mode = $self->logclean ? 'w+' : 'a+';
    $self->logclean(0);

    my @dt = localtime(time);
    my $dt = sprintf "%04d/%02d/%02d %02d:%02d:%02d", $dt[5]+1900,$dt[4]+1,$dt[3],$dt[2],$dt[1],$dt[0];

    my $fh = IO::File->new($log,$mode) or die "Cannot write to log file [$log]: $!\n";
    print $fh "$dt ", @_, "\n";
    $fh->close;
}

q!Will code for a damn fine Balti!;

__END__

#----------------------------------------------------------------------------

=head1 NAME

CPAN::Testers::Data::Addresses - CPAN Testers Addresses Database Manager

=head1 SYNOPSIS

  perl addresses.pl \
        [--verbose|v] --config|c=<file> \
        ( [--help|h] \
        | [--update=<file>] \
        | [--reindex] [--lastid=<num>] \
        | [--clean] \
        | [--backup] \
        | [--mailrc|m=<file>] [--month=<string>] [--match] ) \
        [--logfile=<file>] [--logclean=(0|1)]

=head1 DESCRIPTION

This program allows the user to update the tester addresses database, or
search it, based on a restricted set of criteria.

=head1 SCHEMA

The schema for the MySQL database is below:

    CREATE TABLE `ixaddress` (
      `id` int(10) unsigned NOT NULL,
      `guid` varchar(40) NOT NULL DEFAULT '',
      `addressid` int(10) unsigned NOT NULL,
      `fulldate` varchar(32) DEFAULT NULL,
      PRIMARY KEY (`id`,`guid`),
      KEY `IXGUID` (`guid`),
      KEY `IXADDR` (`addressid`)
    ) ENGINE=MyISAM;

    CREATE TABLE `tester_address` (
      `addressid` int(10) unsigned NOT NULL AUTO_INCREMENT,
      `testerid` int(10) unsigned NOT NULL DEFAULT '0',
      `address` varchar(255) NOT NULL,
      `email` varchar(255) DEFAULT NULL,
      PRIMARY KEY (`addressid`),
      KEY `IXTESTER` (`testerid`),
      KEY `IXADDRESS` (`address`)
    ) ENGINE=MyISAM DEFAULT CHARSET UTF8;

    CREATE TABLE `tester_profile` (
      `testerid` int(10) unsigned NOT NULL AUTO_INCREMENT,
      `name` varchar(255) DEFAULT NULL,
      `pause` varchar(255) DEFAULT NULL,
      PRIMARY KEY (`testerid`),
      KEY `IXNAME` (`name`),
      KEY `IXPAUSE` (`pause`)
    ) ENGINE=MyISAM DEFAULT CHARSET=utf8;

The address field is the same as the tester field in the cpanstats table, while
the email field is the extracted email address field only.

=head1 INTERFACE

=head2 The Constructor

=over

=item * new

Instatiates the object CPAN::Testers::Data::Addresses:

  my $obj = CPAN::Testers::Data::Addresses->new();

=back

=head2 Public Methods

=over

=item * process

Based on accessor settings will run the appropriate methods for the current
execution.

=item * search

Search the tables for unmapped entries, suggesting appropriate mappings.

=item * update

Updates the tester_profiles and tester_address entries as defined by the
reference source text file.

=item * reindex

Indexes the ixaddress table, updating the tester_address table if appropriate.

=item * clean

De-duplicates addresses.

=item * backup

Provides backup files of the uploads database.

=back

=head2 Accessor Methods

=over

=item * logfile

Path to output log file for progress and debugging messages.

=item * logclean

If set to a true value will create/overwrite the logfile, otherwise will
append any messages.

=item * lastfile

Path to the file containing the last cpanstats record ID processed.

=back

=head2 Internal Methods

=over 4

=item load_addresses

Loads all the data files with addresses against we can match, then load all
the addresses listed in the DB that we need to match against.

=item match_addresses

Given all the possible mappings, attempts to match unmapped addresses.

=item print_addresses

Prints the suggested mappings, and those remaining unmapped addresses.

=item map_address

Atempts to map an address to a known CPAN author, then to one that already 
exists in the database, and finally to one that is known within the CPAN 
Authors index.

=item map_domain

Attempts to map an address based on its domain.

=item map_name

Attempts to map any name listed in the address to a tester profile.

=back

=cut

=head2 Private Methods

=over



( run in 3.210 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )