CPAN-Testers-Data-Addresses

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

        - fixed skip numbers.
        - added version option.
        - added help tests.

0.13    2014-09-10
        - fixed license fields in META.json to be lists.
        - extended test suite.
        - added more process tests.
        - rearranged configuration, to optimise logging.
        - now checks and stores available drivers to ensure we have the 
          required backup drivers installed.
        - reordered option loading.

0.12    2013-08-07
        - forgot to update the skip value in tests :(

0.11    2013-08-04
        - added testerid, guid & fulldate to file format, so we can update the
          ixaddress entries too.
        - map against tester's display name.
        - clean functionality added to de-duplicate addresses.

Changes  view on Meta::CPAN

        - removed references to NNTPID.
        - added more domain filters to example settings.ini.

0.04    2009-11-26
        - added text for README
        - updated documentation for wrapper script
        - added and amended various links

0.03    2009-11-24
        - added example settings file
        - added backup code

0.02    2009-11-24
        - added counters to update
        - removed debugging statements
        - added further address matching comparisons
        - added missing prerequisite
        - abstracted filters out into config file.

0.01    2009-11-22
        - initial release

MANIFEST  view on Meta::CPAN

META.json
META.yml
README
t/01base.t
t/05setup_db-cpanstats.t
t/20attributes.ini
t/20attributes.t
t/21internal.t
t/22processes.t
t/23clean.t
t/24backups.t
t/25updates.t
t/26search.t
t/27help.t
t/50logging.t
t/59cleanup.t
t/90podtest.t
t/91podcover.t
t/94metatest.t
t/95changedate.t
t/96metatest.t

bin/addresses.pl  view on Meta::CPAN

addresses.pl - helper script to map tester addresses to real people.

=head1 SYNOPSIS

  perl addresses.pl \
        [--verbose|v] --config|c=<file> \
        ( [--help|h] \
        | [--update=<file>] \
        | [--reindex] [--lastid=<num>] \
        | [--clean] \
        | [--backup] \
        | [--mailrc|m=<file>] [--month=<string>] [--match] ) \
        [--logfile=<file>] [--logclean=(0|1)]

=head1 DESCRIPTION

Using the cpanstats database, this program can be used to update, reindex,
backup and search the tester address tables. 

When searching, the program tries to match unmatched tester addresses to either
a cpan author or an already known tester. For the remaining addresses, an 
attempt at pattern matching is made to try and identify similar addresses in 
the hope they can be manually identified.

=cut

# -------------------------------------
# Library Modules

lib/CPAN/Testers/Data/Addresses.pm  view on Meta::CPAN

use DBI;
use File::Basename;
use File::Path;
use File::Slurp;
use Getopt::Long;
use IO::File;

#----------------------------------------------------------------------------
# Variables

my (%backups);

my %phrasebook = (
    'AllAddresses'          => q{SELECT * FROM tester_address},
    'AllAddressesFull'      => q{SELECT a.*,p.name,p.pause FROM tester_address AS a INNER JOIN tester_profile AS p ON p.testerid=a.testerid},
    'UpdateAddressIndex'    => q{REPLACE INTO ixaddress (id,guid,addressid,fulldate) VALUES (?,?,?,?)},

    'InsertAddress'         => q{INSERT INTO tester_address (testerid,address,email) VALUES (?,?,?)},
    'GetAddressByText'      => q{SELECT * FROM tester_address WHERE address = ?},
    'LinkAddress'           => q{UPDATE tester_address SET testerid=? WHERE addressid=?},

    'GetTesterByPause'      => q{SELECT testerid FROM tester_profile WHERE pause = ?},
    'GetTesterByName'       => q{SELECT testerid FROM tester_profile WHERE name = ?},
    'InsertTester'          => q{INSERT INTO tester_profile (name,pause) VALUES (?,?)},

    'AllReports'            => q{SELECT id,guid,tester,fulldate FROM cpanstats WHERE type=2 AND id > ? ORDER BY id LIMIT 1000000},
    'GetTestersByMonth'     => q{SELECT DISTINCT c.id,c.guid,c.tester,c.fulldate FROM cpanstats c LEFT JOIN ixaddress x ON x.id=c.id LEFT JOIN tester_address a ON a.addressid=x.addressid WHERE a.address IS NULL AND c.postdate >= '%s' AND c.state IN (...
    'GetTesters'            => q{SELECT DISTINCT c.id,c.guid,c.tester,c.fulldate FROM cpanstats c LEFT JOIN ixaddress x ON x.id=c.id LEFT JOIN tester_address a ON a.addressid=x.addressid WHERE a.address IS NULL AND c.state IN ('pass','fail','na','unk...

    # Database backup requests
    'DeleteBackup'  => 'DELETE FROM addresses',
    'CreateBackup'  => 'CREATE TABLE addresses (testerid int, name text, pause text, PRIMARY KEY (testerid))',
    'SelectBackup'  => 'SELECT * FROM tester_profile',
    'InsertBackup'  => 'INSERT INTO addresses (testerid,name,pause) VALUES (?,?,?)',

    # Consolidations
    'DuplicateAddresses'    => q{SELECT address,count(*) AS count FROM tester_address GROUP BY address ORDER BY count DESC},
    'UpdateAddress'         => q{UPDATE ixaddress SET addressid=? WHERE addressid=?},
    'DeleteAddress'         => q{DELETE FROM tester_address WHERE addressid=?},
);

lib/CPAN/Testers/Data/Addresses.pm  view on Meta::CPAN


sub process {
    my $self = shift;

    if($self->{options}{update}) {
        $self->update();

    } elsif($self->{options}{reindex}) {
        $self->reindex();

    } elsif($self->{options}{backup}) {
        $self->backup();

    } elsif($self->{options}{clean}) {
        $self->clean();

    } else {
        $self->search();
    }
}

sub search {

lib/CPAN/Testers/Data/Addresses.pm  view on Meta::CPAN


            } else {
                $addr{$addr->{address}} = $addr;
            }
        }
    }

    $self->_log("stopping clean");
}

sub backup {
    my $self = shift;

    for my $driver (keys %{$self->{backups}}) {
        if($self->{backups}{$driver}{'exists'}) {
            $self->{backups}{$driver}{db}->do_query($phrasebook{'DeleteBackup'});
        } elsif($driver =~ /(CSV|SQLite)/i) {
            $self->{backups}{$driver}{db}->do_query($phrasebook{'CreateBackup'});
        }
    }

    $self->_log("Backup via DBD drivers");

    my $rows = $self->dbh->iterator('array',$phrasebook{'SelectBackup'});
    while(my $row = $rows->()) {
        for my $driver (keys %{$self->{backups}}) {
            $self->{backups}{$driver}{db}->do_query($phrasebook{'InsertBackup'},@$row);
        }
    }

    # handle the CSV exception
    if($self->{backups}{CSV}) {
        $self->_log("Backup to CSV file");
        $self->{backups}{CSV}{db} = undef;  # close db handle
        my $fh1 = IO::File->new('addresses','r') or die "Cannot read temporary database file 'addresses'\n";
        my $fh2 = IO::File->new($self->{backups}{CSV}{dbfile},'w+') or die "Cannot write to CSV database file $self->{backups}{CSV}{dbfile}\n";
        while(<$fh1>) { print $fh2 $_ }
        $fh1->close;
        $fh2->close;
        unlink('addresses');
    }
}

sub load_addresses {
    my $self = shift;
    

lib/CPAN/Testers/Data/Addresses.pm  view on Meta::CPAN

    my $str = shift;
    #my ($email) = $str =~ /([-+=\w.]+\@(?:[-\w]+\.)+(?:com|net|org|info|biz|edu|museum|mil|gov|[a-z]{2,2}))/i;
    my ($email) = $str =~ /([-+=\w.]+\@[-\w\.]+)/i;
    return $email || '';
}

sub _init_options {
    my $self = shift;
    my %hash = @_;
    $self->{options} = {};
    my @options = qw(mailrc update clean reindex lastid backup month match verbose lastfile logfile logclean output);
    my %options;

    GetOptions( \%options,

        # mandatory options
        'config|c=s',

        # update mode options
        'update|u=s',

        # clean mode options
        'clean',

        # reindex mode options
        'reindex|r',
        'lastid|l=i',

        # backup mode options
        'backup|b',

        # search mode options
        'mailrc|m=s',
        'month=s',
        'match',

        # other options
        'output=s',
        'lastfile=s',
        'logfile=s',

lib/CPAN/Testers/Data/Addresses.pm  view on Meta::CPAN

    }

    # clean up potential rogue characters
    $self->{options}{lastid} =~ s/\D+//g    if($self->{options}{lastid});

    # prime accessors
    $self->lastfile($self->{options}{lastfile});
    $self->logfile($self->{options}{logfile});
    $self->logclean($self->{options}{logclean});

    # configure backup DBs
    if($self->{options}{backup}) {
        $self->help(1,"No configuration for BACKUPS with backup option")    unless($cfg->SectionExists('BACKUPS'));

        # available DBI drivers
        my %DRIVERS_DBI = map { $_ => 1 } DBI->available_drivers();

        my @drivers = $cfg->val('BACKUPS','drivers');
        for my $driver (@drivers) {
            $self->help(1,"No configuration for backup option '$driver'")   unless($cfg->SectionExists($driver));

            # ignore drivers that are unavailable
            unless($DRIVERS_DBI{$driver}) {
                $self->_log("Backup DBD driver '$driver' is not available");
                next;
            }

            %opts = ();
            $opts{$_} = $cfg->val($driver,$_)   for(qw(driver database dbfile dbhost dbport dbuser dbpass));
            $self->{backups}{$driver}{'exists'} = $driver =~ /SQLite/i ? -f $opts{database} : 1;

            # CSV is a bit of an oddity!
            if($driver =~ /CSV/i) {
                $self->{backups}{$driver}{'exists'} = 0;
                $self->{backups}{$driver}{'dbfile'} = $opts{dbfile};
                $opts{dbfile} = 'uploads';
                unlink($opts{dbfile});
            }

            $self->{backups}{$driver}{db} = CPAN::Testers::Common::DBUtils->new(%opts);
            $self->help(1,"Cannot configure BACKUPS database for '$driver'")   unless($self->{backups}{$driver}{db});
        }
    }

    # set output 
    if($self->{options}{output}) {
        if(my $fh = IO::File->new($self->{options}{output}, 'w+')) {
            $self->{fh} = $fh;
        }
    }

lib/CPAN/Testers/Data/Addresses.pm  view on Meta::CPAN

    my ($self,$full,$mess) = @_;

    print "\n$mess\n\n" if($mess);

    if($full) {
        print "\n";
        print "Usage:$0 ( [--help|h] | --version | --config|c=<file> \\\n";
        print "           ( [--update=<file>] \\\n";
        print "           | [--reindex] [--lastid=<num>] \\\n";
        print "           | [--clean] \\\n";
        print "           | [--backup] \\\n";
        print "           | [--mailrc|m=<file>] [--month=<string>] [--match] ) \\\n";
        print "           [--output=<file>] \\\n";
        print "           [--logfile=<file>] [--logclean=(0|1)] \\\n";
        print "           [--verbose|v] ) \n\n";

#              12345678901234567890123456789012345678901234567890123456789012345678901234567890
        print "This program manages the cpan-tester addresses.\n";

        print "\nFunctional Options:\n";
        print "   --config=<file>           # path/file to configuration file\n";

lib/CPAN/Testers/Data/Addresses.pm  view on Meta::CPAN

        print "  [--update=<file>]          # run in update mode\n";

        print "\nReindex Options:\n";
        print "  [--reindex]                # run in reindex mode\n";
        print "  [--lastid=<num>]           # id to start reindex from\n";

        print "\nClean Options:\n";
        print "  [--clean]                  # run in clean mode (de-duplication)\n";

        print "\nBackup Options:\n";
        print "  [--backup]                 # run in backup mode\n";

        print "\nSearch Options:\n";
        print "  [--month=<string>]         # YYYYMM string to match from\n";
        print "  [--match]                  # display matches only\n";

        print "\nOther Options:\n";
        print "  [--verbose]                # turn on verbose messages\n";
        print "  [--version]                # display current version\n";
        print "  [--help]                   # this screen\n";

lib/CPAN/Testers/Data/Addresses.pm  view on Meta::CPAN

CPAN::Testers::Data::Addresses - CPAN Testers Addresses Database Manager

=head1 SYNOPSIS

  perl addresses.pl \
        [--verbose|v] --config|c=<file> \
        ( [--help|h] \
        | [--update=<file>] \
        | [--reindex] [--lastid=<num>] \
        | [--clean] \
        | [--backup] \
        | [--mailrc|m=<file>] [--month=<string>] [--match] ) \
        [--logfile=<file>] [--logclean=(0|1)]

=head1 DESCRIPTION

This program allows the user to update the tester addresses database, or
search it, based on a restricted set of criteria.

=head1 SCHEMA

lib/CPAN/Testers/Data/Addresses.pm  view on Meta::CPAN

reference source text file.

=item * reindex

Indexes the ixaddress table, updating the tester_address table if appropriate.

=item * clean

De-duplicates addresses.

=item * backup

Provides backup files of the uploads database.

=back

=head2 Accessor Methods

=over

=item * logfile

Path to output log file for progress and debugging messages.

t/22processes.t  view on Meta::CPAN

    like($text, qr/PATTERNS:/, '.. found patterns');

    # test reindex
    ok( $obj = CPAN::Testers::Data::Addresses->new(config => $config, reindex => 1), "got object" );
    is( $obj->_lastid, 0, "before reindex" );
    $obj->process;
    is( $obj->_lastid, 2975969, "after reindex" );


    #TODO:
    #$obj = CPAN::Testers::Data::Addresses->new(config => 't/test-config.ini', backup => 1);
}

t/24backups.t  view on Meta::CPAN


# available DBI drivers
my %DRIVERS_DBI = map { $_ => 1 } DBI->available_drivers();

SKIP: {
    skip "Unable to locate config file [$config]", 5    unless(-f $config);

    ### Prepare object
    my $obj;
    unlink($output)  if(-f $output);
    ok( $obj = CPAN::Testers::Data::Addresses->new(config => $config, output => $output, backup => 1), "got object" );

    $obj->backup;

    is(-f 't/_DBDIR/address.bogus'  ? 1 : 0, 0, ".. BOGUS backup doesn't exist");

    SKIP: {
        skip "SQLite driver not installed", 1   unless($DRIVERS_DBI{SQLite});
        is(-f 't/_DBDIR/address.db'     ? 1 : 0, 1, '.. SQLite backup exists');
    }

    SKIP: {
        skip "CSV driver not installed", 2      unless($DRIVERS_DBI{CSV});
        is(-f 't/_DBDIR/address'        ? 1 : 0, 0, ".. default CSV backup doesn't exist");
        is(-f 't/_DBDIR/address.csv'    ? 1 : 0, 1, '.. CSV backup exists');
    }
}



( run in 0.585 second using v1.01-cache-2.11-cpan-49f99fa48dc )