CPAN-Testers-Data-Uploads

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

0.18    2012-11-26
        - spelling fixes. (thanks to Florian Schlichting & Gregor Herrmann).
        - CHANGES => Changes.
        - change file dates changed to meet W3CDTF standards.
        - Artistic License v2 now sole license.
        - added META.json.
        - added minimum perl version (5.006).
        - reworked Makefile.PL for clarity.
        - more tests.
        - journal file now configurable.
        - check available drivers for backups.

0.17    2011-02-15
        - removed delete file test, as Win32 can't cope.
        - added journalling to speed up backup generation.

0.16    2010-12-13
        - new table 'uploads_failed' to record those distribution uploads that
          do not parse using CPAN::DistnameInfo.
        - now require CPAN::DistnameInfo-0.10 or above.

0.15    2010-11-29
        - fixed UpdateIndex SQL query.
        - dropped fast reindex, due to flaw in design.
        - reworked reindex to better manage reindexing by author.

Changes  view on Meta::CPAN

0.05    2009-02-15
        - added missing DBD prerequisites.

0.04    2009-02-13
        - changed to more formal package release.

0.03    2008-12-29
        - renamed Article reference.

0.02    2008-11-18
        - added multiple backup mechanism.
        - added documentation.
        - abstracted sources into configuration file.

0.01    2008-11-10
        - initial release

MANIFEST  view on Meta::CPAN

META.yml
README
t/01base.t
t/05setup_db-uploads.t
t/09setup-expected.t
t/20attributes.t
t/22help.t
t/30reindex.t
t/31generate.t
t/32update.t
t/33backup.t
t/50logging.ini
t/50logging.t
t/52lastid.t
t/59cleanup.t
t/90podtest.t
t/91podcover.t
t/94metatest.t
t/95changedate.t
t/96metatest.t
t/CTDU_Testing.pm

examples/uploads.pl  view on Meta::CPAN

__END__

#----------------------------------------------------------------------------

=head1 NAME

uploads.pl - creates, updates and/or backs up the uploads database.

=head1 SYNOPSIS

  perl uploads.pl --config=<file> (-generate | -update | -backup | -h | -v)

=head1 DESCRIPTION

This program allows the user to create, update and backup the uploads database,
either as separate commands, or a combination of all three. The process order
will always be CREATE->UPDATE->BACKUP, regardless of the order the options
appear on the command line.

The Uploads database contains basic information about the history of CPAN. It
records the release dates of everything that is uploaded to CPAN, both within
a BACKPAN repository, a current CPAN repository and the latest uploads posted
by PAUSE, which may not have yet reached the CPAN mirrors.

A simple schema for the MySQL database is below:

lib/CPAN/Testers/Data/Uploads.pm  view on Meta::CPAN

use File::Path;
use File::Slurp;
use Getopt::Long;
use IO::AtomicFile;
use IO::File;
use Net::NNTP;

#----------------------------------------------------------------------------
# Variables

my (%backups);
use constant    LASTMAIL    => '_lastmail';
use constant    LOGFILE     => '_uploads.log';
use constant    JOURNAL     => '_journal.sql';

my %phrasebook = (
    'FindAuthor'        => 'SELECT * FROM ixlatest WHERE author=?',

    'FindDistVersion'   => 'SELECT type FROM uploads WHERE author=? AND dist=? AND version=?',
    'InsertDistVersion' => 'INSERT INTO uploads (type,author,dist,version,filename,released) VALUES (?,?,?,?,?,?)',
    'UpdateDistVersion' => 'UPDATE uploads SET type=? WHERE author=? AND dist=? AND version=?',

lib/CPAN/Testers/Data/Uploads.pm  view on Meta::CPAN

    'InsertIndex'       => 'INSERT INTO ixlatest (oncpan,author,version,released,dist) VALUES (?,?,?,?,?)',
    'AmendIndex'        => 'UPDATE ixlatest SET oncpan=? WHERE author=? AND version=? AND dist=?',
    'UpdateIndex'       => 'UPDATE ixlatest SET oncpan=?,version=?,released=? WHERE dist=? AND author=?',
    'BuildAuthorIndex'  => 'SELECT x.author,x.version,x.released,x.dist,x.type FROM (SELECT dist, MAX(released) AS mv FROM uploads WHERE author=? GROUP BY dist) AS y INNER JOIN uploads AS x ON x.dist=y.dist AND x.released=y.mv ORDER BY released',
    'GetAllAuthors'     => 'SELECT distinct(author) FROM uploads',

    'InsertRequest'     => 'INSERT INTO page_requests (type,name,weight) VALUES (?,?,5)',

    'ParseFailed'       => 'REPLACE INTO uploads_failed (source,type,dist,version,file,pause,created) VALUES (?,?,?,?,?,?,?)',

    # SQLite backup
    'CreateTable'       => 'CREATE TABLE uploads (type text, author text, dist text, version text, filename text, released int)',
);

my $extn = qr/\.(tar\.(gz|bz2)|tgz|zip)$/;

my %oncpan = (
    'backpan'   => 2,
    'cpan'      => 1,
    'upload'    => 1
);

lib/CPAN/Testers/Data/Uploads.pm  view on Meta::CPAN

    $self->_init_options(@_);
    return $self;
}

sub DESTROY {
    my $self = shift;
}

__PACKAGE__->mk_accessors(
    qw( uploads backpan cpan logfile logclean lastfile journal
        mgenerate mupdate mbackup mreindex ));

sub process {
    my $self = shift;
    $self->generate()       if($self->mgenerate);
    $self->reindex()        if($self->mreindex);
    $self->update()         if($self->mupdate);
    $self->backup()         if($self->mbackup);
}

sub generate {
    my $self = shift;
    my $db = $self->uploads;

    $self->_log("Restarting uploads database");
    $db->do_query($phrasebook{'DeleteAll'});

    $self->_log("Creating BACKPAN entries");

lib/CPAN/Testers/Data/Uploads.pm  view on Meta::CPAN

        $self->_update_index($cpanid,$version,$date,$name,1);
        my @rows = $db->get_query('array',$phrasebook{'FindDistVersion'},$cpanid,$name,$version);
        next    if(@rows);
        $self->_write_journal('InsertDistVersion','upload',$cpanid,$name,$version,$filename,$date);
    }

    $self->_lastid($last);
    $self->_close_journal();
}

sub backup {
    my $self = shift;
    my $db = $self->uploads;

    if(my @journals = $self->_find_journals()) {
        for my $driver (keys %backups) {
            if($driver =~ /(CSV|SQLite)/i && !$backups{$driver}{'exists'}) {
                $backups{$driver}{db}->do_query($phrasebook{'CreateTable'});
                $backups{$driver}{'exists'} = 1;
            }
        }
        
        for my $journal (@journals) {
            next    if($journal =~ /TMP$/); # don't process active journals
            $self->_log("Processing journal $journal");
            my $lines = $self->_read_journal($journal);
            for my $line (@$lines) {
                my ($phrase,@args) = @$line;
                for my $driver (keys %backups) {
                    $backups{$driver}{db}->do_query($phrasebook{$phrase},@args);
                }
            }

           $self->_done_journal($journal);
        }
        $self->_log("Processed journals");
    } else {
        for my $driver (keys %backups) {
            if($backups{$driver}{'exists'}) {
                $backups{$driver}{db}->do_query($phrasebook{'DeleteAll'});
            } elsif($driver =~ /(CSV|SQLite)/i) {
                $backups{$driver}{db}->do_query($phrasebook{'CreateTable'});
                $backups{$driver}{'exists'} = 1;
            }
        }

        $self->_log("Backup via DBD drivers");

        my $rows = $db->iterator('array',$phrasebook{'SelectAll'});
        while(my $row = $rows->()) {
            for my $driver (keys %backups) {
                $backups{$driver}{db}->do_query($phrasebook{'InsertDistVersion'},@$row);
            }
        }
    }

    # handle the CSV exception
    if($backups{CSV}) {
        $self->_log("Backup to CSV file");
        $backups{CSV}{db} = undef;  # close db handle
        my $fh1 = IO::File->new('uploads','r') or die "Cannot read temporary database file 'uploads'\n";
        my $fh2 = IO::File->new($backups{CSV}{dbfile},'w+') or die "Cannot write to CSV database file $backups{CSV}{dbfile}\n";
        while(<$fh1>) { print $fh2 $_ }
        $fh1->close;
        $fh2->close;
        unlink('uploads');
    }
}

sub help {
    my ($self,$full,$mess) = @_;

lib/CPAN/Testers/Data/Uploads.pm  view on Meta::CPAN

        print <<HERE;

Usage: $0 --config=<file> [-g] [-r] [-u] [-b] [-h] [-v]
        [--logfile=<file>] [--logclean] 
        [--lastmail=<file>] [--journal=<file>]

  --config=<file>   database configuration file
  -g                generate new database
  -r                reindex database (*)
  -u                update existing database
  -b                backup database to portable files
  -h                this help screen
  -v                program version
  --logfile=<file>  trace log file
  --logclean        overwrite exisiting log file
  --lastmail=<file> last id file
  --journal=<file>  SQL journal file path

Notes:
  * A generate request automatically includes a reindex.

lib/CPAN/Testers/Data/Uploads.pm  view on Meta::CPAN

sub _init_options {
    my $self = shift;
    my %hash  = @_;
    my %options;

    GetOptions( \%options,
        'config=s',
        'generate|g',
        'update|u',
        'reindex|r',
        'backup|b',
        'journal|j=s',
        'logfile|l=s',
        'logclean=s',
        'lastfile=s',
        'help|h',
        'version|v'
    );

    # default to API settings if no command line option
    for(qw(config generate update reindex fast backup help version)) {
        $options{$_} ||= $hash{$_}  if(defined $hash{$_});
    }

    $self->help(1)  if($options{help});
    $self->help(0)  if($options{version});

    $self->help(1,"Must specify at least one option from 'generate' (-g), 'reindex' (-r),\n'update' (-u)  and/or 'backup' (-b)")
                                                                        unless($options{generate} || $options{update} || $options{backup} || $options{reindex});
    $self->help(1,"Must specific the configuration file")               unless(   $options{config});
    $self->help(1,"Configuration file [$options{config}] not found")    unless(-f $options{config});

    # load configuration
    my $cfg = Config::IniFiles->new( -file => $options{config} );

    # configure sources
    if($options{generate}) {
        my $dir = $cfg->val('MASTER','BACKPAN');
        $self->help(1,"No source location for 'BACKPAN' in config file")    if(!   $dir);

lib/CPAN/Testers/Data/Uploads.pm  view on Meta::CPAN

    $self->lastfile( $hash{lastfile} || $options{lastfile} || $cfg->val('MASTER','lastfile' ) || LASTMAIL );
    $self->journal(  $hash{journal}  || $options{journal}  || $cfg->val('MASTER','journal'  ) || JOURNAL  );

    # configure upload DB
    $self->help(1,"No configuration for UPLOADS database") unless($cfg->SectionExists('UPLOADS'));
    my %opts = map {$_ => ($cfg->val('UPLOADS',$_) || undef)} qw(driver database dbfile dbhost dbport dbuser dbpass);
    my $db = CPAN::Testers::Common::DBUtils->new(%opts);
    $self->help(1,"Cannot configure UPLOADS database") unless($db);
    $self->uploads($db);

    # configure backup DBs
    if($options{backup}) {
        $self->help(1,"No configuration for BACKUPS with backup option")    unless($cfg->SectionExists('BACKUPS'));

        my %available_drivers = map { $_ => 1 } DBI->available_drivers;
        my @drivers = $cfg->val('BACKUPS','drivers');
        for my $driver (@drivers) {
            unless($available_drivers{$driver}) {
                warn "No DBI support for '$driver', ignoring\n";
                next;
            }

            $self->help(1,"No configuration for backup option '$driver'")   unless($cfg->SectionExists($driver));

            my %opt = map {$_ => ($cfg->val($driver,$_) || undef)} qw(driver database dbfile dbhost dbport dbuser dbpass);
            $backups{$driver}{'exists'} = $driver =~ /SQLite/i ? -f $opt{database} : 1;

            # CSV is a bit of an oddity!
            if($driver =~ /CSV/i) {
                $backups{$driver}{'exists'} = 0;
                $backups{$driver}{'dbfile'} = $opt{dbfile};
                $opt{dbfile} = 'uploads';
                unlink($opt{dbfile});
            }

            $backups{$driver}{db} = CPAN::Testers::Common::DBUtils->new(%opt);
            $self->help(1,"Cannot configure BACKUPS database for '$driver'")   unless($backups{$driver}{db});
        }

        $self->mbackup(1)   if(keys %backups);
    }
}

sub _log {
    my $self = shift;
    my $log = $self->logfile or return;
    mkpath(dirname($log))   unless(-f $log);

    my $mode = $self->logclean ? 'w+' : 'a+';
    $self->logclean(0);

lib/CPAN/Testers/Data/Uploads.pm  view on Meta::CPAN

__END__

#----------------------------------------------------------------------------

=head1 NAME

CPAN::Testers::Data::Uploads - CPAN Testers Uploads Database Generator

=head1 SYNOPSIS

  perl uploads.pl --config=<file> [--generate] [--reindex] [--update] [--backup]

=head1 DESCRIPTION

This program allows the user to create, update and backup the uploads database,
either as separate commands, or a combination of all three. The process order
will always be CREATE->UPDATE->BACKUP, regardless of the order the options
appear on the command line.

The Uploads database contains basic information about the history of CPAN. It
records the release dates of everything that is uploaded to CPAN, both within
a BACKPAN repository, a current CPAN repository and the latest uploads posted
by PAUSE, which may not have yet reached the CPAN mirrors.

A simple schema for the MySQL database is below:

lib/CPAN/Testers/Data/Uploads.pm  view on Meta::CPAN

Generates a new uploads and ixlatest database.

=item * reindex

Rebuilds the ixlatest table for all entries.

=item * update

Updates the uploads and ixlatest databases.

=item * backup

Provides backup files of the uploads database.

=item * help

Provides a help screen.

=back

=head2 Accessor Methods

=over

lib/CPAN/Testers/Data/Uploads.pm  view on Meta::CPAN

Default file: '_journal.sql'.

=item * mgenerate

If set to a true value runs in generate mode for the process method().

=item * mupdate

If set to a true value runs in update mode for the process method().

=item * mbackup

If set to a true value runs in backup mode for the process method().

=item * mreindex

If set to a true value runs in reindex mode for the process method().

=back

=head2 Private Methods

=over

t/20attributes.t  view on Meta::CPAN

    isnt( $obj->$k(), undef, "$label has default" );
    is( $obj->$k(123), 123, "$label set" ); # chained, so returns object, not value.
    is( $obj->$k, 123, "$label get" );
  };
}

# undefined attributes
foreach my $k ( qw/
    backpan
    mgenerate
    mbackup
    mreindex
/ ){
  my $label = "[$k]";
  SKIP: {
    ok( $obj->can($k), "$label can" )
	or skip "'$k' attribute missing", 3;
    is( $obj->$k(), undef, "$label has no default" );
    is( $obj->$k(123), 123, "$label set" ); # chained, so returns object, not value.
    is( $obj->$k, 123, "$label get" );
  };

t/33backup.t  view on Meta::CPAN

#!/usr/bin/perl -w
use strict;

# testing the backup process

use CPAN::Testers::Data::Uploads;
use DBI;
use Test::More tests => 5;

my %available_drivers = map { $_ => 1 } DBI->available_drivers;

my $config  = 't/_DBDIR/test-config.ini';
my $sqlite  = 't/_DBDIR/uploads.db';
my $csvfile = 't/_DBDIR/uploads.csv';

SKIP: {
    skip "Test::Database required for DB testing", 5 unless(-f $config);

    my $obj;
    eval { $obj = CPAN::Testers::Data::Uploads->new( config => $config, backup => 1 ) };
    isa_ok($obj,'CPAN::Testers::Data::Uploads');

    SKIP: {
        skip "Problem creating object", 4 unless($obj);

        ok( ! -f $sqlite, '.. no SQLite backup' );
        ok( ! -f $csvfile, '.. no CSV backup' );

        $obj->process;

        ok( -f $sqlite, '.. got SQLite backup' );

        SKIP: {
            skip "DBD::CSV not installed", 1 unless($available_drivers{'CSV'});
            ok( -f $csvfile, '.. got CSV backup' );
        }
    }
}



( run in 1.287 second using v1.01-cache-2.11-cpan-49f99fa48dc )