ACME-QuoteDB
view release on metacpan or search on metacpan
lib/ACME/QuoteDB/LoadDB.pm view on Meta::CPAN
#$Id: LoadDB.pm,v 1.30 2009/09/30 07:37:09 dinosau2 Exp $
# /* vim:et: set ts=4 sw=4 sts=4 tw=78: */
package ACME::QuoteDB::LoadDB;
use 5.008005; # require perl 5.8.5, re: DBD::SQLite Unicode
use warnings;
use strict;
#use criticism 'brutal'; # use critic with a ~/.perlcriticrc
use version; our $VERSION = qv('0.1.1');
# Force Text::CSV to use its pure-Perl backend (Text::CSV_PP);
# 'one time' db load performance is not a concern.
# The variable has to still be set when the later 'use Text::CSV' is
# compiled, so it is assigned without 'local': a 'local' value would be
# restored as soon as this BEGIN block exits, i.e. before Text::CSV is loaded.
BEGIN { $ENV{PERL_TEXT_CSV} = 0 }
use aliased 'ACME::QuoteDB::DB::Attribution' => 'Attr';
use aliased 'ACME::QuoteDB::DB::QuoteCatg' => 'QuoteCatg';
use aliased 'ACME::QuoteDB::DB::Category' => 'Catg';
use aliased 'ACME::QuoteDB::DB::Quote' => 'Quote';
use aliased 'ACME::QuoteDB::DB::DBI' => 'QDBI';
use File::Basename qw/dirname basename/;
use File::Glob qw(:globally :nocase);
use Encode qw/is_utf8 decode/;
use Data::Dumper qw/Dumper/;
use Carp qw/carp croak/;
use Text::CSV;
use Readonly;
use DBI;
# if not in utf8 latin1 is assumed
# File-scoped default encoding name. NOTE: new() overwrites this variable
# when a true 'file_encoding' argument is supplied, so the value is shared by
# every object created from this package rather than stored per instance.
my $FILE_ENCODING = 'iso-8859-1';
# Read-only list of the field names that make up one quote record
# (the same keys new() initialises in $self->{record}).
Readonly my @QUOTE_FIELDS => qw/quote name source catg rating/;
# XXX refactor
sub new {
my ($class, $args) = @_;
# TODO encapsulation
my $self = bless {}, $class;
# store each record we extract - keys map to database fields
# TODO proper encapsulation
$self->{record} = {};
$self->{record}->{quote} = q{};
$self->{record}->{rating} = q{};
$self->{record}->{name} = q{};
$self->{record}->{source} = q{};
$self->{record}->{catg} = q{};
$self->{file} = $args->{file};
$self->{dir} = $args->{dir};
$self->{data} = $args->{data};
$self->{file_format} = $args->{file_format};
$FILE_ENCODING = $args->{file_encoding} || $FILE_ENCODING;
$self->{delim} = $args->{delimiter};
$self->{verbose} = $args->{verbose};
$self->{category} = $args->{category};
$self->{rating} = $args->{rating};
$self->{attr_source} = $args->{attr_source};
$self->{orig_args} = $args;
$self->{success} = undef;
# start with if set
$self->{record}->{rating} = $self->{rating};
$self->{record}->{name} = $self->{attr_source};
$self->{record}->{source} = $self->{attr_source};
if (ref $self->{category} eq 'ARRAY') {
$self->{record}->{catg} = ();
foreach my $c (@{$self->{category}}){
push @{$self->{record}->{catg}}, $c;
}
}
else {
$self->{record}->{catg} = $self->{category};
}
# db connection info
if ($ENV{ACME_QUOTEDB_DB}) {
$self->{db} = $ENV{ACME_QUOTEDB_DB};
$self->{host} = $ENV{ACME_QUOTEDB_HOST};
$self->{user} = $ENV{ACME_QUOTEDB_USER};
lib/ACME/QuoteDB/LoadDB.pm view on Meta::CPAN
=head1 OVERVIEW
You have a collection of quotes (adages/sayings/quips/epigrams, etc) for
whatever reason, you use these quotes for whatever reason, you want to
access these quotes in a variety of ways,...
This module is part of L<ACME::QuoteDB>.
This is a database loader: it takes data (quotes) and loads it into a database,
which is then accessed by L<ACME::QuoteDB>.
See L<ACME::QuoteDB>.
=head1 USAGE
General usage, csv/tsv file in the expected format loaded to the database
my $load_db = ACME::QuoteDB::LoadDB->new({
file => '/home/me/data/sorta_funny_quotes.tsv',
file_format => 'tsv',
delimiter => "\t",
# provide an attr_source for all (if not in data)
# data is used first, if not defined use below
attr_source => 'Things Randomly Overheard',
# provide a category for all (if not in data)
category => 'Humor',
# provide a rating for all
rating => 5, # scale 1-10
});
$load_db->data_to_db;
if (!$load_db->success){print 'failed'}
Also see t/01-load_quotes.t included with the distribution.
(available from the CPAN if not included on your system)
=head1 SUBROUTINES/METHODS
This is an Object Oriented module. There is no procedural interface.
=head2 new
Instantiate an ACME::QuoteDB::LoadDB object.
Argument is a hash ref. Params below
=head4 Data Related Parameters
=over 4
=item file or directory - one or the other required (not both)
if file, must be in our defined format, full path is needed.
if directory, full path is needed, can supply a basic glob type filter.
example:
{ file => '/home/me/data/simpsons_quotes.csv' }
{ dir => '/home/me/data/*.csv' }
=item file_format - required
can be one of: 'csv', 'tsv', 'custom', or 'html'
if 'html' or 'custom' you must supply the method for parsing.
(see tests for examples)
example:
{ file_format => 'csv' }
=item delimiter - optional, default is a comma for csv
csv/tsv options tested: comma(,) and tab(\t)
'html' - not applicable
example:
{ delimiter => "\t" }
=item category - optional, extracted from data if exists, otherwise will use what you
specify
TODO one quote to multiple categories
=item attr_source - extracted from data if exists, otherwise will use what you
specify
example:
{attr_source => 'The Simpsons'}
=item file_encoding - optional
Files being loaded are assumed to be utf8 encoded. If the utf8 flag is not
detected, the loader falls back to latin1 (iso-8859-1). If neither of these is
correct, set this option to the encoding your file is in.
=back
=head4 Operation Related Parameters
=over 4
=item dry_run - optional
do not write to the database. Use with verbose flag to see what would have been
written.
This can be helpful for testing the outcome of Loading results.
( run in 1.151 second using v1.01-cache-2.11-cpan-39bf76dae61 )