ACME-QuoteDB
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
lib/ACME/QuoteDB/LoadDB.pm view on Meta::CPAN
#$Id: LoadDB.pm,v 1.30 2009/09/30 07:37:09 dinosau2 Exp $
# /* vim:et: set ts=4 sw=4 sts=4 tw=78: */
package ACME::QuoteDB::LoadDB;
use 5.008005; # require perl 5.8.5, re: DBD::SQLite Unicode
use warnings;
use strict;
#use criticism 'brutal'; # use critic with a ~/.perlcriticrc
use version; our $VERSION = qv('0.1.1');
# with Text::CSV only use 'perl csv loader'
# 'one time' db load performance not a concern
BEGIN {local $ENV{PERL_TEXT_CSV} = 0}
use aliased 'ACME::QuoteDB::DB::Attribution' => 'Attr';
use aliased 'ACME::QuoteDB::DB::QuoteCatg' => 'QuoteCatg';
use aliased 'ACME::QuoteDB::DB::Category' => 'Catg';
use aliased 'ACME::QuoteDB::DB::Quote' => 'Quote';
use aliased 'ACME::QuoteDB::DB::DBI' => 'QDBI';
use File::Basename qw/dirname basename/;
use File::Glob qw(:globally :nocase);
use Encode qw/is_utf8 decode/;
use Data::Dumper qw/Dumper/;
use Carp qw/carp croak/;
use Text::CSV;
use Readonly;
use DBI;
# if not in utf8 latin1 is assumed
my $FILE_ENCODING = 'iso-8859-1';
Readonly my @QUOTE_FIELDS => qw/quote name source catg rating/;
# XXX refactor
sub new {
my ($class, $args) = @_;
# TODO encapsulation
my $self = bless {}, $class;
# store each record we extract - keys map to database fields
# TODO proper encapsulation
$self->{record} = {};
$self->{record}->{quote} = q{};
$self->{record}->{rating} = q{};
$self->{record}->{name} = q{};
$self->{record}->{source} = q{};
$self->{record}->{catg} = q{};
$self->{file} = $args->{file};
$self->{dir} = $args->{dir};
$self->{data} = $args->{data};
$self->{file_format} = $args->{file_format};
$FILE_ENCODING = $args->{file_encoding} || $FILE_ENCODING;
$self->{delim} = $args->{delimiter};
$self->{verbose} = $args->{verbose};
$self->{category} = $args->{category};
$self->{rating} = $args->{rating};
$self->{attr_source} = $args->{attr_source};
$self->{orig_args} = $args;
$self->{success} = undef;
# start with if set
$self->{record}->{rating} = $self->{rating};
$self->{record}->{name} = $self->{attr_source};
$self->{record}->{source} = $self->{attr_source};
if (ref $self->{category} eq 'ARRAY') {
$self->{record}->{catg} = ();
foreach my $c (@{$self->{category}}){
push @{$self->{record}->{catg}}, $c;
}
}
else {
$self->{record}->{catg} = $self->{category};
}
# db connection info
if ($ENV{ACME_QUOTEDB_DB}) {
$self->{db} = $ENV{ACME_QUOTEDB_DB};
$self->{host} = $ENV{ACME_QUOTEDB_HOST};
$self->{user} = $ENV{ACME_QUOTEDB_USER};
$self->{pass} = $ENV{ACME_QUOTEDB_PASS};
}
if (!$args->{dry_run}){$self->{write_db} = 1};
#if ($args->{create_db}) {$self->create_db};
if ($args->{create_db}) {$self->create_db_tables};
return $self;
}
sub set_record {
my ($self, $field, $value) = @_;
# TODO support mult-field simultanous loading
if ($value) {
$self->{record}->{$field} = $value;
}
return $self;
}
sub debug_record {
my ($self) = @_;
print Dumper $self->{record};
return;
}
sub get_record {
my ($self, $field) = @_;
if (not $field){return $self}
return $self->{record}->{$field};
}
sub data_to_db {
my ($self) = @_;
if ($self->{file} and $self->{data} and $self->{dir}){
croak 'only file, data or dir as arg but not both'
}
elsif (! ($self->{file} or $self->{data} or $self->{dir})) {
croak 'file, data or dir needed as arg'
}
if ($self->{file}) {
$self->_parse_file($self->{file});
}
elsif ($self->{data}) {
$self->_parse_data($self->{data});
}
elsif ($self->{dir}) {
my $dir = $self->{dir};
my $e = q{};
foreach my $f (<$dir*>) {
#if (! (-e $f) || -z $f) # no worky - need path info
$self->_parse_file($f);
$e++;
}
if (! $e){croak 'no files to parse in: ', Dumper $dir;};
}
else {
croak 'no file source in args!', Dumper $self;
}
return;
}
sub _parse_file {
my ($self, $file) = @_;
if (!-f $file) { croak "file not found: $file" }
if ($self->{verbose}){warn "processing file: $file\n"};
if (($self->{file_format} eq 'csv') || ($self->{file_format} eq 'tsv')){
$self->dbload_from_csv($file);
}
elsif (($self->{file_format} eq 'html') || ($self->{file_format} eq 'custom')){
# not supported, too many possibilities
# supply your own
$self->dbload($file);
}
else {
croak 'unsupported file format requested, format must be csv or tsv';
}
return;
}
sub _parse_data {
my ($self, $data) = @_;
if (!$data) {croak "data empty $data"}
if ($self->{verbose}){carp 'processing data:'};
if ($self->{file_format} =~ /(?:csv|tsv)/sm) {
croak 'TODO: not yet supported';
#$self->dbload_from_csv($data);
}
elsif (($self->{file_format} eq 'html') || ($self->{file_format} eq 'custom')){
# not supported, too many possibilities
# supply your own
$self->dbload($data);
}
else {
croak 'unsupported file format requested, '
.'format must be csv, tsv. html, custom also possible';
}
return $self;
}
sub _confirm_header_order {
my ($hr) = @_;
return ($hr->{quote} eq 'Quote'
and $hr->{name} eq 'Attribution Name',
and $hr->{source} eq 'Attribution Source',
and $hr->{catg} eq 'Category',
and $hr->{rating} eq 'Rating')
or croak 'incorrect headers or header order';
}
sub dbload_from_csv {
my ($self, $file) = @_;
my $delim = $self->{delim} || ',';
my $csv = Text::CSV->new({
sep_char => $delim,
binary => 1
});
$csv->column_names (@QUOTE_FIELDS);
open my $source, '<:encoding(utf8)', $file || croak $!;
_confirm_header_order($csv->getline_hr($source));
while (my $hr = $csv->getline_hr($source)) {
next unless $hr->{quote} and $hr->{name};
if ($self->{verbose}){
print "\n",
'Quote: ', $hr->{quote},"\n",
'Name: ', $hr->{name},"\n",
'Source: ', $hr->{source},"\n",
'Category:', $hr->{catg},"\n",
'Rating: ', $hr->{rating},"\n\n";
};
$self->set_record(quote => $hr->{quote});
$self->set_record(name => $hr->{name});
$self->set_record(source => ($self->{attr_source} || $hr->{source}));
# take user defined first
# TODO support multi categories
$self->set_record(catg => ($self->{category} || $hr->{catg}));
$self->set_record(rating => ($self->{rating} || $hr->{rating}));
$self->write_record;
}
close $source or carp $!;
return $self;
}
# sub class this - i.e. provide this method in your code (see test
# 01-load_quotes.t)
sub dbload {
croak 'Override this. Provide this method in a sub class (child) of this object';
# see tests: t/01-load_quotes.t for examples
}
sub _to_utf8 {
my ($self) = @_;
RECORD:
foreach my $r (@QUOTE_FIELDS){
my $val = $self->{record}->{$r};
if (ref $val eq 'ARRAY'){
foreach my $v (@{$val}){
if (!is_utf8($v)){
push @{$self->{record}->{$r}}, decode($FILE_ENCODING, $v);
}
}
}
else {
if (!is_utf8($val)){
$self->{record}->{$r} = decode($FILE_ENCODING, $val);
}
}
}
return $self;
}
# XXX refactor (the following 3 methods)
# one person can have many quotes, is this person in our attribution table
# already?
sub _get_id_if_attr_name_exist {
my ($self) = @_;
my $attr_id = q{};
RECS:
foreach my $c_obj (Attr->retrieve_all){
next RECS if not $c_obj->name;
if ($c_obj->name eq $self->get_record('name')){
# use attribution id if already exists
$attr_id = $c_obj->attr_id;
}
}
return $attr_id;
}
sub _get_id_if_catg_exist {
my ($self, $ctg) = @_;
my $catg_id = q{};
# get category id
RECS:
foreach my $c_obj (Catg->retrieve_all){
next RECS if not $c_obj->catg;
if ($c_obj->catg eq $ctg){
# use cat_id if already exists
$catg_id = $c_obj->catg_id;
}
}
return $catg_id;
}
#TODO : refactor
sub write_record {
my ($self) = @_;
$self->_to_utf8;
if ($self->{verbose} and $self->get_record('name')){
print 'Attribution Name: ',$self->get_record('name'),"\n";
};
my $attr_id = $self->_get_id_if_attr_name_exist;
# nope, ok, add them
if (not $attr_id) { # attribution record does not already exist,
# create new entry
if ($self->{write_db}) {
$attr_id = Attr->insert({
name => $self->get_record('name'),
});
}
}
my $catg_ids = ();
if ($self->{write_db}) {
my ($catg) = $self->get_record('catg');
if (! ref $catg){ # 'single' value
my $catg_id = $self->_get_id_if_catg_exist($catg);
if (!$catg_id) {
# category does not already exist,
# create new entry
$catg_id = Catg->insert({catg => $catg});
}
push @{$catg_ids}, $catg_id;
} # support multi catg
elsif (ref $catg eq 'ARRAY'){
foreach my $c (@{$catg}){
my $catg_id = $self->_get_id_if_catg_exist($c);
if (!$catg_id) { # category does not already exist,
# create new entry
$catg_id = Catg->insert({catg => $c});
}
push @{$catg_ids}, $catg_id;
}
}
}
$self->_display_vals_if_verbose;
if ($self->{write_db}) {
my $qid = Quote->insert({
attr_id => $attr_id,
quote => $self->get_record('quote'),
source => $self->get_record('source'),
rating => $self->get_record('rating')
}) or croak $!;
if ($qid) {
my $id;
foreach my $cid (@{$catg_ids}){
$id = QuoteCatg->insert({
quot_id => $qid,
catg_id => $cid,
}) or croak $!;
}
}
}
# confirmation?
# TODO add a test for failure
if ($self->{write_db} and not $attr_id) {croak 'db write not successful'}
#$self->set_record(undef);
$self->{record} = {};
$self->_reset_orig_args;
if ($self->{write_db}) {
$self->success(1);
}
return $self->success;
}
sub _reset_orig_args {
my ($self) = @_;
$self->{record}->{rating} = $self->{orig_args}->{rating};
$self->{record}->{name} = $self->{orig_args}->{attr_source};
$self->{record}->{source} = $self->{orig_args}->{attr_source};
if (ref $self->{orig_args}->{category} eq 'ARRAY') {
foreach my $c (@{$self->{orig_args}->{category}}){
push @{$self->{record}->{catg}}, $c;
}
}
else {
$self->{record}->{catg} = $self->{orig_args}->{category};
}
}
sub success {
my ($self, $flag) = @_;
$self->{success} ||= $flag;
return $self->{success};
};
sub _display_vals_if_verbose {
my ($self) = @_;
if ($self->{verbose}){
#print 'Quote: ', $self->get_record('quote'),"\n";
#print 'Source: ', $self->get_record('source'),"\n";
#print 'Category: ',$self->get_record('catg'),"\n";
#print 'Rating: ', $self->get_record('rating'),"\n";
print Dumper $self->{record};
}
return $self;
}
#sub create_db {
# my ($self) = @_;
#
# if ($self->{db} and $self->{host}) {
# $self->create_db_mysql();
# }
#}
sub create_db_tables {
my ($self) = @_;
if ($self->{db} and $self->{host}) {
#$self->create_db_mysql();
$self->create_db_tables_mysql();
}
else {
create_db_tables_sqlite();
}
return $self;
}
# XXX we want the user to supply a pre created database.
# created as such 'CREATE DATABASE $dbn CHARACTER SET utf8 COLLATE utf8_general_ci'
# this get's into too many isseuwith privs and database creation
#Sat Aug 22 13:42:37 PDT 2009
# did this:
#mysql> CREATE DATABASE acme_quotedb CHARACTER SET utf8 COLLATE utf8_general_ci;
#mysql> grant usage on *.* to acme_user@localhost identified by 'acme';
#mysql> grant all privileges on acme_quotedb.* to acme_user@localhost ;
#sub create_db_mysql {
# my ($self) = @_;
#
# # hmmmm, what about priv's access, etc
# # maybe user need to supply a db, they have
# # access to, already created (just the db though)
# ## create our db
# #my $dbhc = DBI->connect('DBI:mysql:database=mysql;host='
# # .$self->{host}, $self->{user}, $self->{pass})
# # || croak "db cannot be accessed $! $DBI::errstr";
#
# #my $dbn = $self->{db};
# #my $db = qq(CREATE DATABASE $dbn CHARACTER SET utf8 COLLATE utf8_general_ci);
# # eval {
# # $dbhc->do($db) or croak $dbhc->errstr;
# # };
# # $@ and croak 'Cannot create database!';
# # $dbhc->disconnect; $dbhc = undef;
#
# my $drh = DBI->install_driver('mysql');
# my $rc = $drh->func("dropdb", $self->{db},
# [$self->{host}, $self->{user}, $self->{password}],
# 'admin'
# );
#
# $rc = $drh->func("createdb", $self->{db},
# [$self->{host}, $self->{user}, $self->{password}],
# 'admin'
# );
#}
# XXX refactor with sqlite
sub create_db_tables_mysql {
my ($self) = @_;
# connect to our db
my $c = $self->{db}.';host='.$self->{host};
my $dbh = DBI->connect(
"DBI:mysql:database=$c", $self->{user}, $self->{pass})
|| croak "db cannot be accessed $! $DBI::errstr";
eval {
$dbh->do('DROP TABLE IF EXISTS quote;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS quote (
quot_id INTEGER NOT NULL AUTO_INCREMENT,
attr_id INTEGER,
quote TEXT,
source TEXT,
rating REAL,
PRIMARY KEY(quot_id)
);')
#)CHARACTER SET utf8 COLLATE utf8_general_ci;
#) ENGINE = MYISAM CHARACTER SET utf8 COLLATE utf8_general_ci;
or croak $dbh->errstr;
$dbh->do('DROP TABLE IF EXISTS attribution;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS attribution (
attr_id INTEGER NOT NULL AUTO_INCREMENT,
name TEXT,
PRIMARY KEY(attr_id)
);') or croak $dbh->errstr;
$dbh->do('DROP TABLE IF EXISTS category;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS category (
catg_id INTEGER NOT NULL AUTO_INCREMENT,
catg TEXT,
PRIMARY KEY(catg_id)
);') or croak $dbh->errstr;
$dbh->do('DROP TABLE IF EXISTS quote_catg;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS quote_catg (
id INTEGER NOT NULL AUTO_INCREMENT,
catg_id INTEGER,
quot_id INTEGER,
PRIMARY KEY(id)
);') or croak $dbh->errstr;
$dbh->disconnect or warn $dbh->errstr;
$dbh = undef;
};
return $@ and croak 'Cannot create database tables!';
}
sub create_db_tables_sqlite {
my $db = QDBI->get_current_db_path;
#XXX is there really no way to do this with the existing
# connection?!(class dbi)
my $dbh = DBI->connect('dbi:SQLite:dbname='.$db, '', '')
|| croak "$db cannot be accessed $! $DBI::errstr";
#-- sqlite does not have a varchar datatype: VARCHAR(255)
#-- A column declared INTEGER PRIMARY KEY will autoincrement.
eval {
$dbh->do('DROP TABLE IF EXISTS quote;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS quote (
quot_id INTEGER PRIMARY KEY,
attr_id INTEGER,
quote TEXT,
source TEXT,
rating REAL
);')
or croak $dbh->errstr;
$dbh->do('DROP TABLE IF EXISTS attribution;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS attribution (
attr_id INTEGER PRIMARY KEY,
name TEXT
);') or croak $dbh->errstr;
$dbh->do('DROP TABLE IF EXISTS category;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS category (
catg_id INTEGER PRIMARY KEY,
catg TEXT
);') or croak $dbh->errstr;
$dbh->do('DROP TABLE IF EXISTS quote_catg;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS quote_catg (
id INTEGER PRIMARY KEY,
catg_id INTEGER,
quot_id INTEGER
);') or croak $dbh->errstr;
$dbh->disconnect or carp $dbh->errstr;
$dbh = undef;
};
return $@ and croak 'Cannot create database tables!';
}
q(My cat's breath smells like cat food. --Ralph Wiggum);
__END__
=head1 NAME
view all matches for this distributionview release on metacpan - search on metacpan
( run in 0.567 second using v1.00-cache-2.02-grep-82fe00e-cpan-2c419f77a38b )