ACME-QuoteDB
view release on metacpan or search on metacpan
lib/ACME/QuoteDB/LoadDB.pm view on Meta::CPAN
.'format must be csv, tsv. html, custom also possible';
}
return $self;
}
sub _confirm_header_order {
my ($hr) = @_;
return ($hr->{quote} eq 'Quote'
and $hr->{name} eq 'Attribution Name',
and $hr->{source} eq 'Attribution Source',
and $hr->{catg} eq 'Category',
and $hr->{rating} eq 'Rating')
or croak 'incorrect headers or header order';
}
sub dbload_from_csv {
my ($self, $file) = @_;
my $delim = $self->{delim} || ',';
my $csv = Text::CSV->new({
sep_char => $delim,
binary => 1
});
$csv->column_names (@QUOTE_FIELDS);
open my $source, '<:encoding(utf8)', $file || croak $!;
_confirm_header_order($csv->getline_hr($source));
while (my $hr = $csv->getline_hr($source)) {
next unless $hr->{quote} and $hr->{name};
if ($self->{verbose}){
print "\n",
'Quote: ', $hr->{quote},"\n",
'Name: ', $hr->{name},"\n",
'Source: ', $hr->{source},"\n",
'Category:', $hr->{catg},"\n",
'Rating: ', $hr->{rating},"\n\n";
};
$self->set_record(quote => $hr->{quote});
$self->set_record(name => $hr->{name});
$self->set_record(source => ($self->{attr_source} || $hr->{source}));
# take user defined first
# TODO support multi categories
$self->set_record(catg => ($self->{category} || $hr->{catg}));
$self->set_record(rating => ($self->{rating} || $hr->{rating}));
$self->write_record;
}
close $source or carp $!;
return $self;
}
# sub class this - i.e. provide this method in your code (see test
# 01-load_quotes.t)
sub dbload {
croak 'Override this. Provide this method in a sub class (child) of this object';
# see tests: t/01-load_quotes.t for examples
}
sub _to_utf8 {
my ($self) = @_;
RECORD:
foreach my $r (@QUOTE_FIELDS){
my $val = $self->{record}->{$r};
if (ref $val eq 'ARRAY'){
foreach my $v (@{$val}){
if (!is_utf8($v)){
push @{$self->{record}->{$r}}, decode($FILE_ENCODING, $v);
}
}
}
else {
if (!is_utf8($val)){
$self->{record}->{$r} = decode($FILE_ENCODING, $val);
}
}
}
return $self;
}
# XXX refactor (the following 3 methods)
# one person can have many quotes, is this person in our attribution table
# already?
sub _get_id_if_attr_name_exist {
my ($self) = @_;
my $attr_id = q{};
RECS:
foreach my $c_obj (Attr->retrieve_all){
next RECS if not $c_obj->name;
if ($c_obj->name eq $self->get_record('name')){
# use attribution id if already exists
$attr_id = $c_obj->attr_id;
}
}
return $attr_id;
}
sub _get_id_if_catg_exist {
my ($self, $ctg) = @_;
my $catg_id = q{};
# get category id
RECS:
foreach my $c_obj (Catg->retrieve_all){
next RECS if not $c_obj->catg;
if ($c_obj->catg eq $ctg){
# use cat_id if already exists
$catg_id = $c_obj->catg_id;
}
}
return $catg_id;
lib/ACME/QuoteDB/LoadDB.pm view on Meta::CPAN
$self->{success} ||= $flag;
return $self->{success};
};
sub _display_vals_if_verbose {
my ($self) = @_;
if ($self->{verbose}){
#print 'Quote: ', $self->get_record('quote'),"\n";
#print 'Source: ', $self->get_record('source'),"\n";
#print 'Category: ',$self->get_record('catg'),"\n";
#print 'Rating: ', $self->get_record('rating'),"\n";
print Dumper $self->{record};
}
return $self;
}
#sub create_db {
# my ($self) = @_;
#
# if ($self->{db} and $self->{host}) {
# $self->create_db_mysql();
# }
#}
sub create_db_tables {
my ($self) = @_;
if ($self->{db} and $self->{host}) {
#$self->create_db_mysql();
$self->create_db_tables_mysql();
}
else {
create_db_tables_sqlite();
}
return $self;
}
# XXX we want the user to supply a pre created database.
# created as such 'CREATE DATABASE $dbn CHARACTER SET utf8 COLLATE utf8_general_ci'
# this get's into too many isseuwith privs and database creation
#Sat Aug 22 13:42:37 PDT 2009
# did this:
#mysql> CREATE DATABASE acme_quotedb CHARACTER SET utf8 COLLATE utf8_general_ci;
#mysql> grant usage on *.* to acme_user@localhost identified by 'acme';
#mysql> grant all privileges on acme_quotedb.* to acme_user@localhost ;
#sub create_db_mysql {
# my ($self) = @_;
#
# # hmmmm, what about priv's access, etc
# # maybe user need to supply a db, they have
# # access to, already created (just the db though)
# ## create our db
# #my $dbhc = DBI->connect('DBI:mysql:database=mysql;host='
# # .$self->{host}, $self->{user}, $self->{pass})
# # || croak "db cannot be accessed $! $DBI::errstr";
#
# #my $dbn = $self->{db};
# #my $db = qq(CREATE DATABASE $dbn CHARACTER SET utf8 COLLATE utf8_general_ci);
# # eval {
# # $dbhc->do($db) or croak $dbhc->errstr;
# # };
# # $@ and croak 'Cannot create database!';
# # $dbhc->disconnect; $dbhc = undef;
#
# my $drh = DBI->install_driver('mysql');
# my $rc = $drh->func("dropdb", $self->{db},
# [$self->{host}, $self->{user}, $self->{password}],
# 'admin'
# );
#
# $rc = $drh->func("createdb", $self->{db},
# [$self->{host}, $self->{user}, $self->{password}],
# 'admin'
# );
#}
# XXX refactor with sqlite
sub create_db_tables_mysql {
my ($self) = @_;
# connect to our db
my $c = $self->{db}.';host='.$self->{host};
my $dbh = DBI->connect(
"DBI:mysql:database=$c", $self->{user}, $self->{pass})
|| croak "db cannot be accessed $! $DBI::errstr";
eval {
$dbh->do('DROP TABLE IF EXISTS quote;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS quote (
quot_id INTEGER NOT NULL AUTO_INCREMENT,
attr_id INTEGER,
quote TEXT,
source TEXT,
rating REAL,
PRIMARY KEY(quot_id)
);')
#)CHARACTER SET utf8 COLLATE utf8_general_ci;
#) ENGINE = MYISAM CHARACTER SET utf8 COLLATE utf8_general_ci;
or croak $dbh->errstr;
$dbh->do('DROP TABLE IF EXISTS attribution;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS attribution (
attr_id INTEGER NOT NULL AUTO_INCREMENT,
name TEXT,
PRIMARY KEY(attr_id)
);') or croak $dbh->errstr;
$dbh->do('DROP TABLE IF EXISTS category;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS category (
catg_id INTEGER NOT NULL AUTO_INCREMENT,
catg TEXT,
PRIMARY KEY(catg_id)
);') or croak $dbh->errstr;
$dbh->do('DROP TABLE IF EXISTS quote_catg;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS quote_catg (
id INTEGER NOT NULL AUTO_INCREMENT,
catg_id INTEGER,
quot_id INTEGER,
PRIMARY KEY(id)
);') or croak $dbh->errstr;
$dbh->disconnect or warn $dbh->errstr;
$dbh = undef;
};
return $@ and croak 'Cannot create database tables!';
}
sub create_db_tables_sqlite {
my $db = QDBI->get_current_db_path;
#XXX is there really no way to do this with the existing
# connection?!(class dbi)
my $dbh = DBI->connect('dbi:SQLite:dbname='.$db, '', '')
|| croak "$db cannot be accessed $! $DBI::errstr";
#-- sqlite does not have a varchar datatype: VARCHAR(255)
#-- A column declared INTEGER PRIMARY KEY will autoincrement.
eval {
$dbh->do('DROP TABLE IF EXISTS quote;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS quote (
quot_id INTEGER PRIMARY KEY,
attr_id INTEGER,
quote TEXT,
source TEXT,
rating REAL
);')
or croak $dbh->errstr;
$dbh->do('DROP TABLE IF EXISTS attribution;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS attribution (
attr_id INTEGER PRIMARY KEY,
name TEXT
);') or croak $dbh->errstr;
$dbh->do('DROP TABLE IF EXISTS category;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS category (
catg_id INTEGER PRIMARY KEY,
catg TEXT
);') or croak $dbh->errstr;
$dbh->do('DROP TABLE IF EXISTS quote_catg;') or croak $dbh->errstr;
$dbh->do('CREATE TABLE IF NOT EXISTS quote_catg (
id INTEGER PRIMARY KEY,
catg_id INTEGER,
quot_id INTEGER
);') or croak $dbh->errstr;
$dbh->disconnect or carp $dbh->errstr;
$dbh = undef;
};
return $@ and croak 'Cannot create database tables!';
}
q(My cat's breath smells like cat food. --Ralph Wiggum);
__END__
=head1 NAME
ACME::QuoteDB::LoadDB - Database loader for ACME::QuoteDB
=head1 VERSION
Version 0.1.1
=head1 SYNOPSIS
load a csv file to quotes database
my $load_db = ACME::QuoteDB::LoadDB->new({
file => '/home/me/data/simpsons_quotes.csv',
file_format => 'csv',
});
$load_db->data_to_db;
print $load_db->success; # bool
header columns of the csv file as follows:
"Quote", "Attribution Name", "Attribution Source", "Category", "Rating"
=head1 DESCRIPTION
This module is part of L<ACME::QuoteDB>. This is a Database loader, it
takes (quotes) data and loads into a database
(currently L<sqlite3 or mysql|/'CONFIGURATION AND ENVIRONMENT'>),
which is then accessed by L<ACME::QuoteDB>.
There are several ways to get quote data into the db via this loader:
(There are more aimed towards 'batch' operations, i.e load a bunch of
records quickly)
=over 4
=item 1
* csv file (pre determined format)
pros: quick and easy to load.
cons: getting the quotes data into the correct format need by this module
=item 2
* any source.
One can take quote data from any source, override
L<ACME::QuoteDB::LoadDB/dbload> loader methods to populate a record
and write it to the db.
pros: can get any quote data into the db.
cons: you supply the method. depending on the complexity of the data
source and munging required this will take longer then the other
methods.
=back
=head3 load from csv file
The pre defined csv file format is:
format of file is as follows: (headers)
"Quote", "Attribution Name", "Attribution Source", "Category", "Rating"
for example:
"Quote", "Attribution Name", "Attribution Source", "Category", "Rating"
"I hope this has taught you kids a lesson: kids never learn.","Chief Wiggum","The Simpsons","Humor",9
"Sideshow Bob has no decency. He called me Chief Piggum. (laughs) Oh wait, I get it, he's all right.","Chief Wiggum","The Simpsons","Humor",8
my $load_db = ACME::QuoteDB::LoadDB->new({
file => dirname(__FILE__).'/data/simpsons_quotes.csv',
file_format => 'csv',
});
$load_db->data_to_db;
if (!$load_db->success){print 'failed'}
=head3 load from any source
If those dont catch your interest, ACME::QuoteDB::LoadDB is sub-classable,
so one can extract data anyway they like and populate the db themselves.
(there is a test that illustrates overriding the stub method, 'dbload')
you need to populate a record data structure:
$self->set_record(quote => q{}); # mandatory
$self->set_record(name => q{}); # mandatory
$self->set_record(source => q{}); # optional but useful
$self->set_record(catg => q{}); # optional but useful
$self->set_record(rating => q{}); # optional but useful
# then to write the record you call
$self->write_record;
NOTE: this is a one record at a time operation, so one would perform
this within a loop. there is no bulk write operation currently.
=head1 OVERVIEW
You have a collection of quotes (adages/sayings/quips/epigrams, etc) for
whatever reason, you use these quotes for whatever reason, you want to
access these quotes in a variety of ways,...
This module is part of L<ACME::QuoteDB>.
This is a Database loader, it takes data (quotes) and loads into a database,
which is then accessed by L<ACME::QuoteDB>.
See L<ACME::QuoteDB>.
=head1 USAGE
General usage, csv/tsv file in the expected format loaded to the database
my $load_db = ACME::QuoteDB::LoadDB->new({
file => '/home/me/data/sorta_funny_quotes.tsv',
file_format => 'tsv',
delimiter => "\t",
# provide a attr_source for all (if not in data)
# data is used first, if not defined use below
attr_source => 'Things Randomly Overheard',
# provide a category for all (if not in data)
category => 'Humor',
# provide a rating for all
rating => 5, # scale 1-10
});
$load_db->data_to_db;
if (!$load_db->success){print 'failed'}
Also see t/01-load_quotes.t included with the distribution.
(available from the CPAN if not included on your system)
=head1 SUBROUTINES/METHODS
This is an Object Oriented module. There is no proceedural interface.
=head2 new
lib/ACME/QuoteDB/LoadDB.pm view on Meta::CPAN
{
dry_run => 1,
verbose => 1
}
=item verbose - optional
display to STDOUT what is being done
This can be helpful for testing quotes extraction from file parsing
example:
{verbose => 1}
=item create_db - optional (boolean)
L<ACME::QuoteDB::LoadDB> default behaviour is to always assume there is a
database and append new data to that. (It is usually only needed the first
time one load's data)
setting this parameter to a true value will create a new database.
(so while this is an optional param, it is required at least once ;)
B<NOTE: it is not intelligent, if you hand it a populated database,
it will happily overwrite all data>
B<AGAIN: setting this param will destroy the current database, creating a new
empty one>
example:
{create_db => 1}
=back
=head2 data_to_db
takes the data input provided to new, process' it and writes to the database.
should appropriatly blow up if not successful
=head2 dbload_from_csv
takes a csv file (in our defined format) as an argument, parses it and writes
the data to the database. (uses L<Text::CSV> with pure perl parser)
utf-8 safe. (opens file as utf8)
will croak with message if not successful
=head2 dbload
if your file format is set to 'html' or 'custom' you must
define this method to do your parsing in a sub class.
Load from html is not supported because there are too many
ways to represt the data. (same with 'custom')
(see tests for examples - there is a test for loading a 'fortune' file format)
One can subclass ACME::QuoteDB::LoadDB and override dbload,
to do our html parsing
=head2 debug_record
dump record (show what is set on the internal data structure)
e.g. Data::Dumper
=head2 set_record
only needed it one plans to sub-class this module.
otherwise, is transparent in usage.
if you are sub-classing this module, you would have to populate
this record. (L</write_record> knows about/uses this data structure)
possible fields consist of:
$self->set_record(quote => q{});
$self->set_record(rating => q{});
$self->set_record(name => q{});
$self->set_record(source => q{});
$self->set_record(catg => q{});
currently can only set one attribute at a time.
ie. you cant do this:
$self->set_record(
name => $name,
source => $source
);
# or this even
$self->set_record({
name => $name,
source => $source
});
=head2 get_record
only useful it one plans to sub-class this module.
otherwise, is transparent in usage.
if you are sub-classing this module, you would have to populate
this record. [see L</set_record>]
(L</write_record> knows about/uses this data structure)
possible fields consist of:
$self->get_record('quote');
$self->get_record('rating');
$self->get_record('name');
$self->get_record('source');
$self->get_record('catg');
=head2 success
indicates that the database load was successfull
is undef on failure or if trying a L</dry_run>
=head2 write_record
takes the data structure 'record' '$self->get_record'
(which must exist). checks if attribution name ($self->get_record('name')) exists,
if so, uses existing attribution name, otherwsie creates a new one
Load from html is not supported because there are too many
ways to represt the data. (see tests for examples)
One can subclass ACME::QuoteDB::LoadDB and override dbload,
to do our html parsing
=head2 create_db_tables
create an empty quotes database (with correct tables).
(usually only performed the first time you load data)
B<NOTE: will overwrite ALL existing data>
Set 'create_db' parameter (boolean) to a true value upon instantiation
to enable.
The default action is to assume the database (and tables) exist and just
append new L<ACME::QuoteDB::LoadDB> loads to that.
=begin comment
keep pod coverage happy.
# Coverage for ACME::QuoteDB::LoadDB is 71.4%, with 3 naked subroutines:
# Catg
# Quote
# Attr
# QuoteCatg
pod tests incorrectly state, Catg, Quote and Attr are subroutines, well they
are,... (as aliases) but are imported into here, not defined within
TODO: explore the above (is this a bug, if so, who's?, version effected,
create use case, etc)
=head2 Attr
=head2 Catg
=head2 Quote
=head2 QuoteCatg
=head2 QDBI
=end comment
=begin comment
These methods are more or less private.
I may use them in another modules but You don't need to use or
know about them, so I will obfuscate them here
=head2 create_db_tables_sqlite
=head2 create_db_tables_mysql
=end comment
=head1 DIAGNOSTICS
An error such as:
C<DBD::SQLite::db prepare_cached failed: no such table: ,...>
probably means that you do not have a database created in the correct format.
basically, you need to create the database, usually, on a first run
you need to add the flag:
create_db => 1, # first run, create the db
appending to an existing database is the default behaviour
see L</create_db_tables>
=head1 CONFIGURATION AND ENVIRONMENT
if you are running perl > 5.8.5 and have access to
install cpan modules, you should have no problem installing this module
(utf-8 support in Text::CSV not avaible until 5.8 - we don't support 'non
utf-8 mode)
=over 1
=item * By default, the quotes database used by this module installs in the
system path, 'lib', (See L<Module::Build/"INSTALL PATHS">)
as world writable - i.e. 0666 (and probably owned by root)
If you don't like this, you can modify Build.PL to not chmod the file and it
will install as 444/readonly, you can also set a chown in there for whoever
you want to have RW access to the quotes db.
Alternativly, one can specify a location to a quotes database (file) to use.
(Since the local mode is sqlite3, the file doesn't even need to exist, just
needs read/write access to the path)
Set the environmental variable:
$ENV{ACME_QUOTEDB_PATH} (untested on windows)
(this has to be set before trying a database load and also (everytime) before
using this module, obviouly)
Something such as:
BEGIN {
# give alternate path to the DB
# doesn't need to exist, will create
$ENV{ACME_QUOTEDB_PATH} = '/home/me/my_stuff/my_quote_db'
}
* (NOTE: be sure this (BEGIN) exists *before* the 'use ACME::QuoteDB' lines)
The default is to use sqlite3.
In order to connect to a mysql database, several environmental variables
are required.
BEGIN {
# have to set this to use remote database
( run in 0.610 second using v1.01-cache-2.11-cpan-39bf76dae61 )