ACME-QuoteDB
view release on metacpan or search on metacpan
lib/ACME/QuoteDB/LoadDB.pm view on Meta::CPAN
$self->{record}->{quote} = q{};
$self->{record}->{rating} = q{};
$self->{record}->{name} = q{};
$self->{record}->{source} = q{};
$self->{record}->{catg} = q{};
$self->{file} = $args->{file};
$self->{dir} = $args->{dir};
$self->{data} = $args->{data};
$self->{file_format} = $args->{file_format};
$FILE_ENCODING = $args->{file_encoding} || $FILE_ENCODING;
$self->{delim} = $args->{delimiter};
$self->{verbose} = $args->{verbose};
$self->{category} = $args->{category};
$self->{rating} = $args->{rating};
$self->{attr_source} = $args->{attr_source};
$self->{orig_args} = $args;
$self->{success} = undef;
# start with if set
$self->{record}->{rating} = $self->{rating};
$self->{record}->{name} = $self->{attr_source};
$self->{record}->{source} = $self->{attr_source};
if (ref $self->{category} eq 'ARRAY') {
$self->{record}->{catg} = ();
foreach my $c (@{$self->{category}}){
push @{$self->{record}->{catg}}, $c;
}
}
else {
$self->{record}->{catg} = $self->{category};
}
# db connection info
if ($ENV{ACME_QUOTEDB_DB}) {
$self->{db} = $ENV{ACME_QUOTEDB_DB};
$self->{host} = $ENV{ACME_QUOTEDB_HOST};
$self->{user} = $ENV{ACME_QUOTEDB_USER};
$self->{pass} = $ENV{ACME_QUOTEDB_PASS};
}
if (!$args->{dry_run}){$self->{write_db} = 1};
#if ($args->{create_db}) {$self->create_db};
if ($args->{create_db}) {$self->create_db_tables};
return $self;
}
# Store a single value on the internal record hash.
#
#   $self->set_record($field => $value);
#
# Only one field/value pair is handled per call. A false $value (undef, 0,
# '0' or the empty string) is ignored, leaving whatever is currently stored
# under $field untouched. Always returns the object itself.
sub set_record {
    my $self  = shift;
    my $field = shift;
    my $value = shift;

    # TODO: support loading several fields in a single call
    $self->{record}->{$field} = $value if $value;

    return $self;
}
# Debugging aid: print a Data::Dumper rendering of the internal record
# hash to the currently selected output handle. Returns nothing.
sub debug_record {
    my $self = shift;

    my $record = $self->{record};
    print Dumper($record);

    return;
}
# Read from the internal record hash.
#
#   my $value = $self->get_record($field);
#
# Returns the value stored under $field (undef when nothing is stored
# there). When $field is missing or false, the object itself is returned
# instead.
sub get_record {
    my $self  = shift;
    my $field = shift;

    return $field ? $self->{record}->{$field} : $self;
}
# Take the input source configured on the object and hand it to the
# matching parser.
#
# Exactly one of $self->{file}, $self->{data} or $self->{dir} must be set:
#   file - passed to _parse_file
#   data - passed to _parse_data
#   dir  - every path matched by the glob "$dir*" is passed to _parse_file
#
# Croaks when more than one source is set, when none is set, or when the
# dir glob matches nothing. Returns nothing.
sub data_to_db {
    my ($self) = @_;

    # The sources are mutually exclusive: reject any combination of two or
    # more instead of silently using the first one and ignoring the rest.
    my $source_count = grep { $self->{$_} } qw(file data dir);
    if ($source_count > 1) {
        croak 'only file, data or dir as arg but not both';
    }
    if ($source_count == 0) {
        croak 'file, data or dir needed as arg';
    }

    if ($self->{file}) {
        $self->_parse_file($self->{file});
    }
    elsif ($self->{data}) {
        $self->_parse_data($self->{data});
    }
    else {
        my $dir = $self->{dir};

        # The pattern is the dir string with '*' appended, so $dir has to
        # carry its trailing path separator to match the directory content.
        my @paths = glob "$dir*";
        if (!@paths) {
            croak 'no files to parse in: ', Dumper $dir;
        }

        foreach my $path (@paths) {
            $self->_parse_file($path);
        }
    }

    return;
}
sub _parse_file {
my ($self, $file) = @_;
if (!-f $file) { croak "file not found: $file" }
if ($self->{verbose}){warn "processing file: $file\n"};
if (($self->{file_format} eq 'csv') || ($self->{file_format} eq 'tsv')){
$self->dbload_from_csv($file);
}
elsif (($self->{file_format} eq 'html') || ($self->{file_format} eq 'custom')){
# not supported, too many possibilities
lib/ACME/QuoteDB/LoadDB.pm view on Meta::CPAN
}
=item verbose - optional
display to STDOUT what is being done
This can be helpful for testing quotes extraction from file parsing
example:
{verbose => 1}
=item create_db - optional (boolean)
L<ACME::QuoteDB::LoadDB> default behaviour is to always assume there is a
database and append new data to that. (It is usually only needed the first
time one loads data)
setting this parameter to a true value will create a new database.
(so while this is an optional param, it is required at least once ;)
B<NOTE: it is not intelligent, if you hand it a populated database,
it will happily overwrite all data>
B<AGAIN: setting this param will destroy the current database, creating a new
empty one>
example:
{create_db => 1}
=back
=head2 data_to_db
takes the data input provided to new, processes it and writes to the database.
should appropriately blow up if not successful
=head2 dbload_from_csv
takes a csv file (in our defined format) as an argument, parses it and writes
the data to the database. (uses L<Text::CSV> with pure perl parser)
utf-8 safe. (opens file as utf8)
will croak with message if not successful
=head2 dbload
if your file format is set to 'html' or 'custom' you must
define this method to do your parsing in a sub class.
Load from html is not supported because there are too many
ways to represent the data. (same with 'custom')
(see tests for examples - there is a test for loading a 'fortune' file format)
One can subclass ACME::QuoteDB::LoadDB and override dbload,
to do our html parsing
=head2 debug_record
dump record (show what is set on the internal data structure)
e.g. Data::Dumper
=head2 set_record
only needed if one plans to sub-class this module.
otherwise, is transparent in usage.
if you are sub-classing this module, you would have to populate
this record. (L</write_record> knows about/uses this data structure)
possible fields consist of:
$self->set_record(quote => q{});
$self->set_record(rating => q{});
$self->set_record(name => q{});
$self->set_record(source => q{});
$self->set_record(catg => q{});
currently can only set one attribute at a time.
i.e. you can't do this:
$self->set_record(
name => $name,
source => $source
);
# or this even
$self->set_record({
name => $name,
source => $source
});
=head2 get_record
only useful if one plans to sub-class this module.
otherwise, is transparent in usage.
if you are sub-classing this module, you would have to populate
this record. [see L</set_record>]
(L</write_record> knows about/uses this data structure)
possible fields consist of:
$self->get_record('quote');
$self->get_record('rating');
$self->get_record('name');
$self->get_record('source');
$self->get_record('catg');
=head2 success
indicates that the database load was successful
is undef on failure or if trying a L</dry_run>
( run in 1.862 second using v1.01-cache-2.11-cpan-39bf76dae61 )