ACME-QuoteDB

 view release on metacpan or  search on metacpan

lib/ACME/QuoteDB/LoadDB.pm  view on Meta::CPAN

#$Id: LoadDB.pm,v 1.30 2009/09/30 07:37:09 dinosau2 Exp $
# /* vim:et: set ts=4 sw=4 sts=4 tw=78: */

package ACME::QuoteDB::LoadDB;

use 5.008005;        # require perl 5.8.5, re: DBD::SQLite Unicode
use warnings;
use strict;

#use criticism 'brutal'; # use critic with a ~/.perlcriticrc

use version; our $VERSION = qv('0.1.1');

# with Text::CSV only use 'perl csv loader'
# 'one time' db load performance not a concern
BEGIN {local $ENV{PERL_TEXT_CSV} = 0}

use aliased 'ACME::QuoteDB::DB::Attribution' => 'Attr';
use aliased 'ACME::QuoteDB::DB::QuoteCatg'  => 'QuoteCatg';
use aliased 'ACME::QuoteDB::DB::Category'  => 'Catg';
use aliased 'ACME::QuoteDB::DB::Quote'    => 'Quote';
use aliased 'ACME::QuoteDB::DB::DBI'     => 'QDBI';
use File::Basename qw/dirname basename/;
use File::Glob qw(:globally :nocase);
use Encode qw/is_utf8 decode/;
use Data::Dumper qw/Dumper/;
use Carp qw/carp croak/;
use Text::CSV;
use Readonly;
use DBI;

# if not in utf8 latin1 is assumed
my $FILE_ENCODING = 'iso-8859-1';

Readonly my @QUOTE_FIELDS => qw/quote name source catg rating/;

# XXX refactor
sub new {
    my ($class, $args) = @_;

    # TODO encapsulation
    my $self = bless {}, $class;

    # store each record we extract - keys map to database fields
    # TODO proper encapsulation
    $self->{record} = {};
    $self->{record}->{quote}  = q{};
    $self->{record}->{rating} = q{};
    $self->{record}->{name}   = q{};
    $self->{record}->{source} = q{};
    $self->{record}->{catg}   = q{};

    $self->{file}        = $args->{file};
    $self->{dir}         = $args->{dir};
    $self->{data}        = $args->{data};
    $self->{file_format} = $args->{file_format};
    $FILE_ENCODING       = $args->{file_encoding} || $FILE_ENCODING;
    $self->{delim}       = $args->{delimiter};
    $self->{verbose}     = $args->{verbose};
    $self->{category}    = $args->{category};
    $self->{rating}      = $args->{rating};
    $self->{attr_source} = $args->{attr_source};
    $self->{orig_args}   = $args;
    $self->{success}     = undef;

    # start with if set
    $self->{record}->{rating} = $self->{rating};
    $self->{record}->{name}   = $self->{attr_source};
    $self->{record}->{source} = $self->{attr_source};
    if (ref $self->{category} eq 'ARRAY') {
       $self->{record}->{catg} = ();
       foreach my $c (@{$self->{category}}){
         push @{$self->{record}->{catg}}, $c;
       }
    }
    else {
       $self->{record}->{catg} = $self->{category};
    }

    # db connection info
    if ($ENV{ACME_QUOTEDB_DB}) {
        $self->{db}   = $ENV{ACME_QUOTEDB_DB};
        $self->{host} = $ENV{ACME_QUOTEDB_HOST};
        $self->{user} = $ENV{ACME_QUOTEDB_USER};
        $self->{pass} = $ENV{ACME_QUOTEDB_PASS};
    }

    if (!$args->{dry_run}){$self->{write_db} = 1};
    #if ($args->{create_db}) {$self->create_db};
    if ($args->{create_db}) {$self->create_db_tables};

    return $self;
}

sub set_record {
  my ($self, $field, $value) = @_;

  # TODO support mult-field simultanous loading

  if ($value) {
      $self->{record}->{$field} = $value;
  }

  return $self;
}

sub debug_record {
  my ($self) = @_;

  print Dumper $self->{record};

  return;
}

sub get_record {
  my ($self, $field) = @_;

  if (not $field){return $self}

  return $self->{record}->{$field};
}

sub data_to_db {
    my ($self) = @_;

    if ($self->{file} and $self->{data} and $self->{dir}){
        croak 'only file, data or dir as arg but not both'
    }
    elsif (! ($self->{file} or $self->{data} or $self->{dir})) {
        croak 'file, data or dir needed as arg'
    }

    if ($self->{file}) {
        $self->_parse_file($self->{file});
    }
    elsif ($self->{data}) {
        $self->_parse_data($self->{data});
    }
    elsif ($self->{dir}) {
        my $dir = $self->{dir};
        my $e = q{};
        foreach my $f (<$dir*>) {
           #if (! (-e $f) || -z $f) # no worky - need path info
           $self->_parse_file($f);
           $e++;
        }
        if (! $e){croak 'no files to parse in: ', Dumper $dir;};
    }
    else {
      croak 'no file source in args!', Dumper $self;
    }

    return;
}

sub _parse_file {
  my ($self, $file) = @_;

  if (!-f $file) { croak "file not found: $file" }

  if ($self->{verbose}){warn "processing file: $file\n"};

  if (($self->{file_format} eq 'csv') || ($self->{file_format} eq 'tsv')){
      $self->dbload_from_csv($file);
  }
  elsif (($self->{file_format} eq 'html') || ($self->{file_format} eq 'custom')){
      # not supported, too many possibilities
      # supply your own
      $self->dbload($file);
  }
  else {
      croak 'unsupported file format requested, format must be csv or tsv';
  }

  return;
}

sub _parse_data {
  my ($self, $data) = @_;

  if (!$data) {croak "data empty $data"}

  if ($self->{verbose}){carp 'processing data:'};

  if ($self->{file_format} =~ /(?:csv|tsv)/sm) {
      croak 'TODO: not yet supported';
      #$self->dbload_from_csv($data);
  }
  elsif (($self->{file_format} eq 'html') || ($self->{file_format} eq 'custom')){
      # not supported, too many possibilities
      # supply your own
      $self->dbload($data);
  }
  else {
      croak 'unsupported file format requested, '
             .'format must be csv, tsv. html, custom also possible';
  }

  return $self;
}

sub _confirm_header_order {
  my ($hr) = @_;

  return ($hr->{quote}  eq 'Quote'
      and $hr->{name}   eq 'Attribution Name',
      and $hr->{source} eq 'Attribution Source',
      and $hr->{catg}   eq 'Category',
      and $hr->{rating} eq 'Rating')
      or croak 'incorrect headers or header order';
}

sub dbload_from_csv {
  my ($self, $file) = @_;

  my $delim = $self->{delim} || ',';
  my $csv = Text::CSV->new({
     sep_char => $delim,
     binary => 1
  });
  $csv->column_names (@QUOTE_FIELDS);

  open my $source, '<:encoding(utf8)', $file || croak $!;

  _confirm_header_order($csv->getline_hr($source));

  while (my $hr = $csv->getline_hr($source)) {
      next unless $hr->{quote} and $hr->{name};

      if ($self->{verbose}){
          print "\n",
                'Quote:   ', $hr->{quote},"\n",
                'Name:    ', $hr->{name},"\n",
                'Source:  ', $hr->{source},"\n",
                'Category:', $hr->{catg},"\n",
                'Rating:  ', $hr->{rating},"\n\n";
      };

      $self->set_record(quote  => $hr->{quote});
      $self->set_record(name   => $hr->{name});
      $self->set_record(source => ($self->{attr_source} || $hr->{source}));
      # take user defined first
      # TODO support multi categories
      $self->set_record(catg   => ($self->{category} || $hr->{catg}));
      $self->set_record(rating => ($self->{rating} || $hr->{rating}));
      $self->write_record;
  }
  close $source or carp $!;

  return $self;
}

# sub class this - i.e. provide this method in your code (see test
# 01-load_quotes.t)
sub dbload {
  croak 'Override this. Provide this method in a sub class (child) of this object';
  # see tests: t/01-load_quotes.t for examples
}

sub _to_utf8 {
    my ($self) = @_;

    RECORD:
    foreach my $r (@QUOTE_FIELDS){
        my $val = $self->{record}->{$r};
        if (ref $val eq 'ARRAY'){
         foreach my $v (@{$val}){
           if (!is_utf8($v)){
             push @{$self->{record}->{$r}}, decode($FILE_ENCODING, $v);
           }
         }
        }
        else {
          if (!is_utf8($val)){
            $self->{record}->{$r} = decode($FILE_ENCODING, $val);
          }
        }
    }

    return $self;
}

# XXX refactor (the following 3 methods)

# one person can have many quotes, is this person in our attribution table
# already?
sub _get_id_if_attr_name_exist {
    my ($self) = @_;

    my $attr_id = q{};

    RECS:
    foreach my $c_obj (Attr->retrieve_all){
        next RECS if not $c_obj->name;
        if ($c_obj->name eq $self->get_record('name')){
          # use attribution id if already exists
          $attr_id = $c_obj->attr_id;
        }
    }
    return $attr_id;
}

sub _get_id_if_catg_exist {
    my ($self, $ctg) = @_;

    my $catg_id = q{};
    # get category id
    RECS:
    foreach my $c_obj (Catg->retrieve_all){
        next RECS if not $c_obj->catg;
        if ($c_obj->catg eq $ctg){
          # use cat_id if already exists
          $catg_id = $c_obj->catg_id;
        }
    }
    return $catg_id;
}

#TODO : refactor
sub write_record {
    my ($self) = @_;

    $self->_to_utf8;

    if ($self->{verbose} and $self->get_record('name')){
        print 'Attribution Name: ',$self->get_record('name'),"\n";
    };

    my $attr_id = $self->_get_id_if_attr_name_exist;
    # nope, ok, add them
    if (not $attr_id) { # attribution record does not already exist, 
                        # create new entry
        if ($self->{write_db}) {
            $attr_id = Attr->insert({
                          name   => $self->get_record('name'),
                       });
        }
    }

    my $catg_ids = ();
    if ($self->{write_db}) {
      my ($catg) = $self->get_record('catg');
      if (! ref $catg){ # 'single' value
        my $catg_id = $self->_get_id_if_catg_exist($catg);
        if (!$catg_id) {
          # category does not already exist, 
          # create new entry
          $catg_id = Catg->insert({catg => $catg});
        }
        push @{$catg_ids}, $catg_id;
      } # support multi catg
      elsif (ref $catg eq 'ARRAY'){
          foreach my $c (@{$catg}){
            my $catg_id = $self->_get_id_if_catg_exist($c);
            if (!$catg_id) { # category does not already exist, 
               # create new entry
               $catg_id = Catg->insert({catg => $c});
            }
            push @{$catg_ids}, $catg_id;
          }
      }
    }

    $self->_display_vals_if_verbose;

    if ($self->{write_db}) {
       my $qid = Quote->insert({
               attr_id  => $attr_id,
               quote    => $self->get_record('quote'),
               source   => $self->get_record('source'),
               rating   => $self->get_record('rating')
       }) or croak $!;

       if ($qid) {
         my $id;
         foreach my $cid (@{$catg_ids}){
           $id =   QuoteCatg->insert({
                 quot_id  => $qid,
                 catg_id  => $cid,
            }) or croak $!;
         }
       }
    }
    # confirmation?
    # TODO add a test for failure
    if ($self->{write_db} and not $attr_id) {croak 'db write not successful'}

    #$self->set_record(undef);
    $self->{record} = {};
    $self->_reset_orig_args;

    if ($self->{write_db}) {
        $self->success(1);
    }

    return $self->success;
}

sub _reset_orig_args {
  my ($self) = @_;

  $self->{record}->{rating} = $self->{orig_args}->{rating};
  $self->{record}->{name}   = $self->{orig_args}->{attr_source};
  $self->{record}->{source} = $self->{orig_args}->{attr_source};
  if (ref $self->{orig_args}->{category} eq 'ARRAY') {
     foreach my $c (@{$self->{orig_args}->{category}}){
       push @{$self->{record}->{catg}}, $c;
     }
  } 
  else {
    $self->{record}->{catg} = $self->{orig_args}->{category};
  }

}

sub success {
  my ($self, $flag) = @_;

  $self->{success} ||= $flag;

  return $self->{success};
};

sub _display_vals_if_verbose {
    my ($self) = @_;

    if ($self->{verbose}){
        #print 'Quote: ',   $self->get_record('quote'),"\n";
        #print 'Source: ',  $self->get_record('source'),"\n";
        #print 'Category: ',$self->get_record('catg'),"\n";
        #print 'Rating: ',  $self->get_record('rating'),"\n";
        print Dumper $self->{record};
    }

    return $self;
}

#sub create_db {
#    my ($self) = @_;
#
#    if ($self->{db} and $self->{host}) {
#      $self->create_db_mysql();
#    }
#}

sub create_db_tables {
    my ($self) = @_;

    if ($self->{db} and $self->{host}) {
      #$self->create_db_mysql();
      $self->create_db_tables_mysql();
    }
    else {
      create_db_tables_sqlite();
    }

    return $self;

}

# XXX  we want the user to supply a pre created database.
# created as such 'CREATE DATABASE $dbn CHARACTER SET utf8 COLLATE utf8_general_ci'
# this get's into too many isseuwith privs and database creation
#Sat Aug 22 13:42:37 PDT 2009
# did this:
#mysql> CREATE DATABASE acme_quotedb CHARACTER SET utf8 COLLATE utf8_general_ci;
#mysql> grant usage on *.* to acme_user@localhost identified by 'acme';
#mysql> grant all privileges on acme_quotedb.* to acme_user@localhost ;

#sub create_db_mysql {
#    my ($self) = @_;
#
#     # hmmmm, what about priv's access, etc
#     # maybe user need to supply a db, they have 
#     # access to, already created (just the db though)
#     ## create our db
#     #my $dbhc = DBI->connect('DBI:mysql:database=mysql;host='
#     #                           .$self->{host}, $self->{user}, $self->{pass})
#     #      || croak "db cannot be accessed $! $DBI::errstr";
#
#     #my $dbn = $self->{db};
#     #my $db = qq(CREATE DATABASE $dbn CHARACTER SET utf8 COLLATE utf8_general_ci);
#     # eval {
#     #     $dbhc->do($db) or croak $dbhc->errstr;
#     # };
#     # $@ and croak 'Cannot create database!';
#     # $dbhc->disconnect; $dbhc = undef;
#
#     my $drh = DBI->install_driver('mysql');
#     my $rc = $drh->func("dropdb", $self->{db}, 
#                    [$self->{host}, $self->{user}, $self->{password}],
#                    'admin'
#                );
#
#        $rc = $drh->func("createdb", $self->{db}, 
#                    [$self->{host}, $self->{user}, $self->{password}],
#                    'admin'
#                );
#}

# XXX refactor with sqlite
sub create_db_tables_mysql {
    my ($self) = @_;

     # connect to our db
     my $c = $self->{db}.';host='.$self->{host};
     my $dbh = DBI->connect(
             "DBI:mysql:database=$c", $self->{user}, $self->{pass})
               || croak "db cannot be accessed $! $DBI::errstr";

    eval {
        $dbh->do('DROP TABLE IF EXISTS quote;') or croak $dbh->errstr;

        $dbh->do('CREATE TABLE IF NOT EXISTS quote (
            quot_id        INTEGER NOT NULL AUTO_INCREMENT, 
            attr_id        INTEGER,
            quote          TEXT,
            source         TEXT,
            rating         REAL,
            PRIMARY KEY(quot_id)
            );')
            #)CHARACTER SET utf8 COLLATE utf8_general_ci;
            #) ENGINE = MYISAM CHARACTER SET utf8 COLLATE utf8_general_ci; 
            or croak $dbh->errstr;

        $dbh->do('DROP TABLE IF EXISTS attribution;') or croak $dbh->errstr;

        $dbh->do('CREATE TABLE IF NOT EXISTS attribution (
            attr_id  INTEGER NOT NULL AUTO_INCREMENT,
            name     TEXT,
            PRIMARY KEY(attr_id)
            );') or croak $dbh->errstr;

        $dbh->do('DROP TABLE IF EXISTS category;') or croak $dbh->errstr;

        $dbh->do('CREATE TABLE IF NOT EXISTS category (
            catg_id    INTEGER NOT NULL AUTO_INCREMENT, 
            catg       TEXT,
            PRIMARY KEY(catg_id)
            );') or croak $dbh->errstr;

        $dbh->do('DROP TABLE IF EXISTS quote_catg;') or croak $dbh->errstr;

        $dbh->do('CREATE TABLE IF NOT EXISTS quote_catg (
            id    INTEGER NOT NULL AUTO_INCREMENT, 
            catg_id    INTEGER, 
            quot_id    INTEGER, 
            PRIMARY KEY(id)
            );') or croak $dbh->errstr;


       $dbh->disconnect or warn $dbh->errstr;

       $dbh = undef;
    };

    return $@ and croak 'Cannot create database tables!';

}

sub create_db_tables_sqlite {

     my $db = QDBI->get_current_db_path;

     #XXX is there really no way to do this with the existing 
     # connection?!(class dbi)
     my $dbh = DBI->connect('dbi:SQLite:dbname='.$db, '', '')
       || croak "$db cannot be accessed $! $DBI::errstr";

    #-- sqlite does not have a varchar datatype: VARCHAR(255)
    #-- A column declared INTEGER PRIMARY KEY will autoincrement.
    eval {
        $dbh->do('DROP TABLE IF EXISTS quote;') or croak $dbh->errstr;

        $dbh->do('CREATE TABLE IF NOT EXISTS quote (
            quot_id        INTEGER PRIMARY KEY, 
            attr_id        INTEGER,
            quote          TEXT,
            source         TEXT,
            rating         REAL
            );')
            or croak $dbh->errstr;

        $dbh->do('DROP TABLE IF EXISTS attribution;') or croak $dbh->errstr;

        $dbh->do('CREATE TABLE IF NOT EXISTS attribution (
            attr_id  INTEGER PRIMARY KEY,
            name     TEXT
            );') or croak $dbh->errstr;

        $dbh->do('DROP TABLE IF EXISTS category;') or croak $dbh->errstr;

        $dbh->do('CREATE TABLE IF NOT EXISTS category (
            catg_id    INTEGER PRIMARY KEY, 
            catg       TEXT
            );') or croak $dbh->errstr;

        $dbh->do('DROP TABLE IF EXISTS quote_catg;') or croak $dbh->errstr;

        $dbh->do('CREATE TABLE IF NOT EXISTS quote_catg (
            id         INTEGER PRIMARY KEY,
            catg_id    INTEGER,
            quot_id    INTEGER
            );') or croak $dbh->errstr;

        $dbh->disconnect or carp $dbh->errstr;

        $dbh = undef;
    };

    return $@ and croak 'Cannot create database tables!';

}

q(My cat's breath smells like cat food. --Ralph Wiggum);


__END__

=head1 NAME

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 0.567 second using v1.00-cache-2.02-grep-82fe00e-cpan-2c419f77a38b )