ACME-QuoteDB

 view release on metacpan or  search on metacpan

lib/ACME/QuoteDB/LoadDB.pm  view on Meta::CPAN

      $self->{record}->{$field} = $value;
  }

  return $self;
}

sub debug_record {
  my ($self) = @_;

  print Dumper $self->{record};

  return;
}

sub get_record {
  my ($self, $field) = @_;

  if (not $field){return $self}

  return $self->{record}->{$field};
}

sub data_to_db {
    my ($self) = @_;

    if ($self->{file} and $self->{data} and $self->{dir}){
        croak 'only file, data or dir as arg but not both'
    }
    elsif (! ($self->{file} or $self->{data} or $self->{dir})) {
        croak 'file, data or dir needed as arg'
    }

    if ($self->{file}) {
        $self->_parse_file($self->{file});
    }
    elsif ($self->{data}) {
        $self->_parse_data($self->{data});
    }
    elsif ($self->{dir}) {
        my $dir = $self->{dir};
        my $e = q{};
        foreach my $f (<$dir*>) {
           #if (! (-e $f) || -z $f) # no worky - need path info
           $self->_parse_file($f);
           $e++;
        }
        if (! $e){croak 'no files to parse in: ', Dumper $dir;};
    }
    else {
      croak 'no file source in args!', Dumper $self;
    }

    return;
}

sub _parse_file {
  my ($self, $file) = @_;

  if (!-f $file) { croak "file not found: $file" }

  if ($self->{verbose}){warn "processing file: $file\n"};

  if (($self->{file_format} eq 'csv') || ($self->{file_format} eq 'tsv')){
      $self->dbload_from_csv($file);
  }
  elsif (($self->{file_format} eq 'html') || ($self->{file_format} eq 'custom')){
      # not supported, too many possibilities
      # supply your own
      $self->dbload($file);
  }
  else {
      croak 'unsupported file format requested, format must be csv or tsv';
  }

  return;
}

sub _parse_data {
  my ($self, $data) = @_;

  if (!$data) {croak "data empty $data"}

  if ($self->{verbose}){carp 'processing data:'};

  if ($self->{file_format} =~ /(?:csv|tsv)/sm) {
      croak 'TODO: not yet supported';
      #$self->dbload_from_csv($data);
  }
  elsif (($self->{file_format} eq 'html') || ($self->{file_format} eq 'custom')){
      # not supported, too many possibilities
      # supply your own
      $self->dbload($data);
  }
  else {
      croak 'unsupported file format requested, '
             .'format must be csv, tsv. html, custom also possible';
  }

  return $self;
}

sub _confirm_header_order {
  my ($hr) = @_;

  return ($hr->{quote}  eq 'Quote'
      and $hr->{name}   eq 'Attribution Name',
      and $hr->{source} eq 'Attribution Source',
      and $hr->{catg}   eq 'Category',
      and $hr->{rating} eq 'Rating')
      or croak 'incorrect headers or header order';
}

sub dbload_from_csv {
  my ($self, $file) = @_;

  my $delim = $self->{delim} || ',';
  my $csv = Text::CSV->new({
     sep_char => $delim,
     binary => 1
  });
  $csv->column_names (@QUOTE_FIELDS);

  open my $source, '<:encoding(utf8)', $file || croak $!;

  _confirm_header_order($csv->getline_hr($source));

  while (my $hr = $csv->getline_hr($source)) {
      next unless $hr->{quote} and $hr->{name};

      if ($self->{verbose}){
          print "\n",
                'Quote:   ', $hr->{quote},"\n",
                'Name:    ', $hr->{name},"\n",
                'Source:  ', $hr->{source},"\n",
                'Category:', $hr->{catg},"\n",
                'Rating:  ', $hr->{rating},"\n\n";
      };

      $self->set_record(quote  => $hr->{quote});
      $self->set_record(name   => $hr->{name});
      $self->set_record(source => ($self->{attr_source} || $hr->{source}));
      # take user defined first
      # TODO support multi categories

lib/ACME/QuoteDB/LoadDB.pm  view on Meta::CPAN

Files being loaded are assumed to be utf8 encoded. if utf8 flag is not detected,
falls back to latin1 (iso-8859-1). If neither of these is correct, set this
option to the encoding your file is in.

=back

=head4 Operation Related Parameters

=over 4

=item  dry_run - optional

do not write to the database. Use with verbose flag to see what would have beed
written.

This can be helpful for testing the outcome of Loading results. 

i.e. like to confirm that the parsing of your data is correct

example:

{
 dry_run => 1,
 verbose => 1
}

=item  verbose  - optional

display to STDOUT what is being done

This can be helpful for testing quotes extraction from file parsing

example:

{verbose => 1}

=item  create_db  - optional (boolean)

L<ACME::QuoteDB::LoadDB> default behaviour is to always assume there is a
database and append new data to that. (It is usually only needed the first 
time one load's data)

setting this parameter to a true value will create a new database.
(so while this is an optional param, it is required at least once ;)

B<NOTE: it is not intelligent, if you hand it a populated database,
it will happily overwrite all data>

B<AGAIN: setting this param will destroy the current database, creating a new
empty one>

example:

{create_db => 1}


=back 

=head2 data_to_db

takes the data input provided to new, process' it and writes to the database.
should appropriatly blow up if not successful

=head2 dbload_from_csv

takes a csv file (in our defined format) as an argument, parses it and writes
the data to the database. (uses L<Text::CSV> with pure perl parser)
utf-8 safe. (opens file as utf8)

will croak with message if not successful


=head2 dbload

if your file format is set to 'html' or 'custom' you must 
define this method to do your parsing in a sub class.

Load from html is not supported because there are too many 
ways to represt the data. (same with 'custom')
(see tests for examples - there is a test for loading a 'fortune' file format)

One can subclass ACME::QuoteDB::LoadDB and override dbload,
to do our html parsing

=head2 debug_record

dump record (show what is set on the internal data structure) 

e.g. Data::Dumper

=head2 set_record

only needed it one plans to sub-class this module.
otherwise, is transparent in usage.

if you are sub-classing this module, you would have to populate 
this record. (L</write_record> knows about/uses this data structure)

possible fields consist of:

$self->set_record(quote  => q{});
$self->set_record(rating => q{});
$self->set_record(name   => q{});
$self->set_record(source => q{});
$self->set_record(catg   => q{});

currently can only set one attribute at a time.

ie. you cant do this:

 $self->set_record(
            name   => $name,
            source => $source
 );

 # or this even
 $self->set_record({
            name   => $name,
            source => $source
 });



( run in 0.905 second using v1.01-cache-2.11-cpan-39bf76dae61 )