ACME-QuoteDB

 view release on metacpan or  search on metacpan

t/01-load_quotes.t  view on Meta::CPAN

#!perl -T

use strict;
use warnings;

use Carp qw/croak/;

BEGIN {
    eval "use DBD::SQLite";
    $@ and croak 'DBD::SQLite is a required dependancy';
}

use ACME::QuoteDB;
use ACME::QuoteDB::LoadDB;

#use Test::More 'no_plan';
use Test::More tests => 29;
use File::Basename qw/dirname/;
use Data::Dumper qw/Dumper/;
use File::Spec;

# A. test dry run, show if parsing is succesful but don't load the database
{
  my $q = File::Spec->catfile((dirname(__FILE__),'data'), 
                               'simpsons_quotes.tsv.csv'
          );

  # only 2 supported formats: 'simple' text (which is the default) and 'tsv' 
  my $load_db = ACME::QuoteDB::LoadDB->new({
                              file        => $q,
                              file_format => 'tsv', # the only supported format
                              delimiter   => "\t",
                              # provide a category for all (if not in data)
                              category    => 'Humor',
                              # provide a attr_source for all (if not in data)
                              attr_source => 'The Simpsons',
                              dry_run     => 1, # don't write to the database
                              #verbose    => 1, # show what is being done
                              create_db   => 1, # need to create the database
                          });
  isa_ok $load_db, 'ACME::QuoteDB::LoadDB';

  $load_db->data_to_db;

  #flag not set on dry_run
  is $load_db->success, undef; # success only after a database write, 
  
  my $sq = ACME::QuoteDB->new;
  isa_ok $sq, 'ACME::QuoteDB';
  ok ! $sq->list_attr_names;
}

{
  my $load_db = ACME::QuoteDB::LoadDB->new({
                              file =>
                              #dirname(__FILE__).'/data/simpsons_quotes.tsv.csv',
                              File::Spec->catfile(
                                  (dirname(__FILE__),'data'), 
                                     'simpsons_quotes.tsv.csv'
                              ),
                              file_format => 'tsv',
                              delimiter => "\t",
                              #verbose => 1,
                              create_db   => 1, # first run, create the db
                              # provide a attr_source for all (if not in data)
                              attr_source => 'The Simpsons',
                              # provide a category for all (if not in data)
                              category => 'Humor',
                              # provide a rating for all
                              rating   => 6,
                          });
  
  isa_ok $load_db, 'ACME::QuoteDB::LoadDB';

  $load_db->data_to_db;

  ok $load_db->success;
  is $load_db->success, 1;
   
  my $sq = ACME::QuoteDB->new;
  isa_ok $sq, 'ACME::QuoteDB';
  
  # expected attribution list from our data
  my @expected_attribution_list = (
           'Apu Nahasapemapetilon',
           'Chief Wiggum',
           'Comic Book Guy',
           'Grandpa Simpson',
           'Ralph Wiggum',
          );
  
  is( $sq->list_attr_names, join "\n", sort @expected_attribution_list);
}

{
  #my $sqf = dirname(__FILE__) .  '/data/simpsons_quotes.csv';

  my $sqf = File::Spec->catfile((dirname(__FILE__),'data'), 
                               'simpsons_quotes.csv'
          );
  my $load_db = ACME::QuoteDB::LoadDB->new({
                              file        => $sqf,
                              file_format => 'csv',
                              #delimiter  => ",", # comma is default
                              #verbose    => 1,
                              create_db   => 1, # first run, create the db
                          });
  
  isa_ok $load_db, 'ACME::QuoteDB::LoadDB';
  $load_db->data_to_db;
  ok $load_db->success;
  is $load_db->success, 1;
   
  my $sq = ACME::QuoteDB->new;
  isa_ok $sq, 'ACME::QuoteDB';
  
  # expected attribution list from our data
  my @expected_attribution_list = (
           'Apu Nahasapemapetilon',
           'Chief Wiggum',
           'Comic Book Guy',
           'Grandpa Simpson',
           'Ralph Wiggum',
          );
  
  is( $sq->list_attr_names, join("\n", sort(@expected_attribution_list)));
}

{ # load from html is not supported because there are too many 
  # ways to represt the data.
  # this is an example of extracting quotes from html:
  # subclass ACME::QuoteDB::LoadDB and override dbload,
  # to do our html parsing
  package LoadQuoteDBFromHtml;
  use base 'ACME::QuoteDB::LoadDB';
  use Carp qw/croak/;
  use Data::Dumper qw/Dumper/;
  use HTML::TokeParser;

    sub dbload {
      my ($self, $file) = @_;

      my $p = HTML::TokeParser->new($file) || croak $!;
    
      while (my $token = $p->get_tag("p")) {
          my $idn = $token->[1]{class} || q{};
          my $id = $token->[1]{id} || q{}; # if a quotation is continued (id
          #is not set)
          next unless $idn and ( $idn eq 'quotation' || $idn eq 'source');
          #my $data = $p->get_trimmed_text("/p");
          my $data = $p->get_text('p', 'cite');
          #warn Dumper $data;
          # XXX see $self->set_record in ACME::QuoteDB::LoadDB for fields
          # to populate
          if ($idn eq 'quotation' and $id) {
              $self->set_record(quote => $data);
          }
          elsif ($idn eq 'quotation' and not $id) {
              my $d = $self->get_record('quote') || q{};
              $self->set_record(quote => qq{$d $data});
          }
          elsif ($idn eq 'source'){
              my ($name, $source) = split /,/, $data;
              if ($name) {
                chomp $name;
                $name =~ s/\A\s+//xms;
                $name =~ s/\s+\z//xms;
              }
              $self->set_record(name   => $name);
              $self->set_record(source => $source);

              # TODO
              #$self->set_record({
              #           name   => $name,
              #           source => $source
              #});
          }
    
          if ($self->get_record('quote') and $self->get_record('name')) {
              # we provided a category and rating, otherwise would have to
              # parse from data too
              $self->set_record(catg => $self->{category});
              $self->set_record(rating => $self->{rating});

              # TODO
              #$self->set_record({
              #           catg => $self->{category},
              #           rating => $self->{rating}
              #});

              #$self->debug_record;
              $self->write_record;
          }
      }
    }

  package main;

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 0.517 second using v1.00-cache-2.02-grep-82fe00e-cpan-2c419f77a38b )