AI-Categorizer

 view release on metacpan or  search on metacpan

lib/AI/Categorizer.pm  view on Meta::CPAN

package AI::Categorizer;
$VERSION = '0.09';

use strict;
use Class::Container;
use base qw(Class::Container);
use Params::Validate qw(:types);
use File::Spec;
use AI::Categorizer::Learner;
use AI::Categorizer::Document;
use AI::Categorizer::Category;
use AI::Categorizer::Collection;
use AI::Categorizer::Hypothesis;
use AI::Categorizer::KnowledgeSet;


__PACKAGE__->valid_params
  (
   progress_file => { type => SCALAR, default => 'save' },
   knowledge_set => { isa => 'AI::Categorizer::KnowledgeSet' },
   learner       => { isa => 'AI::Categorizer::Learner' },
   verbose       => { type => BOOLEAN, default => 0 },
   training_set  => { type => SCALAR, optional => 1 },
   test_set      => { type => SCALAR, optional => 1 },
   data_root     => { type => SCALAR, optional => 1 },
  );

__PACKAGE__->contained_objects
  (
   knowledge_set => { class => 'AI::Categorizer::KnowledgeSet' },
   learner       => { class => 'AI::Categorizer::Learner::NaiveBayes' },
   experiment    => { class => 'AI::Categorizer::Experiment',
		      delayed => 1 },
   collection    => { class => 'AI::Categorizer::Collection::Files',
		      delayed => 1 },
  );

sub new {
  my $package = shift;
  my %args = @_;
  my %defaults;
  if (exists $args{data_root}) {
    $defaults{training_set} = File::Spec->catfile($args{data_root}, 'training');
    $defaults{test_set} = File::Spec->catfile($args{data_root}, 'test');
    $defaults{category_file} = File::Spec->catfile($args{data_root}, 'cats.txt');
    delete $args{data_root};
  }

  return $package->SUPER::new(%defaults, %args);
}

#sub dump_parameters {
#  my $p = shift()->SUPER::dump_parameters;
#  delete $p->{stopwords} if $p->{stopword_file};
#  return $p;
#}

sub knowledge_set { shift->{knowledge_set} }
sub learner       { shift->{learner} }

# Combines several methods in one sub
sub run_experiment {
  my $self = shift;
  $self->scan_features;
  $self->read_training_set;
  $self->train;
  $self->evaluate_test_set;
  print $self->stats_table;
}

sub scan_features {
  my $self = shift;
  return unless $self->knowledge_set->scan_first;
  $self->knowledge_set->scan_features( path => $self->{training_set} );
  $self->knowledge_set->save_features( "$self->{progress_file}-01-features" );
}

sub read_training_set {
  my $self = shift;
  $self->knowledge_set->restore_features( "$self->{progress_file}-01-features" )
    if -e "$self->{progress_file}-01-features";
  $self->knowledge_set->read( path => $self->{training_set} );
  $self->_save_progress( '02', 'knowledge_set' );
  return $self->knowledge_set;
}

sub train {
  my $self = shift;
  $self->_load_progress( '02', 'knowledge_set' );
  $self->learner->train( knowledge_set => $self->{knowledge_set} );
  $self->_save_progress( '03', 'learner' );
  return $self->learner;
}

sub evaluate_test_set {
  my $self = shift;
  $self->_load_progress( '03', 'learner' );
  my $c = $self->create_delayed_object('collection', path => $self->{test_set} );
  $self->{experiment} = $self->learner->categorize_collection( collection => $c );
  $self->_save_progress( '04', 'experiment' );
  return $self->{experiment};
}

sub stats_table {
  my $self = shift;
  $self->_load_progress( '04', 'experiment' );
  return $self->{experiment}->stats_table;
}

sub progress_file {
  shift->{progress_file};
}



( run in 2.516 seconds using v1.01-cache-2.11-cpan-d8267643d1d )