AI-Categorizer
view release on metacpan or search on metacpan
lib/AI/Categorizer.pm view on Meta::CPAN
package AI::Categorizer;
$VERSION = '0.09';
use strict;
use Class::Container;
use base qw(Class::Container);
use Params::Validate qw(:types);
use File::Spec;
use AI::Categorizer::Learner;
use AI::Categorizer::Document;
use AI::Categorizer::Category;
use AI::Categorizer::Collection;
use AI::Categorizer::Hypothesis;
use AI::Categorizer::KnowledgeSet;
__PACKAGE__->valid_params
(
progress_file => { type => SCALAR, default => 'save' },
knowledge_set => { isa => 'AI::Categorizer::KnowledgeSet' },
learner => { isa => 'AI::Categorizer::Learner' },
verbose => { type => BOOLEAN, default => 0 },
training_set => { type => SCALAR, optional => 1 },
test_set => { type => SCALAR, optional => 1 },
data_root => { type => SCALAR, optional => 1 },
);
__PACKAGE__->contained_objects
(
knowledge_set => { class => 'AI::Categorizer::KnowledgeSet' },
learner => { class => 'AI::Categorizer::Learner::NaiveBayes' },
experiment => { class => 'AI::Categorizer::Experiment',
delayed => 1 },
collection => { class => 'AI::Categorizer::Collection::Files',
delayed => 1 },
);
sub new {
my $package = shift;
my %args = @_;
my %defaults;
if (exists $args{data_root}) {
$defaults{training_set} = File::Spec->catfile($args{data_root}, 'training');
$defaults{test_set} = File::Spec->catfile($args{data_root}, 'test');
$defaults{category_file} = File::Spec->catfile($args{data_root}, 'cats.txt');
delete $args{data_root};
}
return $package->SUPER::new(%defaults, %args);
}
#sub dump_parameters {
# my $p = shift()->SUPER::dump_parameters;
# delete $p->{stopwords} if $p->{stopword_file};
# return $p;
#}
sub knowledge_set { shift->{knowledge_set} }
sub learner { shift->{learner} }
# Combines several methods in one sub
sub run_experiment {
my $self = shift;
$self->scan_features;
$self->read_training_set;
$self->train;
$self->evaluate_test_set;
print $self->stats_table;
}
sub scan_features {
my $self = shift;
return unless $self->knowledge_set->scan_first;
$self->knowledge_set->scan_features( path => $self->{training_set} );
$self->knowledge_set->save_features( "$self->{progress_file}-01-features" );
}
sub read_training_set {
my $self = shift;
$self->knowledge_set->restore_features( "$self->{progress_file}-01-features" )
if -e "$self->{progress_file}-01-features";
$self->knowledge_set->read( path => $self->{training_set} );
$self->_save_progress( '02', 'knowledge_set' );
return $self->knowledge_set;
}
sub train {
my $self = shift;
$self->_load_progress( '02', 'knowledge_set' );
$self->learner->train( knowledge_set => $self->{knowledge_set} );
$self->_save_progress( '03', 'learner' );
return $self->learner;
}
sub evaluate_test_set {
my $self = shift;
$self->_load_progress( '03', 'learner' );
my $c = $self->create_delayed_object('collection', path => $self->{test_set} );
$self->{experiment} = $self->learner->categorize_collection( collection => $c );
$self->_save_progress( '04', 'experiment' );
return $self->{experiment};
}
sub stats_table {
my $self = shift;
$self->_load_progress( '04', 'experiment' );
return $self->{experiment}->stats_table;
}
sub progress_file {
shift->{progress_file};
}
( run in 2.516 seconds using v1.01-cache-2.11-cpan-d8267643d1d )