AI-Categorizer

 view release on metacpan or  search on metacpan

eg/demo.pl  view on Meta::CPAN

}

# Category information is mandatory: stop right away when the file is
# missing, otherwise record its path in %params for the collections below.
die "$cats not found - can't proceed without category information.\n"
  unless -e $cats;
$params{category_file} = $cats;


# In a real-world application these Collection objects could be of any
# type (any Collection subclass).  Or you could create each Document
# object manually.  Or you could let the KnowledgeSet create the
# Collection objects for you.

# Note the variable reuse: $training and $test are each passed in as the
# "path" argument and then overwritten with the resulting
# Collection::Files object.  Both collections receive the same %params.
$training = AI::Categorizer::Collection::Files->new( path => $training, %params );
$test     = AI::Categorizer::Collection::Files->new( path => $test, %params );

# We turn on verbose mode so you can watch the progress of loading &
# training.  This looks nicer if you have Time::Progress installed!

print "Loading training set\n";

lib/AI/Categorizer.pm  view on Meta::CPAN

package AI::Categorizer;
$VERSION = '0.09';

use strict;
use Class::Container;
use base qw(Class::Container);
use Params::Validate qw(:types);
use File::Spec;
use AI::Categorizer::Learner;
use AI::Categorizer::Document;
use AI::Categorizer::Category;
use AI::Categorizer::Collection;
use AI::Categorizer::Hypothesis;
use AI::Categorizer::KnowledgeSet;


# Parameters accepted by AI::Categorizer, declared through
# Class::Container's valid_params() with Params::Validate type constants.
#   progress_file - scalar, defaults to 'save'
#   knowledge_set - must be an AI::Categorizer::KnowledgeSet (or subclass)
#   learner       - must be an AI::Categorizer::Learner (or subclass)
#   verbose       - boolean, defaults to 0
#   training_set, test_set, data_root - optional scalars
__PACKAGE__->valid_params
  (
   progress_file => { type => SCALAR, default => 'save' },
   knowledge_set => { isa => 'AI::Categorizer::KnowledgeSet' },
   learner       => { isa => 'AI::Categorizer::Learner' },
   verbose       => { type => BOOLEAN, default => 0 },
   training_set  => { type => SCALAR, optional => 1 },
   test_set      => { type => SCALAR, optional => 1 },
   data_root     => { type => SCALAR, optional => 1 },
  );

__PACKAGE__->contained_objects
  (
   knowledge_set => { class => 'AI::Categorizer::KnowledgeSet' },
   learner       => { class => 'AI::Categorizer::Learner::NaiveBayes' },
   experiment    => { class => 'AI::Categorizer::Experiment',
		      delayed => 1 },
   collection    => { class => 'AI::Categorizer::Collection::Files',
		      delayed => 1 },

lib/AI/Categorizer/Category.pm  view on Meta::CPAN

package AI::Categorizer::Category;

use strict;
use AI::Categorizer::ObjectSet;
use Class::Container;
use base qw(Class::Container);

use Params::Validate qw(:types);
use AI::Categorizer::FeatureVector;

# Parameters accepted by Category objects.
#   name      - scalar, no default
#   documents - array reference, defaults to an empty list; when supplied,
#               every element must be an AI::Categorizer::Document
# Both are flagged "public => 0".
#
# Params::Validate hands a callback the value under test as its first
# argument, so the check has to walk the referenced array (@{$_[0]}).
# Scanning @_ itself would test the array reference rather than its
# elements and reject every explicitly supplied list.
__PACKAGE__->valid_params
  (
   name => {type => SCALAR, public => 0},
   documents  => {
		  type => ARRAYREF,
		  default => [],
		  callbacks => { 'all are Document objects' => 
				 sub { ! grep !UNIVERSAL::isa($_, 'AI::Categorizer::Document'), @{$_[0]} },
			       },
		  public => 0,
		 },
  );

__PACKAGE__->contained_objects
  (

lib/AI/Categorizer/Collection.pm  view on Meta::CPAN

package AI::Categorizer::Collection;
use strict;

use Params::Validate qw(:types);
use Class::Container;
use base qw(Class::Container);
# Parameters declared by the base Collection class:
#   verbose       - scalar, defaults to 0
#   stopword_file - optional scalar
#   category_hash - hash reference, defaults to an empty hash
#   category_file - optional scalar
__PACKAGE__->valid_params
  (
   verbose => {type => SCALAR, default => 0},
   stopword_file => { type => SCALAR, optional => 1 },
   category_hash => { type => HASHREF, default => {} },
   category_file => { type => SCALAR, optional => 1 },
  );

# Document objects are a "delayed" contained object whose class defaults
# to AI::Categorizer::Document::Text.
__PACKAGE__->contained_objects
  (
   document => { class => 'AI::Categorizer::Document::Text',
		 delayed => 1 },
  );

sub new {
  my ($class, %args) = @_;

lib/AI/Categorizer/Collection.pm  view on Meta::CPAN

be parsed and then fed as the C<stopwords> parameter to the
Document C<new()> method.

=item verbose

If true, some status/debugging information will be printed to
C<STDOUT> during operation.

=item document_class

The class indicating what type of Document object should be created.
This generally specifies the format that the documents are stored in.
The default is C<AI::Categorizer::Document::Text>.

=back

=item next()

Returns the next Document object in the Collection.

=item rewind()

lib/AI/Categorizer/Collection/DBI.pm  view on Meta::CPAN

package AI::Categorizer::Collection::DBI;
use strict;

use DBI;
use AI::Categorizer::Collection;
use base qw(AI::Categorizer::Collection);

use Params::Validate qw(:types);

# Parameters added by the DBI collection:
#   connection_string - scalar, defaults to undef
#   dbh               - a DBI::db object, defaults to undef
#   select_statement  - scalar, defaults to "SELECT text FROM documents"
__PACKAGE__->valid_params
  (
   connection_string => {type => SCALAR, default => undef},
   dbh => {isa => 'DBI::db', default => undef},
   select_statement => {type => SCALAR, default => "SELECT text FROM documents"},
  );

# The delayed "document" object defaults to the plain
# AI::Categorizer::Document class here.
__PACKAGE__->contained_objects
  (
   document => { class => 'AI::Categorizer::Document',
		 delayed => 1 },
  );

sub new {
  my $class = shift;

lib/AI/Categorizer/Collection/Files.pm  view on Meta::CPAN

package AI::Categorizer::Collection::Files;
use strict;

use AI::Categorizer::Collection;
use base qw(AI::Categorizer::Collection);

use Params::Validate qw(:types);
use File::Spec;

# Parameters added by the Files collection:
#   path    - scalar or array reference, no default (new() wraps a plain
#             scalar in an array reference)
#   recurse - boolean, defaults to 0
__PACKAGE__->valid_params
  (
   path => { type => SCALAR|ARRAYREF },
   recurse => { type => BOOLEAN, default => 0 },
  );

sub new {
  my $class = shift;
  my $self = $class->SUPER::new(@_);
  
  $self->{dir_fh} = do {local *FH; *FH};  # double *FH avoids a warning

  # Documents are contained in a directory, or list of directories
  $self->{path} = [$self->{path}] unless ref $self->{path};

lib/AI/Categorizer/Collection/InMemory.pm  view on Meta::CPAN

package AI::Categorizer::Collection::InMemory;
use strict;

use AI::Categorizer::Collection;
use base qw(AI::Categorizer::Collection);

use Params::Validate qw(:types);

# Parameter added by the InMemory collection:
#   data - hash reference, no default; new() walks it as name => params
#          pairs and reads each params->{categories} list
__PACKAGE__->valid_params
  (
   data => { type => HASHREF },
  );

sub new {
  my $self = shift()->SUPER::new(@_);
  
  while (my ($name, $params) = each %{$self->{data}}) {
    foreach (@{$params->{categories}}) {
      next if ref $_;
      $_ = AI::Categorizer::Category->by_name(name => $_);
    }

lib/AI/Categorizer/Collection/SingleFile.pm  view on Meta::CPAN

package AI::Categorizer::Collection::SingleFile;
use strict;

use AI::Categorizer::Collection;
use base qw(AI::Categorizer::Collection);

use Params::Validate qw(:types);

# Parameters added by the SingleFile collection:
#   path       - scalar or array reference, no default
#   categories - hash reference or undef, defaults to undef
#   delimiter  - scalar, no default
__PACKAGE__->valid_params
  (
   path => { type => SCALAR|ARRAYREF },
   categories => { type => HASHREF|UNDEF, default => undef },
   delimiter => { type => SCALAR },
  );

# Document objects are a "delayed" contained object whose class defaults
# to AI::Categorizer::Document::Text.
__PACKAGE__->contained_objects
  (
   document => { class => 'AI::Categorizer::Document::Text',
		 delayed => 1 },
  );

sub new {
  my $class = shift;

lib/AI/Categorizer/Document.pm  view on Meta::CPAN

package AI::Categorizer::Document;

use strict;
use Class::Container;
use base qw(Class::Container);

use Params::Validate qw(:types);
use AI::Categorizer::ObjectSet;
use AI::Categorizer::FeatureVector;

# Parameters accepted by Document objects.
#   name              - scalar; the only entry with neither a default nor
#                       "optional" set
#   categories        - array reference (default: empty); every element
#                       must be an AI::Categorizer::Category; public => 0
#   stopwords         - array or hash reference (default: empty hash)
#   content           - hash reference or scalar (default: undef)
#   parse             - optional scalar
#   parse_handle      - optional handle
#   features          - optional AI::Categorizer::FeatureVector object
#   content_weights   - hash reference (default: empty hash)
#   front_bias        - scalar (default: 0)
#   use_features      - hash reference or undef (default: undef)
#   stemming          - optional scalar or undef
#   stopword_behavior - scalar (default: "stem")
__PACKAGE__->valid_params
  (
   name       => {
		  type => SCALAR, 
		 },
   categories => {
		  type => ARRAYREF,
		  default => [],
		  callbacks => { 'all are Category objects' => 
				 sub { ! grep !UNIVERSAL::isa($_, 'AI::Categorizer::Category'), @{$_[0]} },
			       },
		  public => 0,
		 },
   stopwords => {
		 type => ARRAYREF|HASHREF,
		 default => {},
		},
   content   => {
		 type => HASHREF|SCALAR,
		 default => undef,
		},
   parse => {
	     type => SCALAR,
	     optional => 1,
	    },
   parse_handle => {
		    type => HANDLE,
		    optional => 1,
		   },
   features => {
		isa => 'AI::Categorizer::FeatureVector',
		optional => 1,
	       },
   content_weights => {
		       type => HASHREF,
		       default => {},
		      },
   front_bias => {
		  type => SCALAR,
		  default => 0,
		  },
   use_features => {
		    type => HASHREF|UNDEF,
		    default => undef,
		   },
   stemming => {
		type => SCALAR|UNDEF,
		optional => 1,
	       },
   stopword_behavior => {
			 type => SCALAR,
			 default => "stem",
			},
  );

# Feature vectors are a "delayed" contained object of class
# AI::Categorizer::FeatureVector.
__PACKAGE__->contained_objects
  (
   features => { delayed => 1,
		 class => 'AI::Categorizer::FeatureVector' },
  );

lib/AI/Categorizer/Document.pm  view on Meta::CPAN


=over 4

=item name

A string that identifies this document.  Required.

=item content

The raw content of this document.  May be specified as either a string
or as a hash reference, allowing structured document types.

=item content_weights

A hash reference indicating the weights that should be assigned to
features in different sections of a structured document when creating
its feature vector.  The weight is a multiplier of the feature vector
values.  For instance, if a C<subject> section has a weight of 3 and a
C<body> section has a weight of 1, and word counts are used as feature
vector values, then it will be as if all words appearing in the
C<subject> appeared 3 times.

lib/AI/Categorizer/Document/Text.pm  view on Meta::CPAN

package AI::Categorizer::Document::Text;

use strict;
use AI::Categorizer::Document;
use base qw(AI::Categorizer::Document);

#use Params::Validate qw(:types);
#use AI::Categorizer::ObjectSet;
#use AI::Categorizer::FeatureVector;

### Constructors

# Wraps the value given as "content" into a one-section structure,
# { body => ... }, and stores it as this document's content.
sub parse {
  my $self = shift;
  my %args = @_;
  my $text = $args{content};
  $self->{content} = { body => $text };
}

lib/AI/Categorizer/Experiment.pm  view on Meta::CPAN

package AI::Categorizer::Experiment;

use strict;
use Class::Container;
use AI::Categorizer::Storable;
use Statistics::Contingency;

use base qw(Class::Container AI::Categorizer::Storable Statistics::Contingency);

use Params::Validate qw(:types);
# Parameters accepted by Experiment objects:
#   categories - array or hash reference, no default (new() uses the hash
#                keys or the array elements)
#   sig_figs   - scalar, defaults to 4
__PACKAGE__->valid_params
  (
   categories => { type => ARRAYREF|HASHREF },
   sig_figs   => { type => SCALAR, default => 4 },
  );

sub new {
  my $package = shift;
  my $self = $package->Class::Container::new(@_);
  
  $self->{$_} = 0 foreach qw(a b c d);
  my $c = delete $self->{categories};
  $self->{categories} = { map {($_ => {a=>0, b=>0, c=>0, d=>0})} 
			  UNIVERSAL::isa($c, 'HASH') ? keys(%$c) : @$c

lib/AI/Categorizer/FeatureSelector.pm  view on Meta::CPAN

package AI::Categorizer::FeatureSelector;

use strict;
use Class::Container;
use base qw(Class::Container);

use Params::Validate qw(:types);
use AI::Categorizer::FeatureVector;
use AI::Categorizer::Util;
use Carp qw(croak);

# Parameters accepted by FeatureSelector objects:
#   features_kept - scalar, defaults to 0.2
#   verbose       - scalar, defaults to 0
__PACKAGE__->valid_params
  (
   features_kept => {
		     type => SCALAR,
		     default => 0.2,
		    },
   verbose => {
	       type => SCALAR,
	       default => 0,
	      },
  );

# Combined getter/setter for the verbose setting: when an argument is
# supplied (even undef) it replaces the stored value; the current value
# is returned either way.
sub verbose {
  my ($self, @value) = @_;
  $self->{verbose} = $value[0] if @value;
  return $self->{verbose};
}

lib/AI/Categorizer/FeatureSelector.pm  view on Meta::CPAN

when training the Learner or categorizing new documents.  May be
specified as a positive integer (e.g. 2000) indicating the absolute
number of features to be kept, or as a decimal between 0 and 1
(e.g. 0.2) indicating the fraction of the total number of features to
be kept, or as 0 to indicate that no feature selection should be done
and that the entire set of features should be used.  The default is
0.2.

=item feature_selection

A string indicating the type of feature selection that should be
performed.  Currently the only option is also the default option:
C<document_frequency>.

=item tfidf_weighting

Specifies how document word counts should be converted to vector
values.  Uses the three-character specification strings from Salton &
Buckley's paper "Term-weighting approaches in automatic text
retrieval".  The three characters indicate the three factors that will
be multiplied for each feature to find the final vector value for that

lib/AI/Categorizer/FeatureSelector/CategorySelector.pm  view on Meta::CPAN

package AI::Categorizer::FeatureSelector::CategorySelector;

use strict;
use AI::Categorizer::FeatureSelector;
use base qw(AI::Categorizer::FeatureSelector);

use Params::Validate qw(:types);

# Feature vectors are a "delayed" contained object of class
# AI::Categorizer::FeatureVector.
__PACKAGE__->contained_objects
  (
   features => { class => 'AI::Categorizer::FeatureVector',
		 delayed => 1 },
  );

1;


lib/AI/Categorizer/FeatureSelector/ChiSquare.pm  view on Meta::CPAN

package AI::Categorizer::FeatureSelector::ChiSquare;

use strict;
use AI::Categorizer::FeatureSelector;
use base qw(AI::Categorizer::FeatureSelector::CategorySelector);

use Params::Validate qw(:types);

# Chi-Square function
# NB: this could probably be optimised a bit...

sub reduction_function {
  my ($self,$term,$N,$allFeaturesSum,
      $coll_features,$cat_features,$cat_features_sum) = @_;
  my $CHI2SUM = 0;
  my $nbcats = 0;
  foreach my $catname (keys %{$cat_features}) {

lib/AI/Categorizer/FeatureSelector/DocFrequency.pm  view on Meta::CPAN

package AI::Categorizer::FeatureSelector::DocFrequency;

use strict;
use AI::Categorizer::FeatureSelector;
use base qw(AI::Categorizer::FeatureSelector);

use Params::Validate qw(:types);
use Carp qw(croak);

# Feature vectors are a "delayed" contained object of class
# AI::Categorizer::FeatureVector.
__PACKAGE__->contained_objects
  (
   features => { class => 'AI::Categorizer::FeatureVector',
		 delayed => 1 },
  );

# The KnowledgeSet keeps track of document frequency, so just use that.
sub rank_features {

lib/AI/Categorizer/Hypothesis.pm  view on Meta::CPAN

package AI::Categorizer::Hypothesis;

use strict;

use Class::Container;
use base qw(Class::Container);
use Params::Validate qw(:types);

# Parameters accepted by Hypothesis objects:
#   all_categories - array reference, no default
#   scores         - hash reference, no default
#   threshold      - scalar, no default
#   document_name  - optional scalar
__PACKAGE__->valid_params
  (
   all_categories => {type => ARRAYREF},
   scores => {type => HASHREF},
   threshold => {type => SCALAR},
   document_name => {type => SCALAR, optional => 1},
  );

# Returns the contents of the all_categories array (its element count in
# scalar context).
sub all_categories {
  my ($self) = @_;
  return @{ $self->{all_categories} };
}
# Returns the stored document_name (undef when none was given).
sub document_name {
  my ($self) = @_;
  return $self->{document_name};
}
# Returns the stored threshold value.
sub threshold {
  my ($self) = @_;
  return $self->{threshold};
}

sub best_category {
  my ($self) = @_;
  my $sc = $self->{scores};
  return unless %$sc;

lib/AI/Categorizer/KnowledgeSet.pm  view on Meta::CPAN

package AI::Categorizer::KnowledgeSet;

use strict;
use Class::Container;
use AI::Categorizer::Storable;
use base qw(Class::Container AI::Categorizer::Storable);

use Params::Validate qw(:types);
use AI::Categorizer::ObjectSet;
use AI::Categorizer::Document;
use AI::Categorizer::Category;
use AI::Categorizer::FeatureVector;
use AI::Categorizer::Util;
use Carp qw(croak);

# Parameters accepted by KnowledgeSet objects.
#   categories           - array reference (default: empty); every element
#                          must be an AI::Categorizer::Category
#   documents            - array reference (default: empty); every element
#                          must be an AI::Categorizer::Document
#   scan_first           - boolean (default: 1)
#   feature_selector     - an AI::Categorizer::FeatureSelector object
#   tfidf_weighting      - optional scalar
#   term_weighting       - scalar (default: 'x')
#   collection_weighting - scalar (default: 'x')
#   normalize_weighting  - scalar (default: 'x')
#   verbose              - scalar (default: 0)
# The two callbacks receive the array reference under test as $_[0] and
# succeed only when no element fails the isa() check.
__PACKAGE__->valid_params
  (
   categories => {
		  type => ARRAYREF,
		  default => [],
		  callbacks => { 'all are Category objects' => 
				 sub { ! grep !UNIVERSAL::isa($_, 'AI::Categorizer::Category'),
					 @{$_[0]} },
			       },
		 },
   documents  => {
		  type => ARRAYREF,
		  default => [],
		  callbacks => { 'all are Document objects' => 
				 sub { ! grep !UNIVERSAL::isa($_, 'AI::Categorizer::Document'),
					 @{$_[0]} },
			       },
		 },
   scan_first => {
		  type => BOOLEAN,
		  default => 1,
		 },
   feature_selector => {
			isa => 'AI::Categorizer::FeatureSelector',
		       },
   tfidf_weighting  => {
			type => SCALAR,
			optional => 1,
		       },
   term_weighting  => {
		       type => SCALAR,
		       default => 'x',
		      },
   collection_weighting => {
			    type => SCALAR,
			    default => 'x',
			   },
   normalize_weighting => {
			   type => SCALAR,
			   default => 'x',
			  },
   verbose => {
	       type => SCALAR,
	       default => 0,
	      },
  );

__PACKAGE__->contained_objects
  (
   document => { delayed => 1,
		 class => 'AI::Categorizer::Document' },
   category => { delayed => 1,
		 class => 'AI::Categorizer::Category' },

lib/AI/Categorizer/KnowledgeSet.pm  view on Meta::CPAN

  my ($self, $args) = @_;
  return $args->{collection} || $self->create_delayed_object('collection', %$args);
}

sub scan_stats {
  # Should determine:
  #  - number of documents
  #  - number of categories
  #  - avg. number of categories per document (whole corpus)
  #  - avg. number of tokens per document (whole corpus)
  #  - avg. number of types per document (whole corpus)
  #  - number of documents, tokens, & types for each category
  #  - "category skew index" (% variance?) by num. documents, tokens, and types

  my ($self, %args) = @_;
  my $collection = $self->_make_collection(\%args);
  my $pb = $self->prog_bar($collection);

  my %stats;


  while (my $doc = $collection->next) {
    $pb->();
    $stats{category_count_with_duplicates} += $doc->categories;

    my ($sum, $length) = ($doc->features->sum, $doc->features->length);
    $stats{document_count}++;
    $stats{token_count} += $sum;
    $stats{type_count}  += $length;
    
    foreach my $cat ($doc->categories) {
#warn $doc->name, ": ", $cat->name, "\n";
      $stats{categories}{$cat->name}{document_count}++;
      $stats{categories}{$cat->name}{token_count} += $sum;
      $stats{categories}{$cat->name}{type_count}  += $length;
    }
  }
  print "\n" if $self->verbose;

  my @cats = keys %{ $stats{categories} };

  $stats{category_count}          = @cats;
  $stats{categories_per_document} = $stats{category_count_with_duplicates} / $stats{document_count};
  $stats{tokens_per_document}     = $stats{token_count} / $stats{document_count};
  $stats{types_per_document}      = $stats{type_count}  / $stats{document_count};

  foreach my $thing ('type', 'token', 'document') {
    $stats{"${thing}s_per_category"} = AI::Categorizer::Util::average
      ( map { $stats{categories}{$_}{"${thing}_count"} } @cats );

    next unless @cats;

    # Compute the skews
    my $ssum;
    foreach my $cat (@cats) {
      $ssum += ($stats{categories}{$cat}{"${thing}_count"} - $stats{"${thing}s_per_category"}) ** 2;
    }

lib/AI/Categorizer/KnowledgeSet.pm  view on Meta::CPAN

when training the Learner or categorizing new documents.  May be
specified as a positive integer (e.g. 2000) indicating the absolute
number of features to be kept, or as a decimal between 0 and 1
(e.g. 0.2) indicating the fraction of the total number of features to
be kept, or as 0 to indicate that no feature selection should be done
and that the entire set of features should be used.  The default is
0.2.

=item feature_selection

A string indicating the type of feature selection that should be
performed.  Currently the only option is also the default option:
C<document_frequency>.

=item tfidf_weighting

Specifies how document word counts should be converted to vector
values.  Uses the three-character specification strings from Salton &
Buckley's paper "Term-weighting approaches in automatic text
retrieval".  The three characters indicate the three factors that will
be multiplied for each feature to find the final vector value for that

lib/AI/Categorizer/Learner.pm  view on Meta::CPAN

package AI::Categorizer::Learner;

use strict;
use Class::Container;
use AI::Categorizer::Storable;
use base qw(Class::Container AI::Categorizer::Storable);

use Params::Validate qw(:types);
use AI::Categorizer::ObjectSet;

# Parameters accepted by every Learner:
#   knowledge_set - optional AI::Categorizer::KnowledgeSet object
#   verbose       - scalar, defaults to 0
__PACKAGE__->valid_params
  (
   knowledge_set  => { isa => 'AI::Categorizer::KnowledgeSet', optional => 1 },
   verbose => {type => SCALAR, default => 0},
  );

__PACKAGE__->contained_objects
  (
   hypothesis => {
		  class => 'AI::Categorizer::Hypothesis',
		  delayed => 1,
		 },
   experiment => {
		  class => 'AI::Categorizer::Experiment',

lib/AI/Categorizer/Learner/Boolean.pm  view on Meta::CPAN

package AI::Categorizer::Learner::Boolean;

use strict;
use AI::Categorizer::Learner;
use base qw(AI::Categorizer::Learner);
use Params::Validate qw(:types);
use AI::Categorizer::Util qw(random_elements);

# Parameters added by the Boolean learner base class:
#   max_instances - scalar, defaults to 0
#   threshold     - scalar, defaults to 0.5
__PACKAGE__->valid_params
  (
   max_instances => {type => SCALAR, default => 0},
   threshold => {type => SCALAR, default => 0.5},
  );

sub create_model {
  my $self = shift;
  my $m = $self->{model} ||= {};
  my $mi = $self->{max_instances};

  foreach my $cat ($self->knowledge_set->categories) {
    my (@p, @n);
    foreach my $doc ($self->knowledge_set->documents) {

lib/AI/Categorizer/Learner/Boolean.pm  view on Meta::CPAN

into multi-valued categorizers.  For instance, the decision tree
categorizer C<AI::Categorizer::Learner::DecisionTree> maintains a
decision tree for each category, then uses it to decide whether a
certain document belongs to the given category.

Any class that inherits from this class should implement the following
methods:

=head2 create_boolean_model()

Used during training to create a category-specific model.  The type of
model you create is up to you - it should be returned as a scalar.
Whatever you return will be available to you in the
C<get_boolean_score()> method, so put any information you'll need
during categorization in this scalar.

In addition to C<$self>, this method will be passed three arguments.
The first argument is a reference to an array of B<positive> examples,
i.e. documents that belong to the given category.  The next argument
is a reference to an array of B<negative> examples, i.e. documents
that do I<not> belong to the given category.  The final argument is

lib/AI/Categorizer/Learner/KNN.pm  view on Meta::CPAN

package AI::Categorizer::Learner::KNN;

use strict;
use AI::Categorizer::Learner;
use base qw(AI::Categorizer::Learner);
use Params::Validate qw(:types);

# Parameters added by the KNN learner:
#   threshold     - scalar, defaults to 0.4
#   k_value       - scalar, defaults to 20
#   knn_weighting - scalar, defaults to 'score'
#   max_instances - scalar, defaults to 0
__PACKAGE__->valid_params
  (
   threshold => {type => SCALAR, default => 0.4},
   k_value => {type => SCALAR, default => 20},
   knn_weighting => {type => SCALAR, default => 'score'},
   max_instances => {type => SCALAR, default => 0},
  );

# Prepares the model: normalizes the feature vector of every document in
# the knowledge set, then calls the knowledge set's features() method so
# that it gets initialized.  The result of that final call is returned.
sub create_model {
  my ($self) = @_;
  for my $document ($self->knowledge_set->documents) {
    my $vector = $document->features;
    $vector->normalize;
  }
  return $self->knowledge_set->features;  # Initialize
}

lib/AI/Categorizer/Learner/NaiveBayes.pm  view on Meta::CPAN

package AI::Categorizer::Learner::NaiveBayes;

use strict;
use AI::Categorizer::Learner;
use base qw(AI::Categorizer::Learner);
use Params::Validate qw(:types);
use Algorithm::NaiveBayes;

# Parameter added by the NaiveBayes learner:
#   threshold - scalar, defaults to 0.3
__PACKAGE__->valid_params
  (
   threshold => {type => SCALAR, default => 0.3},
  );

sub create_model {
  my $self = shift;
  my $m = $self->{model} = Algorithm::NaiveBayes->new;

  foreach my $d ($self->knowledge_set->documents) {
    $m->add_instance(attributes => $d->features->as_hash,
		     label      => [ map $_->name, $d->categories ]);
  }

lib/AI/Categorizer/Learner/Rocchio.pm  view on Meta::CPAN

package AI::Categorizer::Learner::Rocchio;
$VERSION = '0.01';

use strict;
use Params::Validate qw(:types);
use AI::Categorizer::FeatureVector;
use AI::Categorizer::Learner::Boolean;
use base qw(AI::Categorizer::Learner::Boolean);

# Parameters declared by the Rocchio learner:
#   positive_setting - scalar, defaults to 16
#   negative_setting - scalar, defaults to 4
#   threshold        - scalar, defaults to 0.1
__PACKAGE__->valid_params
  (
   positive_setting => {type => SCALAR, default => 16 },
   negative_setting => {type => SCALAR, default => 4  },
   threshold        => {type => SCALAR, default => 0.1},
  );

sub create_model {
  my $self = shift;
  foreach my $doc ($self->knowledge_set->documents) {
    $doc->features->normalize;
  }
  
  $self->{model}{all_features} = $self->knowledge_set->features(undef);
  $self->SUPER::create_model(@_);

lib/AI/Categorizer/Learner/SVM.pm  view on Meta::CPAN

package AI::Categorizer::Learner::SVM;
$VERSION = '0.01';

use strict;
use AI::Categorizer::Learner::Boolean;
use base qw(AI::Categorizer::Learner::Boolean);
use Algorithm::SVM;
use Algorithm::SVM::DataSet;
use Params::Validate qw(:types);
use File::Spec;

# Parameter added by the SVM learner:
#   svm_kernel - scalar, defaults to 'linear' (the module's POD lists
#                'linear', 'polynomial', 'radial' and 'sigmoid')
__PACKAGE__->valid_params
  (
   svm_kernel => {type => SCALAR, default => 'linear'},
  );

# Builds the two feature lookup tables kept in the model before handing
# over to the parent class:
#   feature_map_reverse - array of feature names, in keys() order
#   feature_map         - hash from feature name to its index in that array
# Any remaining arguments are passed on to SUPER::create_model, whose
# result is returned.
sub create_model {
  my $self = shift;
  my $features = $self->knowledge_set->features->as_hash;
  my @names = keys %$features;

  my %index_of;
  @index_of{@names} = (0 .. $#names);

  $self->{model}{feature_map} = \%index_of;
  $self->{model}{feature_map_reverse} = \@names;
  return $self->SUPER::create_model(@_);
}

lib/AI/Categorizer/Learner/SVM.pm  view on Meta::CPAN

=head2 new()

Creates a new SVM Learner and returns it.  In addition to the
parameters accepted by the C<AI::Categorizer::Learner> class, the
SVM subclass accepts the following parameters:

=over 4

=item svm_kernel

Specifies what type of kernel should be used when building the SVM.
Default is 'linear'.  Possible values are 'linear', 'polynomial',
'radial' and 'sigmoid'.

=back

=head2 train(knowledge_set => $k)

Trains the categorizer.  This prepares it for later use in
categorizing documents.  The C<knowledge_set> parameter must provide
an object of the class C<AI::Categorizer::KnowledgeSet> (or a subclass

lib/AI/Categorizer/Learner/Weka.pm  view on Meta::CPAN

package AI::Categorizer::Learner::Weka;

use strict;
use AI::Categorizer::Learner::Boolean;
use base qw(AI::Categorizer::Learner::Boolean);
use Params::Validate qw(:types);
use File::Spec;
use File::Copy;
use File::Path ();
use File::Temp ();

# Parameters added by the Weka learner:
#   java_path       - scalar, defaults to 'java'
#   java_args       - optional scalar or array reference
#   weka_path       - optional scalar
#   weka_classifier - scalar, defaults to 'weka.classifiers.NaiveBayes'
#   weka_args       - optional scalar or array reference
#   tmpdir          - scalar; the default is whatever File::Spec->tmpdir
#                     returns when this statement runs
__PACKAGE__->valid_params
  (
   java_path => {type => SCALAR, default => 'java'},
   java_args => {type => SCALAR|ARRAYREF, optional => 1},
   weka_path => {type => SCALAR, optional => 1},
   weka_classifier => {type => SCALAR, default => 'weka.classifiers.NaiveBayes'},
   weka_args => {type => SCALAR|ARRAYREF, optional => 1},
   tmpdir => {type => SCALAR, default => File::Spec->tmpdir},
  );

# Feature vectors are a "delayed" contained object of class
# AI::Categorizer::FeatureVector.
__PACKAGE__->contained_objects
  (
   features => {class => 'AI::Categorizer::FeatureVector', delayed => 1},
  );

sub new {
  my $class = shift;
  my $self = $class->SUPER::new(@_);

lib/AI/Categorizer/ObjectSet.pm  view on Meta::CPAN

  return values %{$_[0]};
}

# Number of entries currently held in the set.
sub size {
  my ($self) = @_;
  my $count = keys %$self;
  return $count;
}

# Adds each given element to the set, keyed by the value of its name()
# method; an element with an already-present name replaces the old entry.
sub insert {
  my ($self, @members) = @_;
  for my $member (@members) {
    $self->{ $member->name } = $member;
  }
}

# Looks up the entry stored under the given name (undef when absent).
sub retrieve {
  my ($self, $name) = @_;
  return $self->{$name};
}

# True when the set holds an entry under the given element's name().
sub includes {
  my ($self, $element) = @_;
  return exists $self->{ $element->name };
}
# True when the set holds an entry under the given name.
sub includes_name {
  my ($self, $name) = @_;
  return exists $self->{$name};
}

1;



( run in 3.135 seconds using v1.01-cache-2.11-cpan-df04353d9ac )