Algorithm-NaiveBayes
view release on metacpan or search on metacpan
lib/Algorithm/NaiveBayes.pm view on Meta::CPAN
}
return $package;
}
# Serialize this object to the file at $path using Storable's
# network-order store (nstore).
#
# Parameters:
#   $path - destination file name, handed straight to Storable::nstore().
#
# Returns whatever Storable::nstore() returns; no additional error
# checking is done here.
sub save_state {
    my $self = shift;
    my ($path) = @_;

    return Storable::nstore($self, $path);
}
# Class method: rebuild an object from a file written with Storable.
#
# Parameters:
#   $pkg  - invocant (not otherwise used here).
#   $path - file to read with Storable::retrieve().
#
# Returns the retrieved object after calling its _load_model_class()
# method. Dies if Storable::retrieve() yields a false value.
#
# NOTE(review): Storable's documentation warns against retrieving data
# from untrusted sources -- confirm that $path is always trusted.
sub restore_state {
    my ($pkg, $path) = @_;

    my $restored = Storable::retrieve($path);
    die "Can't restore state from $path: $!" unless $restored;

    $restored->_load_model_class;
    return $restored;
}
# Record one training instance.
#
# Parameters (named):
#   attributes - required; passed through unchanged to do_add_instance().
#   label      - required; a single label or a reference to a list of
#                labels. A non-reference value is wrapped in an array ref.
#
# Side effects: every label becomes a key of $self->{labels}, and
# $self->{instances} is incremented.
#
# Returns whatever do_add_instance() returns. Dies if either required
# parameter is absent.
sub add_instance {
    my ($self, %params) = @_;

    foreach my $required (qw(attributes label)) {
        die "Missing required '$required' parameter" unless exists $params{$required};
    }

    # Normalize to an array ref so one label and many labels share a path.
    my $labels = ref $params{label} ? $params{label} : [ $params{label} ];

    # Hash-slice assignment: registers each label as a key (value undef).
    @{ $self->{labels} }{ @$labels } = ();

    $self->{instances}++;

    return $self->do_add_instance($params{attributes}, $labels, $self->{training_data});
}
# Accessor for the labels registered on this object: the keys of
# $self->{labels}. In list context this is the label names (in hash
# order); in scalar context `keys` yields their count.
sub labels {
    my ($self) = @_;
    return keys %{ $self->{labels} };
}
# Accessor: returns the value stored in $self->{instances}.
sub instances {
    my ($self) = @_;
    return $self->{instances};
}
# Accessor: returns the value stored in $self->{training_data}.
sub training_data {
    my ($self) = @_;
    return $self->{training_data};
}
# Build the model from the accumulated training data.
#
# Stores the result of do_train($self->{training_data}) in
# $self->{model}; then, when purge() returns a true value, calls
# do_purge().
sub train {
    my ($self) = @_;

    my $model = $self->do_train($self->{training_data});
    $self->{model} = $model;

    # Kept as the final statement with no explicit return, so the sub's
    # implicit return value is the same as with a postfix-if.
    $self->purge and $self->do_purge;
}
# Remove the 'training_data' entry from the object. Returns the value
# that was removed, as `delete` does.
sub do_purge {
    my ($self) = @_;
    return delete $self->{training_data};
}
# Combined getter/setter for the 'purge' flag.
#
# With an argument, stores it in $self->{purge} (even if it is undef);
# in either case returns the current value of $self->{purge}.
sub purge {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{purge} = $new_value[0];
    }

    return $self->{purge};
}
# Run a prediction for an unseen instance.
#
# Parameters (named):
#   attributes - required; passed through unchanged to do_predict().
#
# Returns whatever do_predict() returns for ($self->{model}, attributes).
#
# Dies if 'attributes' is missing or false, or if $self->{model} is not
# defined (train() stores the model there).
sub predict {
    my ($self, %params) = @_;

    my $newattrs = $params{attributes}
        or die "Missing 'attributes' parameter for predict()";

    # Fail with a clear message rather than handing an undefined model
    # to do_predict().
    die "No trained model available; call train() before predict()"
        unless defined $self->{model};

    return $self->do_predict($self->{model}, $newattrs);
}
1;
__END__
# Module documentation (POD) follows.
=head1 NAME
Algorithm::NaiveBayes - Bayesian prediction of categories
=head1 SYNOPSIS
use Algorithm::NaiveBayes;
my $nb = Algorithm::NaiveBayes->new;
$nb->add_instance
(attributes => {foo => 1, bar => 1, baz => 3},
label => 'sports');
$nb->add_instance
(attributes => {foo => 2, blurp => 1},
label => ['sports', 'finance']);
# ... repeat for several more instances, then:
$nb->train;
# Find results for unseen instances
my $result = $nb->predict
(attributes => {bar => 3, blurp => 2});
=head1 DESCRIPTION
This module implements the classic "Naive Bayes" machine learning
algorithm. It is a well-studied probabilistic algorithm often used in
automatic text categorization. Compared to other algorithms (kNN,
SVM, Decision Trees), it's pretty fast and reasonably competitive in
the quality of its results.
A paper by Fabrizio Sebastiani provides a really good introduction to
text categorization:
L<http://faure.iei.pi.cnr.it/~fabrizio/Publications/ACMCS02.pdf>
=head1 METHODS
=over 4
=item new()
Creates a new C<Algorithm::NaiveBayes> object and returns it. The
following parameters are accepted:
=over 4
=item purge
If set to a true value, the C<do_purge()> method will be invoked during
C<train()>, discarding the accumulated training data once the model has
been built. The default is true. Set this to a false value if you'd
like to be able to add additional instances after training and then
call C<train()> again.
=back
=item add_instance( attributes =E<gt> HASH, label =E<gt> STRING|ARRAY )
( run in 2.059 seconds using v1.01-cache-2.11-cpan-d7f47b0818f )