AI-NaiveBayes
view release on metacpan or search on metacpan
lib/AI/NaiveBayes/Learner.pm view on Meta::CPAN
package AI::NaiveBayes::Learner;
$AI::NaiveBayes::Learner::VERSION = '0.04';
use strict;
use warnings;
use 5.010;
use List::Util qw( min sum );
use Moose;
use AI::NaiveBayes;
has attributes => (is => 'ro', isa => 'HashRef', default => sub { {} }, clearer => '_clear_attrs');
has labels => (is => 'ro', isa => 'HashRef', default => sub { {} }, clearer => '_clear_labels');
has examples => (is => 'ro', isa => 'Int', default => 0, clearer => '_clear_examples');
has features_kept => (is => 'ro', predicate => 'limit_features');
has classifier_class => ( is => 'ro', isa => 'Str', default => 'AI::NaiveBayes' );
sub add_example {
my ($self, %params) = @_;
for ('attributes', 'labels') {
die "Missing required '$_' parameter" unless exists $params{$_};
}
$self->{examples}++;
my $attributes = $params{attributes};
my $labels = $params{labels};
add_hash($self->attributes(), $attributes);
my $our_labels = $self->labels;
foreach my $label ( @$labels ) {
$our_labels->{$label}{count}++;
$our_labels->{$label}{attributes} //= {};
add_hash($our_labels->{$label}{attributes}, $attributes);
}
}
sub classifier {
my $self = shift;
my $examples = $self->examples;
my $labels = $self->labels;
my $vocab_size = keys %{ $self->attributes };
my $model;
$model->{attributes} = $self->attributes;
# Calculate the log-probabilities for each category
foreach my $label (keys %$labels) {
$model->{prior_probs}{$label} = log($labels->{$label}{count} / $examples);
# Count the number of tokens in this cat
my $label_tokens = sum( values %{ $labels->{$label}{attributes} } );
# Compute a smoothing term so P(word|cat)==0 can be avoided
$model->{smoother}{$label} = -log($label_tokens + $vocab_size);
# P(attr|label) = $count/$label_tokens (simple)
# P(attr|label) = ($count + 1)/($label_tokens + $vocab_size) (with smoothing)
# log P(attr|label) = log($count + 1) - log($label_tokens + $vocab_size)
my $denominator = log($label_tokens + $vocab_size);
while (my ($attribute, $count) = each %{ $labels->{$label}{attributes} }) {
$model->{probs}{$label}{$attribute} = log($count + 1) - $denominator;
}
if ($self->limit_features) {
my %old = %{$model->{probs}{$label}};
my @features = sort { abs($old{$a}) <=> abs($old{$b}) } keys(%old);
( run in 0.638 second using v1.01-cache-2.11-cpan-5735350b133 )