AI-NaiveBayes
lib/AI/NaiveBayes.pm
    return $learner->classifier;
}
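The two lines above are the tail of the train() class method: it feeds every example to an AI::NaiveBayes::Learner and returns the finished classifier. A minimal training sketch, assuming the documented interface where train() takes a list of hashrefs holding attribute counts and labels (the attributes and labels below are invented for illustration):

use AI::NaiveBayes;

# Each example is a hashref of attribute => count pairs plus one or more labels.
my $classifier = AI::NaiveBayes->train(
    { attributes => { sheep => 1, very => 1, valuable => 1, farming => 1 },
      labels     => ['farming'] },
    { attributes => { vampires => 1, cannot => 1, see => 1, their => 1, images => 1 },
      labels     => ['vampires'] },
);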
sub classify {
    my ($self, $newattrs) = @_;
    $newattrs or die "Missing parameter for classify()";

    my $m = $self->model;

    # Note that we're using the log(prob) here. That's why we add instead of multiply.
    my %scores = %{$m->{prior_probs}};
    my %features;
    while (my ($feature, $value) = each %$newattrs) {
        next unless exists $m->{attributes}{$feature};   # Ignore totally unseen features
        while (my ($label, $attributes) = each %{$m->{probs}}) {
            my $score = ($attributes->{$feature} || $m->{smoother}{$label}) * $value;   # P($feature|$label)**$value
            $scores{$label} += $score;
            $features{$feature}{$label} = $score;
        }
    }

    rescale(\%scores);

    return AI::NaiveBayes::Classification->new( label_sums => \%scores, features => \%features );
}
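classify() expects a hashref of attribute => weight pairs; each attribute seen during training adds its weighted log-probability to every label's score, and the result is wrapped in an AI::NaiveBayes::Classification object. A rough usage sketch, under the assumption that the Classification object exposes best_category() and the label_sums it was constructed with:

# Score a new document against the trained model.
my $result = $classifier->classify({ vampires => 2, images => 1 });

my $best   = $result->best_category;      # assumed accessor; e.g. 'vampires'
my %scores = %{ $result->label_sums };    # rescaled per-label scores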
sub rescale {
    my ($scores) = @_;

    # Scale everything back to a reasonable area in logspace (near zero), un-loggify, and normalize
    my $total = 0;
    my $max = max(values %$scores);
    foreach (values %$scores) {
        $_ = exp($_ - $max);
        $total += $_**2;
    }
    $total = sqrt($total);
    foreach (values %$scores) {
        $_ /= $total;
    }
}
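rescale() keeps the comparison numerically safe: the raw scores are sums of log-probabilities (large negative numbers), so it shifts them by the maximum, exponentiates, and divides by the Euclidean norm of the resulting vector. A tiny worked sketch of the same arithmetic on made-up log-scores:

use List::Util qw(max);

my %scores = ( farming => -40.2, vampires => -42.7 );    # hypothetical log-scores
my $max    = max(values %scores);                        # -40.2
my $total  = 0;
for (values %scores) { $_ = exp($_ - $max); $total += $_**2; }   # 1.0 and exp(-2.5) ~ 0.082
$total = sqrt($total);                                   # ~ 1.003
$_ /= $total for values %scores;                         # farming ~ 0.997, vampires ~ 0.082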
lib/AI/NaiveBayes/Learner.pm
sub classifier {
    my $self = shift;

    my $examples   = $self->examples;
    my $labels     = $self->labels;
    my $vocab_size = keys %{ $self->attributes };

    my $model;
    $model->{attributes} = $self->attributes;

    # Calculate the log-probabilities for each category
    foreach my $label (keys %$labels) {
        $model->{prior_probs}{$label} = log($labels->{$label}{count} / $examples);

        # Count the number of tokens in this cat
        my $label_tokens = sum( values %{ $labels->{$label}{attributes} } );

        # Compute a smoothing term so P(word|cat)==0 can be avoided
        $model->{smoother}{$label} = -log($label_tokens + $vocab_size);

        # P(attr|label) = $count/$label_tokens                        (simple)
        # P(attr|label) = ($count + 1)/($label_tokens + $vocab_size)  (with smoothing)
        # log P(attr|label) = log($count + 1) - log($label_tokens + $vocab_size)
        my $denominator = log($label_tokens + $vocab_size);

        while (my ($attribute, $count) = each %{ $labels->{$label}{attributes} }) {
            $model->{probs}{$label}{$attribute} = log($count + 1) - $denominator;
        }

        if ($self->limit_features) {
            my %old      = %{ $model->{probs}{$label} };
            my @features = sort { abs($old{$a}) <=> abs($old{$b}) } keys(%old);
            my $limit    = min($self->features_kept, 0+@features);
            if ($limit < 1) {
                $limit = int($limit * keys(%old));
            }
            my @top = @features[0..$limit-1];
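The comments in classifier() spell out the add-one (Laplace) smoothing: every count is incremented by one and the denominator grows by the vocabulary size, so an attribute never seen with a label still gets the small per-label smoother probability instead of zero. The limit_features branch then keeps only the attributes whose log-probability is closest to zero, i.e. the most likely ones for that label. A toy numeric sketch of the smoothing arithmetic, with invented counts:

# Hypothetical counts: the label saw 10 tokens over a 5-word vocabulary,
# and the attribute 'sheep' occurred 3 times with that label.
my ($label_tokens, $vocab_size, $count) = (10, 5, 3);

my $log_p_seen = log($count + 1) - log($label_tokens + $vocab_size);   # log(4/15) ~ -1.32
my $smoother   = -log($label_tokens + $vocab_size);                    # log(1/15) ~ -2.71, used for unseen attributes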