AI-NaiveBayes
View this release on MetaCPAN or search MetaCPAN
use AI::NaiveBayes::Learner;
use Moose;
use MooseX::Storage;
use List::Util qw(max);
with Storage(format => 'Storable', io => 'File');
has model => (is => 'ro', isa => 'HashRef[HashRef]', required => 1);
sub train {
    my $self = shift;

    # Build a classifier from a list of training examples. Each element
    # of @_ is a hashref with 'attributes' and 'labels' keys; %$_
    # flattens it into the named-parameter list add_example() expects.
    # A throwaway Learner accumulates the counts, then hands back the
    # finished classifier object.
    my $trainer = AI::NaiveBayes::Learner->new();
    $trainer->add_example(%$_) for @_;
    return $trainer->classifier;
}
sub classify {
my ($self, $newattrs) = @_;
$newattrs or die "Missing parameter for classify()";
my $m = $self->model;
# Note that we're using the log(prob) here. That's why we add instead of multiply.
my %scores = %{$m->{prior_probs}};
my %features;
while (my ($feature, $value) = each %$newattrs) {
$scores{$label} += $score;
$features{$feature}{$label} = $score;
}
}
rescale(\%scores);
return AI::NaiveBayes::Classification->new( label_sums => \%scores, features => \%features );
}
sub rescale {
my ($scores) = @_;
# Scale everything back to a reasonable area in logspace (near zero), un-loggify, and normalize
my $total = 0;
my $max = max(values %$scores);
foreach (values %$scores) {
$_ = exp($_ - $max);
$total += $_**2;
}
$total = sqrt($total);
lib/AI/NaiveBayes.pm view on Meta::CPAN
use AI::NaiveBayes::Learner;
use Moose;
use MooseX::Storage;
use List::Util qw(max);
with Storage(format => 'Storable', io => 'File');
has model => (is => 'ro', isa => 'HashRef[HashRef]', required => 1);
# Train a classifier from a list of example hashrefs, each carrying
# 'attributes' and 'labels' keys. Delegates all counting to a fresh
# AI::NaiveBayes::Learner and returns the classifier it produces.
# NOTE(review): this is a verbatim duplicate of the train() that appears
# earlier in this scraped listing.
sub train {
my $self = shift;
my $learner = AI::NaiveBayes::Learner->new();
for my $example ( @_ ){
# %$example flattens to (attributes => ..., labels => ...).
$learner->add_example( %$example );
}
return $learner->classifier;
}
sub classify {
my ($self, $newattrs) = @_;
$newattrs or die "Missing parameter for classify()";
my $m = $self->model;
# Note that we're using the log(prob) here. That's why we add instead of multiply.
my %scores = %{$m->{prior_probs}};
my %features;
while (my ($feature, $value) = each %$newattrs) {
lib/AI/NaiveBayes.pm view on Meta::CPAN
$scores{$label} += $score;
$features{$feature}{$label} = $score;
}
}
rescale(\%scores);
return AI::NaiveBayes::Classification->new( label_sums => \%scores, features => \%features );
}
sub rescale {
my ($scores) = @_;
# Scale everything back to a reasonable area in logspace (near zero), un-loggify, and normalize
my $total = 0;
my $max = max(values %$scores);
foreach (values %$scores) {
$_ = exp($_ - $max);
$total += $_**2;
}
$total = sqrt($total);
lib/AI/NaiveBayes/Classification.pm view on Meta::CPAN
$AI::NaiveBayes::Classification::VERSION = '0.04';
use strict;
use warnings;
use 5.010;
use Moose;
has features => (is => 'ro', isa => 'HashRef[HashRef]', required => 1);
has label_sums => (is => 'ro', isa => 'HashRef', required => 1);
has best_category => (is => 'ro', isa => 'Str', lazy_build => 1);
sub _build_best_category {
    my $self = shift;

    # Lazy builder for the 'best_category' attribute: return the label
    # whose summed (rescaled) score is highest. Ties are resolved by
    # hash iteration order, matching the original each()-based scan —
    # i.e. nondeterministically under Perl's hash randomization.
    my $sums = $self->label_sums;
    my $winner;
    for my $label (keys %$sums) {
        $winner = $label
            if !defined $winner || $sums->{$label} > $sums->{$winner};
    }
    return $winner;
}
sub find_predictors{
my $self = shift;
my $best_cat = $self->best_category;
my $features = $self->features;
my @predictors;
for my $feature ( keys %$features ) {
for my $cat ( keys %{ $features->{$feature } } ){
next if $cat eq $best_cat;
push @predictors, [ $feature, $features->{$feature}{$best_cat} - $features->{$feature}{$cat} ];
}
lib/AI/NaiveBayes/Learner.pm view on Meta::CPAN
package AI::NaiveBayes::Learner;
$AI::NaiveBayes::Learner::VERSION = '0.04';
use strict;
use warnings;
use 5.010;
use List::Util qw( min sum );
use Moose;
use AI::NaiveBayes;
has attributes => (is => 'ro', isa => 'HashRef', default => sub { {} }, clearer => '_clear_attrs');
has labels => (is => 'ro', isa => 'HashRef', default => sub { {} }, clearer => '_clear_labels');
has examples => (is => 'ro', isa => 'Int', default => 0, clearer => '_clear_examples');
has features_kept => (is => 'ro', predicate => 'limit_features');
has classifier_class => ( is => 'ro', isa => 'Str', default => 'AI::NaiveBayes' );
sub add_example {
    my ($self, %params) = @_;

    # Record one training example. Both named arguments are mandatory:
    # 'attributes' is a hashref of feature => count, 'labels' is an
    # arrayref of category names.
    for my $required ('attributes', 'labels') {
        die "Missing required '$required' parameter"
            unless exists $params{$required};
    }

    # Bump the raw example counter. The 'examples' attribute is ro, so
    # (as in the original) the underlying hash slot is poked directly.
    $self->{examples}++;

    my $attrs = $params{attributes};

    # Merge this example's feature counts into the global vocabulary
    # tallies...
    add_hash($self->attributes(), $attrs);

    # ...and into the per-label tallies for every label it carries.
    my $label_stats = $self->labels;
    for my $label (@{ $params{labels} }) {
        $label_stats->{$label}{count}++;
        $label_stats->{$label}{attributes} //= {};
        add_hash($label_stats->{$label}{attributes}, $attrs);
    }
}
sub classifier {
my $self = shift;
my $examples = $self->examples;
my $labels = $self->labels;
my $vocab_size = keys %{ $self->attributes };
my $model;
$model->{attributes} = $self->attributes;
# Calculate the log-probabilities for each category
lib/AI/NaiveBayes/Learner.pm view on Meta::CPAN
}
my @top = @features[0..$limit-1];
my %kept = map { $_ => $old{$_} } @top;
$model->{probs}{$label} = \%kept;
}
}
my $classifier_class = $self->classifier_class;
return $classifier_class->new( model => $model );
}
sub add_hash {
    my ($into, $from) = @_;

    # Accumulate every count from $from into $into, in place. Keys
    # missing from $into start at zero. ($into is defaulted defensively
    # against undef, but since @_ was copied into lexicals, a caller
    # passing undef observes no effect — the fresh hashref is discarded.)
    $into //= {};
    while (my ($key, $count) = each %$from) {
        $into->{$key} = ($into->{$key} // 0) + $count;
    }
}
__PACKAGE__->meta->make_immutable;
t/01-learner.t view on Meta::CPAN
$learner->add_example( attributes => _hash(qw(one two three four)),
labels => ['farming'] );
$learner->add_example( attributes => _hash(qw(five six seven eight)),
labels => ['farming'] );
$learner->add_example( attributes => _hash(qw(one two three four)),
labels => ['farming'] );
$model = $learner->classifier->model;
is keys %{$model->{probs}{farming}}, 4, 'half features kept';
is join(" ", sort { $a cmp $b } keys %{$model->{probs}{farming}}), 'four one three two';
sub _hash { +{ map {$_,1} @_ } }
t/02-predict.t view on Meta::CPAN
$classifier = $lr->classifier;
# Predict
$s = $classifier->classify( _hash(qw(jakis tekst po polsku)) );
$h = $s->label_sums;
ok(abs( 3 - $h->{farming} / $h->{vampire} ) < 0.01, 'Prior probabillities' );
################################################################
sub _hash { +{ map {$_,1} @_ } }
t/default_training.t view on Meta::CPAN
{
attributes => _hash(qw(vampires cannot see their images mirrors)),
labels => ['vampire']
},
);
isa_ok( $classifier, 'AI::NaiveBayes' );
################################################################
sub _hash { +{ map {$_,1} @_ } }
( run in 0.310 second using v1.01-cache-2.11-cpan-4d50c553e7e )