AI-NaiveBayes

 view release on metacpan or  search on metacpan

README.pod  view on Meta::CPAN

use AI::NaiveBayes::Learner;
use Moose;
use MooseX::Storage;

use List::Util qw(max);

with Storage(format => 'Storable', io => 'File');

has model   => (is => 'ro', isa => 'HashRef[HashRef]', required => 1);

sub train {
    my $self = shift;
    my $learner = AI::NaiveBayes::Learner->new();
    for my $example ( @_ ){
        $learner->add_example( %$example );
    }
    return $learner->classifier;
}


sub classify {
    my ($self, $newattrs) = @_;
    $newattrs or die "Missing parameter for classify()";

    my $m = $self->model;

    # Note that we're using the log(prob) here.  That's why we add instead of multiply.

    my %scores = %{$m->{prior_probs}};
    my %features;
    while (my ($feature, $value) = each %$newattrs) {

README.pod  view on Meta::CPAN

            $scores{$label} += $score;
            $features{$feature}{$label} = $score;
        }
    }

    rescale(\%scores);

    return AI::NaiveBayes::Classification->new( label_sums => \%scores, features => \%features );
}

sub rescale {
    my ($scores) = @_;

    # Scale everything back to a reasonable area in logspace (near zero), un-loggify, and normalize
    my $total = 0;
    my $max = max(values %$scores);
    foreach (values %$scores) {
        $_ = exp($_ - $max);
        $total += $_**2;
    }
    $total = sqrt($total);

lib/AI/NaiveBayes.pm  view on Meta::CPAN

use AI::NaiveBayes::Learner;
use Moose;
use MooseX::Storage;

use List::Util qw(max);

with Storage(format => 'Storable', io => 'File');

has model   => (is => 'ro', isa => 'HashRef[HashRef]', required => 1);

sub train {
    my $self = shift;
    my $learner = AI::NaiveBayes::Learner->new();
    for my $example ( @_ ){
        $learner->add_example( %$example );
    }
    return $learner->classifier;
}


sub classify {
    my ($self, $newattrs) = @_;
    $newattrs or die "Missing parameter for classify()";

    my $m = $self->model;

    # Note that we're using the log(prob) here.  That's why we add instead of multiply.

    my %scores = %{$m->{prior_probs}};
    my %features;
    while (my ($feature, $value) = each %$newattrs) {

lib/AI/NaiveBayes.pm  view on Meta::CPAN

            $scores{$label} += $score;
            $features{$feature}{$label} = $score;
        }
    }

    rescale(\%scores);

    return AI::NaiveBayes::Classification->new( label_sums => \%scores, features => \%features );
}

sub rescale {
    my ($scores) = @_;

    # Scale everything back to a reasonable area in logspace (near zero), un-loggify, and normalize
    my $total = 0;
    my $max = max(values %$scores);
    foreach (values %$scores) {
        $_ = exp($_ - $max);
        $total += $_**2;
    }
    $total = sqrt($total);

lib/AI/NaiveBayes/Classification.pm  view on Meta::CPAN

$AI::NaiveBayes::Classification::VERSION = '0.04';
use strict;
use warnings;
use 5.010;
use Moose;

has features => (is => 'ro', isa => 'HashRef[HashRef]', required => 1);
has label_sums => (is => 'ro', isa => 'HashRef', required => 1);
has best_category => (is => 'ro', isa => 'Str', lazy_build => 1);

sub _build_best_category {
    my $self = shift;
    my $sc = $self->label_sums;

    my ($best_cat, $best_score) = each %$sc;
    while (my ($key, $val) = each %$sc) {
        ($best_cat, $best_score) = ($key, $val) if $val > $best_score;
    }
    return $best_cat;
}

sub find_predictors{
    my $self = shift;

    my $best_cat = $self->best_category;
    my $features = $self->features;
    my @predictors; 
    for my $feature ( keys %$features  ) {
        for my $cat ( keys %{ $features->{$feature } } ){
            next if $cat eq $best_cat;
            push @predictors, [ $feature, $features->{$feature}{$best_cat} - $features->{$feature}{$cat} ];
        }

lib/AI/NaiveBayes/Learner.pm  view on Meta::CPAN

package AI::NaiveBayes::Learner;
$AI::NaiveBayes::Learner::VERSION = '0.04';
use strict;
use warnings;
use 5.010;

use List::Util qw( min sum );
use Moose;
use AI::NaiveBayes;

has attributes => (is => 'ro', isa => 'HashRef', default => sub { {} }, clearer => '_clear_attrs');
has labels     => (is => 'ro', isa => 'HashRef', default => sub { {} }, clearer => '_clear_labels');
has examples  => (is => 'ro', isa => 'Int',     default => 0, clearer => '_clear_examples');

has features_kept => (is => 'ro', predicate => 'limit_features');

has classifier_class => ( is => 'ro', isa => 'Str', default => 'AI::NaiveBayes' );

sub add_example {
    my ($self, %params) = @_;
    for ('attributes', 'labels') {
        die "Missing required '$_' parameter" unless exists $params{$_};
    }

    $self->{examples}++;

    my $attributes = $params{attributes};
    my $labels     = $params{labels};

    add_hash($self->attributes(), $attributes);

    my $our_labels = $self->labels;
    foreach my $label ( @$labels ) {
        $our_labels->{$label}{count}++;
        $our_labels->{$label}{attributes} //= {};
        add_hash($our_labels->{$label}{attributes}, $attributes);
    }
}

sub classifier {
    my $self = shift;

    my $examples    = $self->examples;
    my $labels       = $self->labels;
    my $vocab_size   = keys %{ $self->attributes };
    my $model;
    $model->{attributes} = $self->attributes;


    # Calculate the log-probabilities for each category

lib/AI/NaiveBayes/Learner.pm  view on Meta::CPAN

            }
            my @top = @features[0..$limit-1];
            my %kept = map { $_ => $old{$_} } @top;
            $model->{probs}{$label} = \%kept;
        }
    }
    my $classifier_class = $self->classifier_class;
    return $classifier_class->new( model => $model );
}

sub add_hash {
    my ($first, $second) = @_;
    $first //= {};
    foreach my $k (keys %$second) {
        $first->{$k} //= 0;
        $first->{$k} += $second->{$k};
    }
}

__PACKAGE__->meta->make_immutable;

t/01-learner.t  view on Meta::CPAN

$learner->add_example( attributes => _hash(qw(one two three four)),
		   labels => ['farming'] );
$learner->add_example( attributes => _hash(qw(five six seven eight)),
		   labels => ['farming'] );
$learner->add_example( attributes => _hash(qw(one two three four)),
		   labels => ['farming'] );
$model = $learner->classifier->model;
is keys %{$model->{probs}{farming}}, 4, 'half features kept';
is join(" ", sort { $a cmp $b } keys %{$model->{probs}{farming}}), 'four one three two';

sub _hash { +{ map {$_,1} @_ } }

t/02-predict.t  view on Meta::CPAN


$classifier = $lr->classifier;

# Predict
$s = $classifier->classify( _hash(qw(jakis tekst po polsku)) );
$h = $s->label_sums;
ok(abs( 3 - $h->{farming} / $h->{vampire} ) < 0.01, 'Prior probabillities' );


################################################################
sub _hash { +{ map {$_,1} @_ } }

t/default_training.t  view on Meta::CPAN

    {
        attributes => _hash(qw(vampires cannot see their images mirrors)),
        labels => ['vampire']
    },
);

isa_ok( $classifier, 'AI::NaiveBayes' );


################################################################
sub _hash { +{ map {$_,1} @_ } }



( run in 0.310 second using v1.01-cache-2.11-cpan-4d50c553e7e )