AI-NaiveBayes1
view release on metacpan or search on metacpan
- fixed testing problems due to differences in the lowest
significant digit on different platforms
1.4 Fri Dec 14 11:42:16 AST 2007
- added test skipping if YAML module is not available
- removing deprecated "our" reserved word
1.3 Fri Dec 7 07:26:47 AST 2007
- added reading from a table format (add_table)
- fixed some warnings reported during testing
- added option "with counts" to print_model
- added smoothing option for attributes
1.2 Mon Mar 14 08:03:16 AST 2005
- addition to documentation
- fixing a minor bug and a warning
1.1 Tue Apr 20 08:07:33 ADT 2004
- added several more test
- implemented optional Gaussian distribution for numerical
attributes
1.0 Thu Sep 4 08:01:19 ADT 2003
- bug fix in testing (attributes and labels are sorted now in
print_model)
0.03 Wed Sep 3 08:25:43 ADT 2003
- bug fix
- import and export to/from string and file (using YAML)
0.02 Fri May 9 15:53:53 ADT 2003
- bug fix
0.01 Fri May 9 14:48:53 ADT 2003
- original version;
Makefile.PL view on Meta::CPAN
AUTHOR => 'Vlado Keselj https://web.cs.dal.ca/~vlado')
#-
: ()
),
($ExtUtils::MakeMaker::VERSION ge '6.30_00'?
('LICENSE' => 'perl', ) : ()),
);
open(M, ">>Makefile") or die;
if ( -f 'priv.make' ) { print M getfile('priv.make') }
close(M);
sub getfile($) {
my $f = shift;
local *F;
open(F, "<$f") or die "getfile:cannot open $f:$!";
my @r = <F>;
close(F);
return wantarray ? @r : join ('', @r);
NaiveBayes1.pm view on Meta::CPAN
"(= $m->{condprob}{$att}{$attval}{$label} / $total)";
$m->{condprob}{$att}{$attval}{$label} /= $total;
}
}
}
# For real-valued attributes, we use Gaussian distribution
# let us collect statistics
foreach my $att (keys(%{$self->{stat_attributes}})) {
next unless $self->{attribute_type}{$att} eq 'real';
print STDERR "Smoothing ignored for real attribute $att!\n" if
defined($self->{smoothing}{att}) and $self->{smoothing}{att};
$m->{real_stat}->{$att} = {};
foreach my $attval (keys %{$self->{stat_attributes}{$att}}){
foreach my $label (keys %{$self->{stat_attributes}{$att}{$attval}}){
$m->{real_stat}{$att}{$label}{sum}
+= $attval * $self->{stat_attributes}{$att}{$attval}{$label};
$m->{real_stat}{$att}{$label}{count}
+= $self->{stat_attributes}{$att}{$attval}{$label};
}
NaiveBayes1.pm view on Meta::CPAN
$nscores{$label} =
0.398942280401433 / $m->{real_stat}{$att}{$label}{stddev}*
exp( -0.5 *
( ( $newattrs->{$att} -
$m->{real_stat}{$att}{$label}{mean})
/ $m->{real_stat}{$att}{$label}{stddev}
) ** 2
);
$sum += $nscores{$label};
}
if ($sum==0) { print STDERR "Ignoring all Gaussian probabilities: all=0!\n" }
else {
foreach my $label (@labels) { $scores{$label} *= $nscores{$label} }
}
}
my $sumPx = 0.0;
$sumPx += $scores{$_} foreach (keys(%scores));
$scores{$_} /= $sumPx foreach (keys(%scores));
return \%scores;
}
sub print_model {
my $self = shift;
my $withcounts = '';
if ($#_>-1 && $_[0] eq 'with counts')
{ shift @_; $withcounts = 1; }
my $m = $self->{model};
my @labels = $self->labels;
my $r;
# prepare table category P(category)
my @lines;
NaiveBayes1.pm view on Meta::CPAN
N Y Y N 18
N Y N Y 16
N Y N N 16
N N Y Y 2
N N Y N 9
N N N Y 11
N N N N 91
-------------------------------
");
$nb->train;
print "Model:\n" . $nb->print_model;
print "Model (with counts):\n" . $nb->print_model('with counts');
$nb = AI::NaiveBayes1->new;
$nb->add_instances(attributes=>{model=>'H',place=>'B'},
label=>'repairs=Y',cases=>30);
$nb->add_instances(attributes=>{model=>'H',place=>'B'},
label=>'repairs=N',cases=>10);
$nb->add_instances(attributes=>{model=>'H',place=>'N'},
label=>'repairs=Y',cases=>18);
$nb->add_instances(attributes=>{model=>'H',place=>'N'},
label=>'repairs=N',cases=>16);
NaiveBayes1.pm view on Meta::CPAN
label=>'repairs=Y',cases=>22);
$nb->add_instances(attributes=>{model=>'T',place=>'B'},
label=>'repairs=N',cases=>14);
$nb->add_instances(attributes=>{model=>'T',place=>'N'},
label=>'repairs=Y',cases=> 6);
$nb->add_instances(attributes=>{model=>'T',place=>'N'},
label=>'repairs=N',cases=>84);
$nb->train;
print "Model:\n" . $nb->print_model;
# Find results for unseen instances
my $result = $nb->predict
(attributes => {model=>'T', place=>'N'});
foreach my $k (keys(%{ $result })) {
print "for label $k P = " . $result->{$k} . "\n";
}
# export the model into a string
my $string = $nb->export_to_YAML();
# create the same model from the string
my $nb1 = AI::NaiveBayes1->import_from_YAML($string);
# write the model to a file (shorter than model->string->file)
$nb->export_to_YAML_file('t/tmp1');
NaiveBayes1.pm view on Meta::CPAN
=item export_to_YAML()
Returns a C<YAML> string representation of an C<AI::NaiveBayes1>
object. Requires YAML module.
=item C<export_to_YAML_file( $file_name )>
Writes a C<YAML> string representation of an C<AI::NaiveBayes1>
object to a file. Requires YAML module.
=item C<print_model( OPTIONAL 'with counts' )>
Returns a string, human-friendly representation of the model.
The model is supposed to be trained before calling this method.
One argument 'with counts' can be supplied, in which case explanatory
expressions with counts are printed as well.
=item train()
Calculates the probabilities that will be necessary for categorization
using the C<predict()> method.
=item C<predict( attributes =E<gt> HASH )>
Use this method to predict the label of an unknown instance. The
attributes should be of the same format as you passed to
NaiveBayes1.pm view on Meta::CPAN
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>72,humidity=>95,windy=>'FALSE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>69,humidity=>70,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>75,humidity=>80,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>75,humidity=>70,windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>72,humidity=>90,windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>81,humidity=>75,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>71,humidity=>91,windy=>'TRUE'},label=>'play=no');
$nb->train;
my $printedmodel = "Model:\n" . $nb->print_model;
my $p = $nb->predict(attributes=>{outlook=>'sunny',temperature=>66,humidity=>90,windy=>'TRUE'});
YAML::DumpFile('file', $p);
die unless (abs($p->{'play=no'} - 0.792) < 0.001);
die unless(abs($p->{'play=yes'} - 0.208) < 0.001);
=head1 HISTORY
L<Algorithm::NaiveBayes> by Ken Williams was not what I needed so I
wrote this one. L<Algorithm::NaiveBayes> is oriented towards text
$nb->add_instances(attributes=>{model=>'H',place=>'B'},label=>'repairs=N',cases=>10);
$nb->add_instances(attributes=>{model=>'H',place=>'N'},label=>'repairs=Y',cases=>18);
$nb->add_instances(attributes=>{model=>'H',place=>'N'},label=>'repairs=N',cases=>16);
$nb->add_instances(attributes=>{model=>'T',place=>'B'},label=>'repairs=Y',cases=>22);
$nb->add_instances(attributes=>{model=>'T',place=>'B'},label=>'repairs=N',cases=>14);
$nb->add_instances(attributes=>{model=>'T',place=>'N'},label=>'repairs=Y',cases=> 6);
$nb->add_instances(attributes=>{model=>'T',place=>'N'},label=>'repairs=N',cases=>84);
$nb->train;
my $printedmodel = "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/1-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/1-1.out');
#putfile('t/1-2.out', $nb->export_to_YAML());
#is($nb->export_to_YAML(), getfile('t/1-2.out'));
eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 1.t" if $@;
$nb->export_to_YAML_file('t/tmp1');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp1');
$printedmodel = &shorterdecimals($nb1->print_model);
#is("Model:\n" . $printedmodel, getfile('t/1-1.out'));
&compare_by_line("Model:\n" . $printedmodel, 't/1-1.out');
my $tmp = $nb->export_to_YAML();
my $nb2 = AI::NaiveBayes1->import_from_YAML($tmp);
$printedmodel = &shorterdecimals($nb2->print_model);
#is("Model:\n" . $printedmodel, getfile('t/1-1.out'));
&compare_by_line("Model:\n" . $printedmodel, 't/1-1.out');
$nb->add_instance(attributes=>{morning=>'N',html=>'Y',size1=>'L'},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size1=>'L'},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'Y',size1=>'S'},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'Y',size1=>'S'},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size1=>'L'},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size1=>'S'},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'N',size1=>'S'},label=>'spam=Y');
$nb->train;
my $printedmodel = "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/2-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/2-1.out', __FILE__ , __LINE__);
#putfile('t/2-2.out', $nb->export_to_YAML());
#is($nb->export_to_YAML(), getfile('t/2-2.out'));
eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 2.t" if $@;
$nb->export_to_YAML_file('t/tmp2');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp2');
$printedmodel = "Model:\n" . $nb1->print_model;
$printedmodel = &shorterdecimals($printedmodel);
&compare_by_line($printedmodel, 't/2-1.out', __FILE__ , __LINE__);
#is("Model:\n" . $nb1->print_model, getfile('t/2-1.out'));
my $tmp = $nb->export_to_YAML();
my $nb2 = AI::NaiveBayes1->import_from_YAML($tmp);
$printedmodel = "Model:\n" . $nb2->print_model;
$printedmodel = &shorterdecimals($printedmodel);
&compare_by_line($printedmodel, 't/2-1.out', __FILE__ , __LINE__);
#is("Model:\n" . $nb2->print_model, getfile('t/2-1.out'));
my $p = $nb->predict(attributes=>{morning=>'Y',html=>'Y',size1=>'L'});
#putfile('t/2-3.out', YAML::Dump($p));
like($p->{'spam=N'}, qr/0\.0627/);
like($p->{'spam=Y'}, qr/0\.9372/);
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>'mild',humidity=>'high',windy=>'FALSE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>'cool',humidity=>'normal',windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>'mild',humidity=>'normal',windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>'mild',humidity=>'normal',windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>'mild',humidity=>'high',windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>'hot',humidity=>'normal',windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>'mild',humidity=>'high',windy=>'TRUE'},label=>'play=no');
$nb->train;
my $printedmodel = "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/3-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/3-1.out', __FILE__, __LINE__);
#putfile('t/3-2.out', $nb->export_to_YAML());
#is($nb->export_to_YAML(), getfile('t/3-2.out'));
eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 3.t" if $@;
$nb->export_to_YAML_file('t/tmp3');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp3');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
't/3-1.out', __FILE__, __LINE__);
my $tmp = $nb->export_to_YAML();
my $nb2 = AI::NaiveBayes1->import_from_YAML($tmp);
&compare_by_line("Model:\n" . &shorterdecimals($nb2->print_model),
't/3-1.out', __FILE__, __LINE__);
my $p = $nb->predict(attributes=>{outlook=>'sunny',temperature=>'cool',humidity=>'high',windy=>'TRUE'});
#putfile('t/3-3.out', YAML::Dump($p));
ok(abs($p->{'play=no'} - 0.795) < 0.001);
ok(abs($p->{'play=yes'} - 0.205) < 0.001);
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>72,humidity=>95,windy=>'FALSE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>69,humidity=>70,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>75,humidity=>80,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>75,humidity=>70,windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>72,humidity=>90,windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>81,humidity=>75,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>71,humidity=>91,windy=>'TRUE'},label=>'play=no');
$nb->train;
my $printedmodel = "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/4-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/4-1.out', __FILE__, __LINE__);
#putfile('t/4-2.out', $nb->export_to_YAML());
#is($nb->export_to_YAML(), getfile('t/4-2.out'));
eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 4.t" if $@;
$nb->export_to_YAML_file('t/tmp4');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp4');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
't/4-1.out', __FILE__, __LINE__);
my $tmp = $nb->export_to_YAML();
my $nb2 = AI::NaiveBayes1->import_from_YAML($tmp);
&compare_by_line("Model:\n" . &shorterdecimals($nb2->print_model),
't/4-1.out', __FILE__, __LINE__);
my $p = $nb->predict(attributes=>{outlook=>'sunny',temperature=>66,humidity=>90,windy=>'TRUE'});
#putfile('t/4-3.out', YAML::Dump($p));
ok(abs($p->{'play=no'} - 0.792) < 0.001);
ok(abs($p->{'play=yes'} - 0.208) < 0.001);
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size=>2176},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'Y',size=>877},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'Y',size=>272},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size=>2740},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size=>514},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'N',size=>1321},label=>'spam=Y');
$nb->set_real('size');
$nb->train;
my $printedmodel = "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/5-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/5-1.out', __FILE__, __LINE__);
#putfile('t/5-2.out', $nb->export_to_YAML());
#is($nb->export_to_YAML(), getfile('t/5-2.out'));
eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 5.t" if $@;
$nb->export_to_YAML_file('t/tmp5');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp5');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
't/5-1.out', __FILE__, __LINE__);
my $tmp = $nb->export_to_YAML();
my $nb2 = AI::NaiveBayes1->import_from_YAML($tmp);
&compare_by_line("Model:\n" . &shorterdecimals($nb2->print_model),
't/5-1.out', __FILE__, __LINE__);
my $p = $nb->predict(attributes=>{morning=>'Y',html=>'Y',size=>4749});
#putfile('t/5-3.out', YAML::Dump($p));
ok(abs($p->{'spam=N'} - 0.043) < 0.001);
ok(abs($p->{'spam=Y'} - 0.957) < 0.001);
$nb->add_instances(attributes=>{C=>'Y',F=>2},label=>'S=N',cases=>10);
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=Y',cases=>18);
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=N',cases=>16);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=Y',cases=>22);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=N',cases=>14);
$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=Y',cases=> 6);
$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=N',cases=>84);
$nb->train;
my $printedmodel = "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/6-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/6-1.out', __FILE__, __LINE__);
eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 6.t" if $@;
$nb->export_to_YAML_file('t/tmp6');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp6');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
't/6-1.out', __FILE__, __LINE__);
my $p = $nb->predict(attributes=>{C=>'Y',F=>0});
#putfile('t/6-2.out', YAML::Dump($p));
ok(abs($p->{'S=N'} - 0.580) < 0.001);
ok(abs($p->{'S=Y'} - 0.420) < 0.001);
# Continual
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=Y',cases=>18);
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=N',cases=>16);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=Y',cases=>22);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=N',cases=>14);
$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=Y',cases=> 6);
$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=N',cases=>84);
$nb->set_real('F');
$nb->train;
$printedmodel = "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/6-3.out', $printedmodel);
&compare_by_line($printedmodel, 't/6-3.out', __FILE__, __LINE__);
$nb->export_to_YAML_file('t/tmp6-2');
$nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp6-2');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
't/6-3.out', __FILE__, __LINE__);
$p = $nb->predict(attributes=>{C=>'Y',F=>1});
#putfile('t/6-4.out', YAML::Dump($p));
ok(abs($p->{'S=N'} - 0.339) < 0.001);
ok(abs($p->{'S=Y'} - 0.661) < 0.001);
$nb->add_instances(attributes=>{C=>'Y',F=>2},label=>'S=N',cases=>10);
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=Y',cases=>18);
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=N',cases=>16);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=Y',cases=>22);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=N',cases=>14);
$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=Y',cases=> 6);
$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=N',cases=>84);
$nb->train;
my $printedmodel = "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/7-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/7-1.out', __FILE__, __LINE__);
eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 7.t" if $@;
$nb->export_to_YAML_file('t/tmp7');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp7');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
't/7-1.out', __FILE__, __LINE__);
my $p = $nb->predict(attributes=>{C=>'Y',F=>0});
#putfile('t/7-2.out', YAML::Dump($p));
ok(abs($p->{'S=N'} - 0.580) < 0.001);
ok(abs($p->{'S=Y'} - 0.420) < 0.001);
# Continual
#$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=N',cases=>84);
$nb->add_table(" C F S count\n".
"------------------\n".
" N 0 Y 6 \n".
" N 0 N 84 \n".
'');
$nb->set_real('F');
$nb->train;
$printedmodel = &shorterdecimals("Model:\n" . $nb->print_model);
#putfile('t/7-3.out', $printedmodel);
&compare_by_line($printedmodel, 't/7-3.out', __FILE__, __LINE__);
$nb->export_to_YAML_file('t/tmp7-2');
$nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp7-2');
&compare_by_line(&shorterdecimals("Model:\n" . $nb1->print_model),
't/7-3.out', __FILE__, __LINE__);
$p = $nb->predict(attributes=>{C=>'Y',F=>1});
#putfile('t/7-4.out', YAML::Dump($p));
ok(abs($p->{'S=N'} - 0.339) < 0.001);
ok(abs($p->{'S=Y'} - 0.661) < 0.001);
$nb = AI::NaiveBayes1->new;
$nb->add_table(
N Y Y N 18
N Y N Y 16
N Y N N 16
N N Y Y 2
N N Y N 9
N N N Y 11
N N N N 91
-------------------------------
");
$nb->train;
$printedmodel = "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/7-5.out', $printedmodel);
&compare_by_line($printedmodel, 't/7-5.out', __FILE__, __LINE__);
$p = $nb->predict(attributes=>{Html=>'N',Caps=>'N',Free=>'Y'});
# putfile('t/7-2.out', YAML::Dump($p));
ok(abs($p->{'Spam=N'} - 0.6580) < 0.001);
ok(abs($p->{'Spam=Y'} - 0.3420) < 0.001);
N fly fly END V 1
PREV PREV fly duck V 2
V fly duck END N 2
-------------------------------
");
$nb->{smoothing}{W} = 'unseen count=0.5';
$nb->{smoothing}{Wp} = 'unseen count=0.5';
$nb->{smoothing}{Wf} = 'unseen count=0.5';
$nb->train;
my $printedmodel = "Model:\n" . $nb->print_model('with counts');
putfile('t/8-1.out', $printedmodel);
is($printedmodel, getfile('t/8-1.out'));
eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 8.t" if $@;
use YAML;
# duck ducks fly flies
my $p = $nb->predict(attributes=>{Tp=>'PREV',Wp=>'PREV',W=>'duck',Wf=>'ducks'});
putfile('t/8-2.out', YAML::Dump($p));
#ok(abs($p->{'S=N'} - 0.580) < 0.001);
t/auxfunctions.pl view on Meta::CPAN
my $testfile = @_ ? shift @_ : '';
my $testline = @_ ? shift @_ : '';
my $expected = getfile($file);
if ($got eq $expected) { pass; return }
my $flag = '';
while ($got ne '' or $expected ne '') {
my $a=$got; if ($a =~ /\s*\n/) { $a = $`; $got = $'; }
my $b=$expected; if ($b =~ /\s*\n/) { $b = $`; $expected = $'; }
if ($a ne $b) {
if ($flag eq '')
{ print STDERR "\n$testfile:$testline: Failed comparison with $file!\n"; $flag = 1; }
print STDERR " Got: $a\n".
"Expected: $b\n";
}
}
if ($flag eq '') { pass } else { fail }
}
sub shorterdecimals {
local $_ = shift;
s/(\d{4}\.\d{10})\d+/$1/g;
s/(\.\d{12})\d+/$1/g;
t/auxfunctions.pl view on Meta::CPAN
open(F, "<$f") or die "getfile:cannot open $f:$!";
my @r = <F>;
close(F);
return wantarray ? @r : join ('', @r);
}
sub putfile($@) {
my $f = shift;
local *F;
open(F, ">$f") or die "putfile:cannot open $f:$!";
print F '' unless @_;
while (@_) { print F shift(@_) }
close(F);
}
1;
( run in 0.774 second using v1.01-cache-2.11-cpan-de7293f3b23 )