AI-NaiveBayes1
view release on metacpan or search on metacpan
Revision history for Perl extension AI::NaiveBayes1.
2.012 Sat May 29 11:00:47 ADT 2021
- Removing non-ASCII character in "Bohmer" last name to avoid testing
issues, "Daniel B\"ohmer" name changed to "Daniel Bohmer" for
documentation purposes
2.011 Fri May 28 07:55:50 ADT 2021
- merged corrections from Daniel Bohmer
2.010 Tue May 25 18:30:47 ADT 2021
- fixing bugs reported by perltesters on some platforms.
It seems to be due to running tests in a parallel way, and
that using the same name of the temporary file is the problem.
2.009 Sun May 23 09:23:06 ADT 2021
- fixed bug with t/auxfunctions.pl not being located during
testing (hopefully fixed because it requires a different
environment to reproduce)
- made Changes reverse chronological
2.008 Fri May 21 10:56:33 ADT 2021
- GitHub release
- documentation improvements
2.007 Wed Jan 29 05:58:37 AST 2020
- documentation update
- fixed bug: import_from_YAML and import_from_YAML_file now
return bless-ed references (they did not return before)
2.006 Tue Mar 15 06:17:04 ADT 2011
- reduced precision in \d{4}.\d+ to \d{4}.\d{10} avoid testing
problems on different platforms
2.005 Tue Mar 8 06:35:43 AST 2011
- reduced precision in comparison to avoid testing problems on
different platforms (from 14 to 12 digits after decimal point)
2.004 Sat Mar 5 12:39:35 AST 2011
- added missing files to MANIFEST (t/7-3.out and t/7-5.out)
2.003 Fri Mar 4 06:43:31 AST 2011
- fixed more testing problems due to differences in precision
2.002 Wed Mar 2 06:29:06 AST 2011
- fixed some testing problems due to differences in precision
2.001 Mon Feb 28 11:18:22 AST 2011
- fixed some testing problems due to differences in precision
2.000 Tue Feb 22 09:58:04 AST 2011
- fixing lexical sorting of version numbers
1.10 Tue Feb 22 09:31:12 AST 2011
- fixed testing problems due to differences in precision
- fixed podchecker warning (some space)
- better test error reporting in 2.t
- added t/pod.t, thanks to Michael Stevens
1.9 Tue Aug 31 09:27:51 ADT 2010
- fixed testing problems due to differences in precision in t/2.t
1.8 Fri Aug 21 06:36:34 ADT 2009
- fixed a pod documentation error
1.7 Thu Aug 20 14:20:15 ADT 2009
- improvements in documentation
- added method add_csv_file
- added method drop_attributes
- removed real_attr and added attribute_type field
1.6 Wed Aug 19 09:09:57 ADT 2009
- improved an error message
- fixed some testing problems due to whitespace
- small improvement in generating documentation
1.5 Wed Jan 30 08:06:22 AST 2008
- fixed testing problems due to differences in the lowest
significant digit on different platforms
1.4 Fri Dec 14 11:42:16 AST 2007
- added test skipping if YAML module is not available
- removing deprecated "our" reserved word
1.3 Fri Dec 7 07:26:47 AST 2007
- added reading from a table format (add_table)
- fixed some warnings reported during testing
- added option "with counts" to print_model
- added smoothing option for attributes
1.2 Mon Mar 14 08:03:16 AST 2005
- addition to documentation
- fixing a minor bug and a warning
1.1 Tue Apr 20 08:07:33 ADT 2004
- added several more test
- implemented optional Gaussian distribution for numerical
attributes
1.0 Thu Sep 4 08:01:19 ADT 2003
- bug fix in testing (attributes and labels are sorted now in
print_model)
0.03 Wed Sep 3 08:25:43 ADT 2003
- bug fix
- import and export to/from string and file (using YAML)
0.02 Fri May 9 15:53:53 ADT 2003
- bug fix
0.01 Fri May 9 14:48:53 ADT 2003
- original version;
{
"abstract" : "Naive Bayes Classification",
"author" : [
"Vlado Keselj https://web.cs.dal.ca/~vlado"
],
"dynamic_config" : 1,
"generated_by" : "ExtUtils::MakeMaker version 7.04, CPAN::Meta::Converter version 2.150001",
"license" : [
"perl_5"
],
"meta-spec" : {
"url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
"version" : "2"
},
"name" : "AI-NaiveBayes1",
"no_index" : {
"directory" : [
"t",
"inc"
]
},
"prereqs" : {
"build" : {
"requires" : {
"ExtUtils::MakeMaker" : "0"
}
},
"configure" : {
"requires" : {
"ExtUtils::MakeMaker" : "0"
}
},
"runtime" : {
"requires" : {
"YAML" : "0.0"
}
}
},
"release_status" : "stable",
"version" : "2.012"
}
---
abstract: 'Naive Bayes Classification'
author:
- 'Vlado Keselj https://web.cs.dal.ca/~vlado'
build_requires:
ExtUtils::MakeMaker: '0'
configure_requires:
ExtUtils::MakeMaker: '0'
dynamic_config: 1
generated_by: 'ExtUtils::MakeMaker version 7.04, CPAN::Meta::Converter version 2.150001'
license: perl
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: '1.4'
name: AI-NaiveBayes1
no_index:
directory:
- t
- inc
requires:
YAML: '0.0'
version: '2.012'
Makefile.PL view on Meta::CPAN
use ExtUtils::MakeMaker;
#<? read_starfish_conf() !>
my $module = 'AI::NaiveBayes1';
my $name = 'NaiveBayes1.pm';
(my $dir = $module) =~ s/::/-/g;
WriteMakefile
( 'NAME' => $module,
'VERSION_FROM' => $name, # finds $VERSION
'dist' => { COMPRESS=>"gzip",
SUFFIX=>"gz",
#PREOP=>('starfish README; '.
# "cp -f README $dir-\$(VERSION); "
# ),
},
'clean' => {FILES => "tmp* testfiles/tmp1 AI *~ t/tmp1 t/*~ t/tmp6 t/tmp6-2"},
'PREREQ_PM' => { YAML => '0.0' },
'PL_FILES' => {},
($] >= 5.005 ? ## Add these new keywords supported since 5.005
(ABSTRACT_FROM => $name, # retrieve abstract from module
# <? echo " AUTHOR => 'Vlado Keselj $VladoURL')" !>#+
AUTHOR => 'Vlado Keselj https://web.cs.dal.ca/~vlado')
#-
: ()
),
($ExtUtils::MakeMaker::VERSION ge '6.30_00'?
('LICENSE' => 'perl', ) : ()),
);
open(M, ">>Makefile") or die;
if ( -f 'priv.make' ) { print M getfile('priv.make') }
close(M);
sub getfile($) {
my $f = shift;
local *F;
open(F, "<$f") or die "getfile:cannot open $f:$!";
my @r = <F>;
close(F);
return wantarray ? @r : join ('', @r);
}
NaiveBayes1.pm view on Meta::CPAN
@EXPORT = qw(new);
use vars qw($Version);
$Version = $VERSION = '2.012';
use vars @EXPORT_OK;
# non-exported package globals go here
use vars qw();
sub new {
my $package = shift;
return bless {
attributes => [ ],
labels => [ ],
attvals => {},
real_stat => {},
numof_instances => 0,
stat_labels => {},
stat_attributes => {},
smoothing => {},
attribute_type => {},
}, $package;
}
sub set_real {
my ($self, @attr) = @_;
foreach my $a (@attr) { $self->{attribute_type}{$a} = 'real' }
}
sub import_from_YAML {
my $package = shift;
my $yaml = shift;
my $self = YAML::Load($yaml);
return bless $self, $package;
}
sub import_from_YAML_file {
my $package = shift;
my $yamlf = shift;
my $self = YAML::LoadFile($yamlf);
return bless $self, $package;
}
# assume that the last header count means counts
# after optionally removing counts, the last header is label
sub add_table {
my $self = shift;
my @atts = (); my $lbl=''; my $cnt = '';
while (@_) {
my $table = shift;
if ($table =~ /^(.*)\n[ \t]*-+\n/) {
my $a = $1; $table = $';
$a =~ s/^\s+//; $a =~ s/\s+$//;
if ($a =~ /\s*\bcount\s*$/) {
$a=$`; $cnt=1; } else { $cnt='' }
@atts = split(/\s+/, $a);
$lbl = pop @atts;
}
while ($table ne '') {
$table =~ /^(.*)\n?/ or die;
my $r=$1; $table = $';
$r =~ s/^\s+//; $r=~ s/\s+$//;
if ($r =~ /^-+$/) { next }
my @v = split(/\s+/, $r);
die "values (#=$#v): {@v}\natts (#=$#atts): @atts, lbl=$lbl,\n".
"count: $cnt\n" unless $#v-($cnt?2:1) == $#atts;
my %av=(); my @a = @atts;
while (@a) { $av{shift @a} = shift(@v) }
$self->add_instances(attributes=>\%av,
label=>"$lbl=$v[0]",
cases=>($cnt?$v[1]:1) );
}
}
} # end of add_table
# Simplified; not generally compatible.
# Assume that the last header is label. The first row contains
# attribute names.
sub add_csv_file {
my $self = shift; my $fn = shift; local *F;
open(F,$fn) or die "Cannot open CSV file `$fn': $!";
local $_ = <F>; my @atts = (); my $lbl=''; my $cnt = '';
chomp; @atts = split(/\s*,\s*/, $_); $lbl = pop @atts;
while (<F>) {
chomp; my @v = split(/\s*,\s*/, $_);
die "values (#=$#v): {@v}\natts (#=$#atts): @atts, lbl=$lbl,\n".
"count: $cnt\n" unless $#v-($cnt?2:1) == $#atts;
my %av=(); my @a = @atts;
while (@a) { $av{shift @a} = shift(@v) }
$self->add_instances(attributes=>\%av,
label=>"$lbl=$v[0]",
cases=>($cnt?$v[1]:1) );
}
close(F);
} # end of add_csv_file
sub drop_attributes {
my $self = shift;
foreach my $a (@_) {
my @tmp = grep { $a ne $_ } @{ $self->{attributes} };
$self->{attributes} = \@tmp;
delete($self->{attvals}{$a});
delete($self->{stat_attributes}{$a});
delete($self->{attribute_type}{$a});
delete($self->{real_stat}{$a});
delete($self->{smoothing}{$a});
}
} # end of drop_attributes
sub add_instances {
my ($self, %params) = @_;
for ('attributes', 'label', 'cases') {
die "Missing required '$_' parameter" unless exists $params{$_};
}
if (scalar(keys(%{ $self->{stat_attributes} })) == 0) {
foreach my $a (keys(%{$params{attributes}})) {
$self->{stat_attributes}{$a} = {};
push @{ $self->{attributes} }, $a;
$self->{attvals}{$a} = [ ];
$self->{attribute_type}{$a} = 'nominal' unless defined($self->{attribute_type}{$a});
}
} else {
foreach my $a (keys(%{$self->{stat_attributes}}))
{ die "attribute not given in instance: $a"
unless exists($params{attributes}{$a}) }
}
$self->{numof_instances} += $params{cases};
push @{ $self->{labels} }, $params{label} unless
exists $self->{stat_labels}->{$params{label}};
$self->{stat_labels}{$params{label}} += $params{cases};
foreach my $a (keys(%{$self->{stat_attributes}})) {
if ( not exists($params{attributes}{$a}) )
{ die "attribute $a not given" }
my $attval = $params{attributes}{$a};
if (not exists($self->{stat_attributes}{$a}{$attval})) {
push @{ $self->{attvals}{$a} }, $attval;
$self->{stat_attributes}{$a}{$attval} = {};
}
$self->{stat_attributes}{$a}{$attval}{$params{label}} += $params{cases};
}
}
sub add_instance {
my ($self, %params) = @_; $params{cases} = 1;
$self->add_instances(%params);
}
sub train {
my $self = shift;
my $m = $self->{model} = {};
$m->{labelprob} = {};
foreach my $label (keys(%{$self->{stat_labels}}))
{ $m->{labelprob}{$label} = $self->{stat_labels}{$label} /
$self->{numof_instances} }
$m->{condprob} = {};
$m->{condprobe} = {};
foreach my $att (keys(%{$self->{stat_attributes}})) {
next if $self->{attribute_type}{$att} eq 'real';
$m->{condprob}{$att} = {};
$m->{condprobe}{$att} = {};
foreach my $label (keys(%{$self->{stat_labels}})) {
my $total = 0; my @attvals = ();
foreach my $attval (keys(%{$self->{stat_attributes}{$att}})) {
next unless
exists($self->{stat_attributes}{$att}{$attval}{$label}) and
$self->{stat_attributes}{$att}{$attval}{$label} > 0;
push @attvals, $attval;
$m->{condprob}{$att}{$attval} = {} unless
exists( $m->{condprob}{$att}{$attval} );
$m->{condprob}{$att}{$attval}{$label} =
$self->{stat_attributes}{$att}{$attval}{$label};
$m->{condprobe}{$att}{$attval} = {} unless
exists( $m->{condprob}{$att}{$attval} );
$m->{condprobe}{$att}{$attval}{$label} =
$self->{stat_attributes}{$att}{$attval}{$label};
$total += $m->{condprob}{$att}{$attval}{$label};
}
if (exists($self->{smoothing}{$att}) and
$self->{smoothing}{$att} =~ /^unseen count=/) {
my $uc = $'; $uc = 0.5 if $uc <= 0;
if(! exists($m->{condprob}{$att}{'*'}) ) {
$m->{condprob}{$att}{'*'} = {};
$m->{condprobe}{$att}{'*'} = {};
}
$m->{condprob}{$att}{'*'}{$label} = $uc;
$total += $uc;
if (grep {$_ eq '*'} @attvals) { die }
push @attvals, '*';
}
foreach my $attval (@attvals) {
$m->{condprobe}{$att}{$attval}{$label} =
"(= $m->{condprob}{$att}{$attval}{$label} / $total)";
$m->{condprob}{$att}{$attval}{$label} /= $total;
}
}
}
# For real-valued attributes, we use Gaussian distribution
# let us collect statistics
foreach my $att (keys(%{$self->{stat_attributes}})) {
next unless $self->{attribute_type}{$att} eq 'real';
print STDERR "Smoothing ignored for real attribute $att!\n" if
defined($self->{smoothing}{att}) and $self->{smoothing}{att};
$m->{real_stat}->{$att} = {};
foreach my $attval (keys %{$self->{stat_attributes}{$att}}){
foreach my $label (keys %{$self->{stat_attributes}{$att}{$attval}}){
$m->{real_stat}{$att}{$label}{sum}
+= $attval * $self->{stat_attributes}{$att}{$attval}{$label};
$m->{real_stat}{$att}{$label}{count}
+= $self->{stat_attributes}{$att}{$attval}{$label};
}
foreach my $label (keys %{$self->{stat_attributes}{$att}{$attval}}){
next if
!defined($m->{real_stat}{$att}{$label}{count}) ||
$m->{real_stat}{$att}{$label}{count} == 0;
$m->{real_stat}{$att}{$label}{mean} =
$m->{real_stat}{$att}{$label}{sum} /
$m->{real_stat}{$att}{$label}{count};
}
}
# calculate stddev
foreach my $attval (keys %{$self->{stat_attributes}{$att}}) {
foreach my $label (keys %{$self->{stat_attributes}{$att}{$attval}}){
$m->{real_stat}{$att}{$label}{stddev} +=
($attval - $m->{real_stat}{$att}{$label}{mean})**2 *
$self->{stat_attributes}{$att}{$attval}{$label};
}
}
foreach my $label (keys %{$m->{real_stat}{$att}}) {
$m->{real_stat}{$att}{$label}{stddev} =
sqrt($m->{real_stat}{$att}{$label}{stddev} /
($m->{real_stat}{$att}{$label}{count}-1)
);
}
} # foreach real attribute
} # end of sub train
sub predict {
my ($self, %params) = @_;
my $newattrs = $params{attributes} or die "Missing 'attributes' parameter for predict()";
my $m = $self->{model}; # For convenience
my %scores;
my @labels = @{ $self->{labels} };
$scores{$_} = $m->{labelprob}{$_} foreach (@labels);
foreach my $att (keys(%{ $newattrs })) {
if (!defined($self->{attribute_type}{$att})) { die "Unknown attribute: `$att'" }
next if $self->{attribute_type}{$att} eq 'real';
die unless exists($self->{stat_attributes}{$att});
my $attval = $newattrs->{$att};
die "Unknown value `$attval' for attribute `$att'."
unless exists($self->{stat_attributes}{$att}{$attval}) or
exists($self->{smoothing}{$att});
foreach my $label (@labels) {
if (exists($m->{condprob}{$att}{$attval}) and
exists($m->{condprob}{$att}{$attval}{$label}) and
$m->{condprob}{$att}{$attval}{$label} > 0 ) {
$scores{$label} *=
$m->{condprob}{$att}{$attval}{$label};
} elsif (exists($self->{smoothing}{$att})) {
$scores{$label} *=
$m->{condprob}{$att}{'*'}{$label};
} else { $scores{$label} = 0 }
}
}
foreach my $att (keys %{$newattrs}){
next unless $self->{attribute_type}{$att} eq 'real';
my $sum=0; my %nscores;
foreach my $label (@labels) {
die unless exists $m->{real_stat}{$att}{$label}{mean};
$nscores{$label} =
0.398942280401433 / $m->{real_stat}{$att}{$label}{stddev}*
exp( -0.5 *
( ( $newattrs->{$att} -
$m->{real_stat}{$att}{$label}{mean})
/ $m->{real_stat}{$att}{$label}{stddev}
) ** 2
);
$sum += $nscores{$label};
}
if ($sum==0) { print STDERR "Ignoring all Gaussian probabilities: all=0!\n" }
else {
foreach my $label (@labels) { $scores{$label} *= $nscores{$label} }
}
}
my $sumPx = 0.0;
$sumPx += $scores{$_} foreach (keys(%scores));
$scores{$_} /= $sumPx foreach (keys(%scores));
return \%scores;
}
sub print_model {
my $self = shift;
my $withcounts = '';
if ($#_>-1 && $_[0] eq 'with counts')
{ shift @_; $withcounts = 1; }
my $m = $self->{model};
my @labels = $self->labels;
my $r;
# prepare table category P(category)
my @lines;
push @lines, 'category ', '-';
push @lines, "$_ " foreach @labels;
@lines = _append_lines(@lines);
@lines = map { $_.='| ' } @lines;
$lines[1] = substr($lines[1],0,length($lines[1])-2).'+-';
$lines[0] .= "P(category) ";
foreach my $i (2..$#lines) {
my $label = $labels[$i-2];
$lines[$i] .= $m->{labelprob}{$label} .' ';
if ($withcounts) {
$lines[$i] .= "(= $self->{stat_labels}{$label} / ".
"$self->{numof_instances} ) ";
}
}
@lines = _append_lines(@lines);
$r .= join("\n", @lines) . "\n". $lines[1]. "\n\n";
# prepare conditional tables
my @attributes = sort $self->attributes;
foreach my $att (@attributes) {
@lines = ( "category ", '-' );
my @lines1 = ( "$att ", '-' );
my @lines2 = ( "P( $att | category ) ", '-' );
my @attvals = sort keys(%{ $m->{condprob}{$att} });
foreach my $label (@labels) {
if ( $self->{attribute_type}{$att} ne 'real' ) {
foreach my $attval (@attvals) {
next unless exists($m->{condprob}{$att}{$attval}{$label});
push @lines, "$label ";
push @lines1, "$attval ";
my $line = $m->{condprob}{$att}{$attval}{$label};
if ($withcounts)
{ $line.= ' '.$m->{condprobe}{$att}{$attval}{$label} }
$line .= ' ';
push @lines2, $line;
}
} else {
push @lines, "$label ";
push @lines1, "real ";
push @lines2, "Gaussian(mean=".
$m->{real_stat}{$att}{$label}{mean}.",stddev=".
$m->{real_stat}{$att}{$label}{stddev}.") ";
}
push @lines, '-'; push @lines1, '-'; push @lines2, '-';
}
@lines = _append_lines(@lines);
foreach my $i (0 .. $#lines)
{ $lines[$i] .= ($lines[$i]=~/-$/?'+-':'| ') . $lines1[$i] }
@lines = _append_lines(@lines);
foreach my $i (0 .. $#lines)
{ $lines[$i] .= ($lines[$i]=~/-$/?'+-':'| ') . $lines2[$i] }
@lines = _append_lines(@lines);
$r .= join("\n", @lines). "\n\n";
}
return $r;
}
sub _append_lines {
my @l = @_;
my $m = 0;
foreach (@l) { $m = length($_) if length($_) > $m }
@l = map
{ while (length($_) < $m) { $_.=substr($_,length($_)-1) }; $_ }
@l;
return @l;
}
sub labels {
my $self = shift;
return @{ $self->{labels} };
}
sub attributes {
my $self = shift;
return keys %{ $self->{stat_attributes} };
}
sub export_to_YAML {
my $self = shift;
require YAML;
return YAML::Dump($self);
}
sub export_to_YAML_file {
my $self = shift;
my $file = shift;
require YAML;
YAML::DumpFile($file, $self);
}
1;
__END__
=head1 NAME
AI::NaiveBayes1 - Naive Bayes Classification
=head1 SYNOPSIS
use AI::NaiveBayes1;
my $nb = AI::NaiveBayes1->new;
$nb->add_table(
"Html Caps Free Spam count
-------------------------------
Y Y Y Y 42
Y Y Y N 32
Y Y N Y 17
Y Y N N 7
Y N Y Y 32
Y N Y N 12
Y N N Y 20
Y N N N 16
N Y Y Y 38
N Y Y N 18
N Y N Y 16
N Y N N 16
N N Y Y 2
N N Y N 9
N N N Y 11
N N N N 91
-------------------------------
");
$nb->train;
print "Model:\n" . $nb->print_model;
print "Model (with counts):\n" . $nb->print_model('with counts');
$nb = AI::NaiveBayes1->new;
$nb->add_instances(attributes=>{model=>'H',place=>'B'},
label=>'repairs=Y',cases=>30);
$nb->add_instances(attributes=>{model=>'H',place=>'B'},
label=>'repairs=N',cases=>10);
$nb->add_instances(attributes=>{model=>'H',place=>'N'},
label=>'repairs=Y',cases=>18);
$nb->add_instances(attributes=>{model=>'H',place=>'N'},
label=>'repairs=N',cases=>16);
$nb->add_instances(attributes=>{model=>'T',place=>'B'},
label=>'repairs=Y',cases=>22);
$nb->add_instances(attributes=>{model=>'T',place=>'B'},
label=>'repairs=N',cases=>14);
$nb->add_instances(attributes=>{model=>'T',place=>'N'},
label=>'repairs=Y',cases=> 6);
$nb->add_instances(attributes=>{model=>'T',place=>'N'},
label=>'repairs=N',cases=>84);
$nb->train;
print "Model:\n" . $nb->print_model;
# Find results for unseen instances
my $result = $nb->predict
(attributes => {model=>'T', place=>'N'});
foreach my $k (keys(%{ $result })) {
print "for label $k P = " . $result->{$k} . "\n";
}
# export the model into a string
my $string = $nb->export_to_YAML();
# create the same model from the string
my $nb1 = AI::NaiveBayes1->import_from_YAML($string);
# write the model to a file (shorter than model->string->file)
$nb->export_to_YAML_file('t/tmp1');
# read the model from a file (shorter than file->string->model)
my $nb2 = AI::NaiveBayes1->import_from_YAML_file('t/tmp1');
See Examples for more examples.
=head1 DESCRIPTION
This module implements the classic "Naive Bayes" machine learning
algorithm.
=head2 Data Structure
NaiveBayes1.pm view on Meta::CPAN
=item C<{stat_attributes}{$a}>
Statistics for an attribute: C<...{$value}{$label}> = count of
instances.
=item C<{smoothing}{$attribute}>
Attribute smoothing. No smoothing if does not exist. Implemented smoothing:
- /^unseen count=/ followed by number, e.g., 0.5
=back
=head2 Attribute Smoothing
For an attribute A one can specify:
$nb->{smoothing}{A} = 'unseen count=0.5';
to provide a count for unseen data. The count is taken into
consideration in training and prediction, when any unseen attribute
values are observed. Zero probabilities can be prevented in this way.
A count other than 0.5 can be provided, but if it is <=0 it will be
set to 0.5. The method is similar to add-one smoothing. A special
attribute value '*' is used for all unseen data.
=head1 METHODS
NaiveBayes1.pm view on Meta::CPAN
particular order), or the number of labels if called in a scalar
context.
=back
=head1 THEORY
Bayes' Theorem is a way of inverting a conditional probability. It
states:
P(y|x) P(x)
P(x|y) = -------------
P(y)
and so on...
This is a pretty standard algorithm explained in many machine learning
textbooks (e.g., "Data Mining" by Witten and Eibe).
The algorithm relies on estimating P(A|C), where A is an arbitrary
attribute, and C is the class attribute. If A is not real-valued,
then this conditional probability is estimated using a table of all
possible values for A and C.
If A is real-valued, then the distribution P(A|C) is modeled as a
Gaussian (normal) distribution for each possible value of C=c, Hence,
for each C=c we collect the mean value (m) and standard deviation (s)
for A during training. During classification, P(A=a|C=c) is estimated
using Gaussian distribution, i.e., in the following way:
1 (a-m)^2
P(A=a|C=c) = ------------ * exp( - ------- )
sqrt(2*Pi)*s 2*s^2
this boils down to the following lines of code:
$scores{$label} *=
0.398942280401433 / $m->{real_stat}{$att}{$label}{stddev}*
exp( -0.5 *
( ( $newattrs->{$att} -
$m->{real_stat}{$att}{$label}{mean})
/ $m->{real_stat}{$att}{$label}{stddev}
) ** 2
);
i.e.,
P(A=a|C=c) = 0.398942280401433 / s *
exp( -0.5 * ( ( a-m ) / s ) ** 2 );
=head1 EXAMPLES
Example with a real-valued attribute modeled by a Gaussian
distribution (from Witten I. and Frank E. book "Data Mining" (the WEKA
book), page 86):
# @relation weather
#
# @attribute outlook {sunny, overcast, rainy}
# @attribute temperature real
# @attribute humidity real
# @attribute windy {TRUE, FALSE}
# @attribute play {yes, no}
#
# @data
# sunny,85,85,FALSE,no
# sunny,80,90,TRUE,no
# overcast,83,86,FALSE,yes
# rainy,70,96,FALSE,yes
# rainy,68,80,FALSE,yes
# rainy,65,70,TRUE,no
# overcast,64,65,TRUE,yes
# sunny,72,95,FALSE,no
# sunny,69,70,FALSE,yes
# rainy,75,80,FALSE,yes
# sunny,75,70,TRUE,yes
# overcast,72,90,TRUE,yes
# overcast,81,75,FALSE,yes
# rainy,71,91,TRUE,no
$nb->set_real('temperature', 'humidity');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>85,humidity=>85,windy=>'FALSE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>80,humidity=>90,windy=>'TRUE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>83,humidity=>86,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>70,humidity=>96,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>68,humidity=>80,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>65,humidity=>70,windy=>'TRUE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>64,humidity=>65,windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>72,humidity=>95,windy=>'FALSE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>69,humidity=>70,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>75,humidity=>80,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>75,humidity=>70,windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>72,humidity=>90,windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>81,humidity=>75,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>71,humidity=>91,windy=>'TRUE'},label=>'play=no');
$nb->train;
my $printedmodel = "Model:\n" . $nb->print_model;
my $p = $nb->predict(attributes=>{outlook=>'sunny',temperature=>66,humidity=>90,windy=>'TRUE'});
YAML::DumpFile('file', $p);
die unless (abs($p->{'play=no'} - 0.792) < 0.001);
die unless(abs($p->{'play=yes'} - 0.208) < 0.001);
=head1 HISTORY
L<Algorithm::NaiveBayes> by Ken Williams was not what I needed so I
wrote this one. L<Algorithm::NaiveBayes> is oriented towards text
categorization, it includes smoothing, and log probabilities. This
module is a generic, basic Naive Bayes algorithm.
=head1 THANKS
AI/NaiveBayes1 version 2.011
============================
This is an implementation of the basic Naive Bayes classification
algorithm. Documentation is a part of the file NaiveBayes1.pm in the
pod format. To convert it into manual page format, type something
like the following:
pod2html NaiveBayes1.pm
INSTALLATION
To install this module type the following:
perl Makefile.PL
make
make test
make install
DEPENDENCIES
The methods for importing to and exporting from YAML format require
module YAML.
COPYRIGHT AND LICENCE
Copyright 2003-21 Vlado Keselj https://web.cs.dal.ca/~vlado
--- #YAML:1.0 !perl/AI::NaiveBayes1
attributes:
- place
- model
attvals:
model:
- H
- T
place:
- B
- N
labels:
- repairs=Y
- repairs=N
model:
condprob:
model:
H:
repairs=N: 0.209677419354839
repairs=Y: 0.631578947368421
T:
repairs=N: 0.790322580645161
repairs=Y: 0.368421052631579
place:
B:
repairs=N: 0.193548387096774
repairs=Y: 0.684210526315789
N:
repairs=N: 0.806451612903226
repairs=Y: 0.315789473684211
labelprob:
repairs=N: 0.62
repairs=Y: 0.38
numerical_attr: {}
numof_instances: 200
stat_attributes:
model:
H:
repairs=N: 26
repairs=Y: 48
T:
repairs=N: 98
repairs=Y: 28
place:
B:
repairs=N: 24
repairs=Y: 52
N:
repairs=N: 100
repairs=Y: 24
stat_labels:
repairs=N: 124
repairs=Y: 76
--- #YAML:1.0 !perl/AI::NaiveBayes1
attributes:
- size1
- morning
- html
attvals:
html:
- N
- Y
morning:
- Y
- N
size1:
- S
- L
labels:
- spam=N
- spam=Y
model:
condprob:
html:
N:
spam=N: 0.666666666666667
spam=Y: 0.0588235294117647
Y:
spam=N: 0.333333333333333
spam=Y: 0.941176470588235
morning:
N:
spam=N: 0.333333333333333
spam=Y: 0.647058823529412
Y:
spam=N: 0.666666666666667
spam=Y: 0.352941176470588
size1:
L:
spam=N: 0.333333333333333
spam=Y: 0.588235294117647
S:
spam=N: 0.666666666666667
spam=Y: 0.411764705882353
labelprob:
spam=N: 0.15
spam=Y: 0.85
numerical_attr: {}
numof_instances: 20
stat_attributes:
html:
N:
spam=N: 2
spam=Y: 1
Y:
spam=N: 1
spam=Y: 16
morning:
N:
spam=N: 1
spam=Y: 11
Y:
spam=N: 2
spam=Y: 6
size1:
L:
spam=N: 1
spam=Y: 10
S:
spam=N: 2
spam=Y: 7
stat_labels:
spam=N: 3
spam=Y: 17
--- #YAML:1.0 !perl/AI::NaiveBayes1
attributes:
- outlook
- windy
- humidity
- temperature
attvals:
humidity:
- high
- normal
outlook:
- sunny
- overcast
- rainy
temperature:
- hot
- mild
- cool
windy:
- FALSE
- TRUE
labels:
- play=no
- play=yes
model:
condprob:
humidity:
high:
play=no: 0.8
play=yes: 0.333333333333333
normal:
play=no: 0.2
play=yes: 0.666666666666667
outlook:
overcast:
play=no: 0
play=yes: 0.444444444444444
rainy:
play=no: 0.4
play=yes: 0.333333333333333
sunny:
play=no: 0.6
play=yes: 0.222222222222222
temperature:
cool:
play=no: 0.2
play=yes: 0.333333333333333
hot:
play=no: 0.4
play=yes: 0.222222222222222
mild:
play=no: 0.4
play=yes: 0.444444444444444
windy:
FALSE:
play=no: 0.4
play=yes: 0.666666666666667
TRUE:
play=no: 0.6
play=yes: 0.333333333333333
labelprob:
play=no: 0.357142857142857
play=yes: 0.642857142857143
numerical_attr: {}
numof_instances: 14
stat_attributes:
humidity:
high:
play=no: 4
play=yes: 3
normal:
play=no: 1
play=yes: 6
outlook:
overcast:
play=yes: 4
rainy:
play=no: 2
play=yes: 3
sunny:
play=no: 3
play=yes: 2
temperature:
cool:
play=no: 1
play=yes: 3
hot:
play=no: 2
play=yes: 2
mild:
play=no: 2
play=yes: 4
windy:
FALSE:
play=no: 2
play=yes: 6
TRUE:
play=no: 3
play=yes: 3
stat_labels:
play=no: 5
play=yes: 9
#putfile('t/3-2.out', $nb->export_to_YAML());
#is($nb->export_to_YAML(), getfile('t/3-2.out'));
eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 3.t" if $@;
$nb->export_to_YAML_file('t/tmp3');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp3');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
't/3-1.out', __FILE__, __LINE__);
my $tmp = $nb->export_to_YAML();
my $nb2 = AI::NaiveBayes1->import_from_YAML($tmp);
&compare_by_line("Model:\n" . &shorterdecimals($nb2->print_model),
't/3-1.out', __FILE__, __LINE__);
my $p = $nb->predict(attributes=>{outlook=>'sunny',temperature=>'cool',humidity=>'high',windy=>'TRUE'});
#putfile('t/3-3.out', YAML::Dump($p));
ok(abs($p->{'play=no'} - 0.795) < 0.001);
ok(abs($p->{'play=yes'} - 0.205) < 0.001);
--- #YAML:1.0 !perl/AI::NaiveBayes1
attributes:
- outlook
- windy
- humidity
- temperature
attvals:
humidity:
- 85
- 90
- 86
- 96
- 80
- 70
- 65
- 95
- 75
- 91
outlook:
- sunny
- overcast
- rainy
temperature:
- 85
- 80
- 83
- 70
- 68
- 65
- 64
- 72
- 69
- 75
- 81
- 71
windy:
- FALSE
- TRUE
labels:
- play=>no
- play=>yes
model:
condprob:
outlook:
overcast:
play=>no: 0
play=>yes: 0.444444444444444
rainy:
play=>no: 0.4
play=>yes: 0.333333333333333
sunny:
play=>no: 0.6
play=>yes: 0.222222222222222
windy:
FALSE:
play=>no: 0.4
play=>yes: 0.666666666666667
TRUE:
play=>no: 0.6
play=>yes: 0.333333333333333
labelprob:
play=>no: 0.357142857142857
play=>yes: 0.642857142857143
real_attr: &1
humidity: 1
temperature: 1
real_stat:
humidity:
play=>no:
count: 5
mean: 86.2
stddev: 9.73139250056229
sum: 431
play=>yes:
count: 9
mean: 79.1111111111111
stddev: 10.2157286138146
sum: 712
temperature:
play=>no:
count: 5
mean: 74.6
stddev: 7.89303490426845
sum: 373
play=>yes:
count: 9
mean: 73
stddev: 6.16441400296898
sum: 657
numof_instances: 14
real_attr: *1
real_stat: {}
stat_attributes:
humidity:
65:
play=>yes: 1
70:
play=>no: 1
play=>yes: 2
75:
play=>yes: 1
80:
play=>yes: 2
85:
play=>no: 1
86:
play=>yes: 1
90:
play=>no: 1
play=>yes: 1
91:
play=>no: 1
95:
play=>no: 1
96:
play=>yes: 1
outlook:
overcast:
play=>yes: 4
rainy:
play=>no: 2
play=>yes: 3
sunny:
play=>no: 3
play=>yes: 2
temperature:
64:
play=>yes: 1
65:
play=>no: 1
68:
play=>yes: 1
69:
play=>yes: 1
70:
play=>yes: 1
71:
play=>no: 1
72:
play=>no: 1
play=>yes: 1
75:
play=>yes: 2
80:
play=>no: 1
81:
play=>yes: 1
83:
play=>yes: 1
85:
play=>no: 1
windy:
FALSE:
play=>no: 2
play=>yes: 6
TRUE:
play=>no: 3
play=>yes: 3
stat_labels:
play=>no: 5
play=>yes: 9
#putfile('t/4-2.out', $nb->export_to_YAML());
#is($nb->export_to_YAML(), getfile('t/4-2.out'));
eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 4.t" if $@;
$nb->export_to_YAML_file('t/tmp4');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp4');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
't/4-1.out', __FILE__, __LINE__);
my $tmp = $nb->export_to_YAML();
my $nb2 = AI::NaiveBayes1->import_from_YAML($tmp);
&compare_by_line("Model:\n" . &shorterdecimals($nb2->print_model),
't/4-1.out', __FILE__, __LINE__);
my $p = $nb->predict(attributes=>{outlook=>'sunny',temperature=>66,humidity=>90,windy=>'TRUE'});
#putfile('t/4-3.out', YAML::Dump($p));
ok(abs($p->{'play=no'} - 0.792) < 0.001);
ok(abs($p->{'play=yes'} - 0.208) < 0.001);
--- !!perl/hash:AI::NaiveBayes1
attribute_type:
html: nominal
morning: nominal
size: real
attributes:
- morning
- html
- size
attvals:
html:
- N
- Y
morning:
- Y
- N
size:
- 408
- 3353
- 4995
- 1853
- 732
- 4017
- 3190
- 2345
- 3569
- 559
- 1732
- 2042
- 3893
- 3601
- 2176
- 877
- 272
- 2740
- 514
- 1321
labels:
- spam=N
- spam=Y
model:
condprob:
html:
N:
spam=N: 0.666666666666667
spam=Y: 0.0588235294117647
Y:
spam=N: 0.333333333333333
spam=Y: 0.941176470588235
morning:
N:
spam=N: 0.333333333333333
spam=Y: 0.647058823529412
Y:
spam=N: 0.666666666666667
spam=Y: 0.352941176470588
size: {}
condprobe:
html:
N:
spam=N: (= 2 / 3)
spam=Y: (= 1 / 17)
Y:
spam=N: (= 1 / 3)
spam=Y: (= 16 / 17)
morning:
N:
spam=N: (= 1 / 3)
spam=Y: (= 11 / 17)
Y:
spam=N: (= 2 / 3)
spam=Y: (= 6 / 17)
labelprob:
spam=N: 0.15
spam=Y: 0.85
real_stat:
size:
spam=N:
count: 3
mean: 1443.33333333333
stddev: 1521.30777074638
sum: 4330
spam=Y:
count: 17
mean: 2344.64705882353
stddev: 1397.40106721265
sum: 39859
numof_instances: 20
real_stat: {}
smoothing: {}
stat_attributes:
html:
N:
spam=N: 2
spam=Y: 1
Y:
spam=N: 1
spam=Y: 16
morning:
N:
spam=N: 1
spam=Y: 11
Y:
spam=N: 2
spam=Y: 6
size:
1321:
spam=Y: 1
1732:
spam=Y: 1
1853:
spam=Y: 1
2042:
spam=Y: 1
2176:
spam=Y: 1
2345:
spam=Y: 1
272:
spam=Y: 1
2740:
spam=Y: 1
3190:
spam=N: 1
3353:
spam=Y: 1
3569:
spam=Y: 1
3601:
spam=Y: 1
3893:
spam=Y: 1
4017:
spam=Y: 1
408:
spam=N: 1
4995:
spam=Y: 1
514:
spam=Y: 1
559:
spam=Y: 1
732:
spam=N: 1
877:
spam=Y: 1
stat_labels:
spam=N: 3
spam=Y: 17
#putfile('t/5-2.out', $nb->export_to_YAML());
#is($nb->export_to_YAML(), getfile('t/5-2.out'));
eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 5.t" if $@;
$nb->export_to_YAML_file('t/tmp5');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp5');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
't/5-1.out', __FILE__, __LINE__);
my $tmp = $nb->export_to_YAML();
my $nb2 = AI::NaiveBayes1->import_from_YAML($tmp);
&compare_by_line("Model:\n" . &shorterdecimals($nb2->print_model),
't/5-1.out', __FILE__, __LINE__);
my $p = $nb->predict(attributes=>{morning=>'Y',html=>'Y',size=>4749});
#putfile('t/5-3.out', YAML::Dump($p));
ok(abs($p->{'spam=N'} - 0.043) < 0.001);
ok(abs($p->{'spam=Y'} - 0.957) < 0.001);
#putfile('t/6-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/6-1.out', __FILE__, __LINE__);
eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 6.t" if $@;
$nb->export_to_YAML_file('t/tmp6');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp6');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
't/6-1.out', __FILE__, __LINE__);
my $p = $nb->predict(attributes=>{C=>'Y',F=>0});
#putfile('t/6-2.out', YAML::Dump($p));
ok(abs($p->{'S=N'} - 0.580) < 0.001);
ok(abs($p->{'S=Y'} - 0.420) < 0.001);
# Continual
$nb = AI::NaiveBayes1->new;
$printedmodel = "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/6-3.out', $printedmodel);
&compare_by_line($printedmodel, 't/6-3.out', __FILE__, __LINE__);
$nb->export_to_YAML_file('t/tmp6-2');
$nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp6-2');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
't/6-3.out', __FILE__, __LINE__);
$p = $nb->predict(attributes=>{C=>'Y',F=>1});
#putfile('t/6-4.out', YAML::Dump($p));
ok(abs($p->{'S=N'} - 0.339) < 0.001);
ok(abs($p->{'S=Y'} - 0.661) < 0.001);
#putfile('t/7-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/7-1.out', __FILE__, __LINE__);
eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 7.t" if $@;
$nb->export_to_YAML_file('t/tmp7');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp7');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
't/7-1.out', __FILE__, __LINE__);
my $p = $nb->predict(attributes=>{C=>'Y',F=>0});
#putfile('t/7-2.out', YAML::Dump($p));
ok(abs($p->{'S=N'} - 0.580) < 0.001);
ok(abs($p->{'S=Y'} - 0.420) < 0.001);
# Continual
$nb = AI::NaiveBayes1->new;
$nb->add_instances(attributes=>{C=>'Y',F=>2},label=>'S=Y',cases=>30);
$nb->add_instances(attributes=>{C=>'Y',F=>2},label=>'S=N',cases=>10);
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=Y',cases=>18);
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=N',cases=>16);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=Y',cases=>22);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=N',cases=>14);
#$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=Y',cases=> 6);
#$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=N',cases=>84);
$nb->add_table(" C F S count\n".
"------------------\n".
" N 0 Y 6 \n".
" N 0 N 84 \n".
'');
$nb->set_real('F');
$nb->train;
$printedmodel = &shorterdecimals("Model:\n" . $nb->print_model);
#putfile('t/7-3.out', $printedmodel);
&compare_by_line($printedmodel, 't/7-3.out', __FILE__, __LINE__);
$nb->export_to_YAML_file('t/tmp7-2');
$nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp7-2');
&compare_by_line(&shorterdecimals("Model:\n" . $nb1->print_model),
't/7-3.out', __FILE__, __LINE__);
$p = $nb->predict(attributes=>{C=>'Y',F=>1});
#putfile('t/7-4.out', YAML::Dump($p));
ok(abs($p->{'S=N'} - 0.339) < 0.001);
ok(abs($p->{'S=Y'} - 0.661) < 0.001);
$nb = AI::NaiveBayes1->new;
$nb->add_table(
"Html Caps Free Spam count
-------------------------------
Y Y Y Y 42
Y Y Y N 32
Y Y N Y 17
Y Y N N 7
Y N Y Y 32
Y N Y N 12
Y N N Y 20
Y N N N 16
N Y Y Y 38
N Y Y N 18
N Y N Y 16
N Y N N 16
N N Y Y 2
N N Y N 9
N N N Y 11
N N N N 91
-------------------------------
");
$nb->train;
$printedmodel = "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/7-5.out', $printedmodel);
&compare_by_line($printedmodel, 't/7-5.out', __FILE__, __LINE__);
$p = $nb->predict(attributes=>{Html=>'N',Caps=>'N',Free=>'Y'});
# putfile('t/7-2.out', YAML::Dump($p));
use Test::More tests => 2;
use_ok("AI::NaiveBayes1");
use lib '.';
require 't/auxfunctions.pl';
my $nb;
$nb = AI::NaiveBayes1->new;
$nb->add_table(
" Tp Wp W Wf T count
-------------------------------
PREV PREV duck ducks N 4
N duck ducks END V 4
PREV PREV duck flies N 8
N duck flies END V 8
PREV PREV duck fly N 4
N duck fly fly N 4
N fly fly END V 4
PREV PREV duck ducks V 4
V duck ducks END N 4
PREV PREV duck END V 1
PREV PREV ducks duck N 1
N ducks duck END V 1
PREV PREV ducks fly N 4
N ducks fly END V 4
PREV PREV flies fly N 4
N flies fly END V 4
PREV PREV fly flies N 1
N fly flies END V 1
PREV PREV fly fly N 1
N fly fly fly N 1
N fly fly END V 1
PREV PREV fly duck V 2
V fly duck END N 2
-------------------------------
");
$nb->{smoothing}{W} = 'unseen count=0.5';
$nb->{smoothing}{Wp} = 'unseen count=0.5';
$nb->{smoothing}{Wf} = 'unseen count=0.5';
$nb->train;
my $printedmodel = "Model:\n" . $nb->print_model('with counts');
putfile('t/8-1.out', $printedmodel);
is($printedmodel, getfile('t/8-1.out'));
t/auxfunctions.pl view on Meta::CPAN
#!/usr/bin/perl
sub compare_by_line {
my $got = shift;
my $file = shift;
my $testfile = @_ ? shift @_ : '';
my $testline = @_ ? shift @_ : '';
my $expected = getfile($file);
if ($got eq $expected) { pass; return }
my $flag = '';
while ($got ne '' or $expected ne '') {
my $a=$got; if ($a =~ /\s*\n/) { $a = $`; $got = $'; }
my $b=$expected; if ($b =~ /\s*\n/) { $b = $`; $expected = $'; }
if ($a ne $b) {
if ($flag eq '')
{ print STDERR "\n$testfile:$testline: Failed comparison with $file!\n"; $flag = 1; }
print STDERR " Got: $a\n".
"Expected: $b\n";
}
}
if ($flag eq '') { pass } else { fail }
}
sub shorterdecimals {
local $_ = shift;
s/(\d{4}\.\d{10})\d+/$1/g;
s/(\.\d{12})\d+/$1/g;
s/---+/---/g;
return $_;
}
sub getfile($) {
my $f = shift;
local *F;
open(F, "<$f") or die "getfile:cannot open $f:$!";
my @r = <F>;
close(F);
return wantarray ? @r : join ('', @r);
}
sub putfile($@) {
my $f = shift;
local *F;
open(F, ">$f") or die "putfile:cannot open $f:$!";
print F '' unless @_;
while (@_) { print F shift(@_) }
close(F);
}
1;
( run in 0.449 second using v1.01-cache-2.11-cpan-4d50c553e7e )