AI-NaiveBayes1
view release on metacpan or search on metacpan
Makefile.PL view on Meta::CPAN
($ExtUtils::MakeMaker::VERSION ge '6.30_00'?
('LICENSE' => 'perl', ) : ()),
);
open(M, ">>Makefile") or die;
if ( -f 'priv.make' ) { print M getfile('priv.make') }
close(M);
sub getfile($) {
my $f = shift;
local *F;
open(F, "<$f") or die "getfile:cannot open $f:$!";
my @r = <F>;
close(F);
return wantarray ? @r : join ('', @r);
}
NaiveBayes1.pm view on Meta::CPAN
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
@EXPORT = qw(new);
use vars qw($Version);
$Version = $VERSION = '2.012';
use vars @EXPORT_OK;
# non-exported package globals go here
use vars qw();
sub new {
my $package = shift;
return bless {
attributes => [ ],
labels => [ ],
attvals => {},
real_stat => {},
numof_instances => 0,
stat_labels => {},
stat_attributes => {},
smoothing => {},
attribute_type => {},
}, $package;
}
sub set_real {
my ($self, @attr) = @_;
foreach my $a (@attr) { $self->{attribute_type}{$a} = 'real' }
}
sub import_from_YAML {
my $package = shift;
my $yaml = shift;
my $self = YAML::Load($yaml);
return bless $self, $package;
}
sub import_from_YAML_file {
my $package = shift;
my $yamlf = shift;
my $self = YAML::LoadFile($yamlf);
return bless $self, $package;
}
# assume that the last header count means counts
# after optionally removing counts, the last header is label
sub add_table {
my $self = shift;
my @atts = (); my $lbl=''; my $cnt = '';
while (@_) {
my $table = shift;
if ($table =~ /^(.*)\n[ \t]*-+\n/) {
my $a = $1; $table = $';
$a =~ s/^\s+//; $a =~ s/\s+$//;
if ($a =~ /\s*\bcount\s*$/) {
$a=$`; $cnt=1; } else { $cnt='' }
@atts = split(/\s+/, $a);
NaiveBayes1.pm view on Meta::CPAN
$self->add_instances(attributes=>\%av,
label=>"$lbl=$v[0]",
cases=>($cnt?$v[1]:1) );
}
}
} # end of add_table
# Simplified; not generally compatible.
# Assume that the last header is label. The first row contains
# attribute names.
sub add_csv_file {
my $self = shift; my $fn = shift; local *F;
open(F,$fn) or die "Cannot open CSV file `$fn': $!";
local $_ = <F>; my @atts = (); my $lbl=''; my $cnt = '';
chomp; @atts = split(/\s*,\s*/, $_); $lbl = pop @atts;
while (<F>) {
chomp; my @v = split(/\s*,\s*/, $_);
die "values (#=$#v): {@v}\natts (#=$#atts): @atts, lbl=$lbl,\n".
"count: $cnt\n" unless $#v-($cnt?2:1) == $#atts;
my %av=(); my @a = @atts;
while (@a) { $av{shift @a} = shift(@v) }
$self->add_instances(attributes=>\%av,
label=>"$lbl=$v[0]",
cases=>($cnt?$v[1]:1) );
}
close(F);
} # end of add_csv_file
sub drop_attributes {
my $self = shift;
foreach my $a (@_) {
my @tmp = grep { $a ne $_ } @{ $self->{attributes} };
$self->{attributes} = \@tmp;
delete($self->{attvals}{$a});
delete($self->{stat_attributes}{$a});
delete($self->{attribute_type}{$a});
delete($self->{real_stat}{$a});
delete($self->{smoothing}{$a});
}
} # end of drop_attributes
sub add_instances {
my ($self, %params) = @_;
for ('attributes', 'label', 'cases') {
die "Missing required '$_' parameter" unless exists $params{$_};
}
if (scalar(keys(%{ $self->{stat_attributes} })) == 0) {
foreach my $a (keys(%{$params{attributes}})) {
$self->{stat_attributes}{$a} = {};
push @{ $self->{attributes} }, $a;
$self->{attvals}{$a} = [ ];
NaiveBayes1.pm view on Meta::CPAN
{ die "attribute $a not given" }
my $attval = $params{attributes}{$a};
if (not exists($self->{stat_attributes}{$a}{$attval})) {
push @{ $self->{attvals}{$a} }, $attval;
$self->{stat_attributes}{$a}{$attval} = {};
}
$self->{stat_attributes}{$a}{$attval}{$params{label}} += $params{cases};
}
}
sub add_instance {
my ($self, %params) = @_; $params{cases} = 1;
$self->add_instances(%params);
}
sub train {
my $self = shift;
my $m = $self->{model} = {};
$m->{labelprob} = {};
foreach my $label (keys(%{$self->{stat_labels}}))
{ $m->{labelprob}{$label} = $self->{stat_labels}{$label} /
$self->{numof_instances} }
$m->{condprob} = {};
$m->{condprobe} = {};
NaiveBayes1.pm view on Meta::CPAN
$self->{stat_attributes}{$att}{$attval}{$label};
}
}
foreach my $label (keys %{$m->{real_stat}{$att}}) {
$m->{real_stat}{$att}{$label}{stddev} =
sqrt($m->{real_stat}{$att}{$label}{stddev} /
($m->{real_stat}{$att}{$label}{count}-1)
);
}
} # foreach real attribute
} # end of sub train
sub predict {
my ($self, %params) = @_;
my $newattrs = $params{attributes} or die "Missing 'attributes' parameter for predict()";
my $m = $self->{model}; # For convenience
my %scores;
my @labels = @{ $self->{labels} };
$scores{$_} = $m->{labelprob}{$_} foreach (@labels);
foreach my $att (keys(%{ $newattrs })) {
if (!defined($self->{attribute_type}{$att})) { die "Unknown attribute: `$att'" }
next if $self->{attribute_type}{$att} eq 'real';
NaiveBayes1.pm view on Meta::CPAN
foreach my $label (@labels) { $scores{$label} *= $nscores{$label} }
}
}
my $sumPx = 0.0;
$sumPx += $scores{$_} foreach (keys(%scores));
$scores{$_} /= $sumPx foreach (keys(%scores));
return \%scores;
}
sub print_model {
my $self = shift;
my $withcounts = '';
if ($#_>-1 && $_[0] eq 'with counts')
{ shift @_; $withcounts = 1; }
my $m = $self->{model};
my @labels = $self->labels;
my $r;
# prepare table category P(category)
my @lines;
NaiveBayes1.pm view on Meta::CPAN
foreach my $i (0 .. $#lines)
{ $lines[$i] .= ($lines[$i]=~/-$/?'+-':'| ') . $lines2[$i] }
@lines = _append_lines(@lines);
$r .= join("\n", @lines). "\n\n";
}
return $r;
}
sub _append_lines {
my @l = @_;
my $m = 0;
foreach (@l) { $m = length($_) if length($_) > $m }
@l = map
{ while (length($_) < $m) { $_.=substr($_,length($_)-1) }; $_ }
@l;
return @l;
}
sub labels {
my $self = shift;
return @{ $self->{labels} };
}
sub attributes {
my $self = shift;
return keys %{ $self->{stat_attributes} };
}
sub export_to_YAML {
my $self = shift;
require YAML;
return YAML::Dump($self);
}
sub export_to_YAML_file {
my $self = shift;
my $file = shift;
require YAML;
YAML::DumpFile($file, $self);
}
1;
__END__
=head1 NAME
t/auxfunctions.pl view on Meta::CPAN
#!/usr/bin/perl
sub compare_by_line {
my $got = shift;
my $file = shift;
my $testfile = @_ ? shift @_ : '';
my $testline = @_ ? shift @_ : '';
my $expected = getfile($file);
if ($got eq $expected) { pass; return }
my $flag = '';
while ($got ne '' or $expected ne '') {
my $a=$got; if ($a =~ /\s*\n/) { $a = $`; $got = $'; }
my $b=$expected; if ($b =~ /\s*\n/) { $b = $`; $expected = $'; }
if ($a ne $b) {
if ($flag eq '')
{ print STDERR "\n$testfile:$testline: Failed comparison with $file!\n"; $flag = 1; }
print STDERR " Got: $a\n".
"Expected: $b\n";
}
}
if ($flag eq '') { pass } else { fail }
}
sub shorterdecimals {
local $_ = shift;
s/(\d{4}\.\d{10})\d+/$1/g;
s/(\.\d{12})\d+/$1/g;
s/---+/---/g;
return $_;
}
sub getfile($) {
my $f = shift;
local *F;
open(F, "<$f") or die "getfile:cannot open $f:$!";
my @r = <F>;
close(F);
return wantarray ? @r : join ('', @r);
}
sub putfile($@) {
my $f = shift;
local *F;
open(F, ">$f") or die "putfile:cannot open $f:$!";
print F '' unless @_;
while (@_) { print F shift(@_) }
close(F);
}
1;
( run in 0.749 second using v1.01-cache-2.11-cpan-a5abf4f5562 )