AI-NaiveBayes1
view release on metacpan or search on metacpan
1.9 Tue Aug 31 09:27:51 ADT 2010
- fixed testing problems due to differences in precision in t/2.t
1.8 Fri Aug 21 06:36:34 ADT 2009
- fixed a pod documentation error
1.7 Thu Aug 20 14:20:15 ADT 2009
- improvements in documentation
- added method add_csv_file
- added method drop_attributes
- removed real_attr and added attribute_type field
1.6 Wed Aug 19 09:09:57 ADT 2009
- improved an error message
- fixed some testing problems due to whitespace
- small improvement in generating documentation
1.5 Wed Jan 30 08:06:22 AST 2008
- fixed testing problems due to differences in the lowest
significant digit on different platforms
NaiveBayes1.pm view on Meta::CPAN
my $package = shift;
return bless {
attributes => [ ],
labels => [ ],
attvals => {},
real_stat => {},
numof_instances => 0,
stat_labels => {},
stat_attributes => {},
smoothing => {},
attribute_type => {},
}, $package;
}
sub set_real {
my ($self, @attr) = @_;
foreach my $a (@attr) { $self->{attribute_type}{$a} = 'real' }
}
sub import_from_YAML {
my $package = shift;
my $yaml = shift;
my $self = YAML::Load($yaml);
return bless $self, $package;
}
sub import_from_YAML_file {
NaiveBayes1.pm view on Meta::CPAN
close(F);
} # end of add_csv_file
sub drop_attributes {
my $self = shift;
foreach my $a (@_) {
my @tmp = grep { $a ne $_ } @{ $self->{attributes} };
$self->{attributes} = \@tmp;
delete($self->{attvals}{$a});
delete($self->{stat_attributes}{$a});
delete($self->{attribute_type}{$a});
delete($self->{real_stat}{$a});
delete($self->{smoothing}{$a});
}
} # end of drop_attributes
sub add_instances {
my ($self, %params) = @_;
for ('attributes', 'label', 'cases') {
die "Missing required '$_' parameter" unless exists $params{$_};
}
if (scalar(keys(%{ $self->{stat_attributes} })) == 0) {
foreach my $a (keys(%{$params{attributes}})) {
$self->{stat_attributes}{$a} = {};
push @{ $self->{attributes} }, $a;
$self->{attvals}{$a} = [ ];
$self->{attribute_type}{$a} = 'nominal' unless defined($self->{attribute_type}{$a});
}
} else {
foreach my $a (keys(%{$self->{stat_attributes}}))
{ die "attribute not given in instance: $a"
unless exists($params{attributes}{$a}) }
}
$self->{numof_instances} += $params{cases};
push @{ $self->{labels} }, $params{label} unless
NaiveBayes1.pm view on Meta::CPAN
my $m = $self->{model} = {};
$m->{labelprob} = {};
foreach my $label (keys(%{$self->{stat_labels}}))
{ $m->{labelprob}{$label} = $self->{stat_labels}{$label} /
$self->{numof_instances} }
$m->{condprob} = {};
$m->{condprobe} = {};
foreach my $att (keys(%{$self->{stat_attributes}})) {
next if $self->{attribute_type}{$att} eq 'real';
$m->{condprob}{$att} = {};
$m->{condprobe}{$att} = {};
foreach my $label (keys(%{$self->{stat_labels}})) {
my $total = 0; my @attvals = ();
foreach my $attval (keys(%{$self->{stat_attributes}{$att}})) {
next unless
exists($self->{stat_attributes}{$att}{$attval}{$label}) and
$self->{stat_attributes}{$att}{$attval}{$label} > 0;
push @attvals, $attval;
$m->{condprob}{$att}{$attval} = {} unless
NaiveBayes1.pm view on Meta::CPAN
$m->{condprobe}{$att}{$attval}{$label} =
"(= $m->{condprob}{$att}{$attval}{$label} / $total)";
$m->{condprob}{$att}{$attval}{$label} /= $total;
}
}
}
# For real-valued attributes, we use Gaussian distribution
# let us collect statistics
foreach my $att (keys(%{$self->{stat_attributes}})) {
next unless $self->{attribute_type}{$att} eq 'real';
print STDERR "Smoothing ignored for real attribute $att!\n" if
defined($self->{smoothing}{att}) and $self->{smoothing}{att};
$m->{real_stat}->{$att} = {};
foreach my $attval (keys %{$self->{stat_attributes}{$att}}){
foreach my $label (keys %{$self->{stat_attributes}{$att}{$attval}}){
$m->{real_stat}{$att}{$label}{sum}
+= $attval * $self->{stat_attributes}{$att}{$attval}{$label};
$m->{real_stat}{$att}{$label}{count}
+= $self->{stat_attributes}{$att}{$attval}{$label};
NaiveBayes1.pm view on Meta::CPAN
sub predict {
my ($self, %params) = @_;
my $newattrs = $params{attributes} or die "Missing 'attributes' parameter for predict()";
my $m = $self->{model}; # For convenience
my %scores;
my @labels = @{ $self->{labels} };
$scores{$_} = $m->{labelprob}{$_} foreach (@labels);
foreach my $att (keys(%{ $newattrs })) {
if (!defined($self->{attribute_type}{$att})) { die "Unknown attribute: `$att'" }
next if $self->{attribute_type}{$att} eq 'real';
die unless exists($self->{stat_attributes}{$att});
my $attval = $newattrs->{$att};
die "Unknown value `$attval' for attribute `$att'."
unless exists($self->{stat_attributes}{$att}{$attval}) or
exists($self->{smoothing}{$att});
foreach my $label (@labels) {
if (exists($m->{condprob}{$att}{$attval}) and
exists($m->{condprob}{$att}{$attval}{$label}) and
$m->{condprob}{$att}{$attval}{$label} > 0 ) {
$scores{$label} *=
$m->{condprob}{$att}{$attval}{$label};
} elsif (exists($self->{smoothing}{$att})) {
$scores{$label} *=
$m->{condprob}{$att}{'*'}{$label};
} else { $scores{$label} = 0 }
}
}
foreach my $att (keys %{$newattrs}){
next unless $self->{attribute_type}{$att} eq 'real';
my $sum=0; my %nscores;
foreach my $label (@labels) {
die unless exists $m->{real_stat}{$att}{$label}{mean};
$nscores{$label} =
0.398942280401433 / $m->{real_stat}{$att}{$label}{stddev}*
exp( -0.5 *
( ( $newattrs->{$att} -
$m->{real_stat}{$att}{$label}{mean})
/ $m->{real_stat}{$att}{$label}{stddev}
) ** 2
NaiveBayes1.pm view on Meta::CPAN
$r .= join("\n", @lines) . "\n". $lines[1]. "\n\n";
# prepare conditional tables
my @attributes = sort $self->attributes;
foreach my $att (@attributes) {
@lines = ( "category ", '-' );
my @lines1 = ( "$att ", '-' );
my @lines2 = ( "P( $att | category ) ", '-' );
my @attvals = sort keys(%{ $m->{condprob}{$att} });
foreach my $label (@labels) {
if ( $self->{attribute_type}{$att} ne 'real' ) {
foreach my $attval (@attvals) {
next unless exists($m->{condprob}{$att}{$attval}{$label});
push @lines, "$label ";
push @lines1, "$attval ";
my $line = $m->{condprob}{$att}{$attval}{$label};
if ($withcounts)
{ $line.= ' '.$m->{condprobe}{$att}{$attval}{$label} }
$line .= ' ';
push @lines2, $line;
NaiveBayes1.pm view on Meta::CPAN
=head2 Data Structure
An object contains the following fields:
=over 4
=item C<{attributes}>
List of attribute names.
=item C<{attribute_type}{$a}>
Attribute types - 'real', or not (e.g., 'nominal')
=item C<{labels}>
List of labels.
=item C<{attvals}{$a}>
List of attribute values
=item C<{real_stat}{$a}{$v}{$l}{sum}>
AI/NaiveBayes1 version 2.011
============================
This is an implementation of the basic Naive Bayes classification
algorithm. Documentation is a part of the file NaiveBayes1.pm in the
pod format. To convert it into manual page format, type something
like the following:
pod2html NaiveBayes1.pm
INSTALLATION
To install this module type the following:
perl Makefile.PL
make
make test
make install
DEPENDENCIES
The methods for importing to and exporting from YAML format require
module YAML.
--- !!perl/hash:AI::NaiveBayes1
attribute_type:
html: nominal
morning: nominal
size: real
attributes:
- morning
- html
- size
attvals:
html:
- N
( run in 0.527 second using v1.01-cache-2.11-cpan-df04353d9ac )