AI-NaiveBayes1

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

1.9   Tue Aug 31 09:27:51 ADT 2010
  - fixed testing problems due to differences in precision in t/2.t

1.8   Fri Aug 21 06:36:34 ADT 2009
  - fixed a pod documentation error

1.7   Thu Aug 20 14:20:15 ADT 2009
  - improvements in documentation
  - added method add_csv_file
  - added method drop_attributes
  - removed real_attr and added attribute_type field

1.6   Wed Aug 19 09:09:57 ADT 2009
  - improved an error message
  - fixed some testing problems due to whitespace
  - small improvement in generating documentation

1.5   Wed Jan 30 08:06:22 AST 2008
  - fixed testing problems due to differences in the lowest
    significant digit on different platforms

NaiveBayes1.pm  view on Meta::CPAN

  my $package = shift;
  return bless {
                attributes => [ ],
		labels     => [ ],
		attvals    => {},
		real_stat  => {},
		numof_instances => 0,
		stat_labels => {},
		stat_attributes => {},
		smoothing => {},
		attribute_type => {},
	       }, $package;
}

sub set_real {
    my ($self, @attr) = @_;
    foreach my $a (@attr) { $self->{attribute_type}{$a} = 'real' }
}

sub import_from_YAML {
    my $package = shift;
    my $yaml = shift;
    my $self = YAML::Load($yaml);
    return bless $self, $package;
}

sub import_from_YAML_file {

NaiveBayes1.pm  view on Meta::CPAN

    close(F);
} # end of add_csv_file

sub drop_attributes {
    my $self = shift;
    foreach my $a (@_) {
	my @tmp = grep { $a ne $_ } @{ $self->{attributes} };
	$self->{attributes} = \@tmp;
	delete($self->{attvals}{$a});
	delete($self->{stat_attributes}{$a});
	delete($self->{attribute_type}{$a});
	delete($self->{real_stat}{$a});
	delete($self->{smoothing}{$a});
    }
} # end of drop_attributes

sub add_instances {
  my ($self, %params) = @_;
  for ('attributes', 'label', 'cases') {
      die "Missing required '$_' parameter" unless exists $params{$_};
  }

  if (scalar(keys(%{ $self->{stat_attributes} })) == 0) {
      foreach my $a (keys(%{$params{attributes}})) {
	  $self->{stat_attributes}{$a} = {};
	  push @{ $self->{attributes} }, $a;
	  $self->{attvals}{$a} = [ ];
	  $self->{attribute_type}{$a} = 'nominal' unless defined($self->{attribute_type}{$a});
      }
  } else {
      foreach my $a (keys(%{$self->{stat_attributes}}))
      { die "attribute not given in instance: $a"
	    unless exists($params{attributes}{$a}) }
  }

  $self->{numof_instances} += $params{cases};

  push @{ $self->{labels} }, $params{label} unless

NaiveBayes1.pm  view on Meta::CPAN

    my $m = $self->{model} = {};
    
    $m->{labelprob} = {};
    foreach my $label (keys(%{$self->{stat_labels}}))
    { $m->{labelprob}{$label} = $self->{stat_labels}{$label} /
                                $self->{numof_instances} } 

    $m->{condprob} = {};
    $m->{condprobe} = {};
    foreach my $att (keys(%{$self->{stat_attributes}})) {
        next if $self->{attribute_type}{$att} eq 'real';
	$m->{condprob}{$att} = {};
	$m->{condprobe}{$att} = {};
	foreach my $label (keys(%{$self->{stat_labels}})) {
	    my $total = 0; my @attvals = ();
	    foreach my $attval (keys(%{$self->{stat_attributes}{$att}})) {
		next unless
		    exists($self->{stat_attributes}{$att}{$attval}{$label}) and
		    $self->{stat_attributes}{$att}{$attval}{$label} > 0;
		push @attvals, $attval;
		$m->{condprob}{$att}{$attval} = {} unless

NaiveBayes1.pm  view on Meta::CPAN

		$m->{condprobe}{$att}{$attval}{$label} =
		    "(= $m->{condprob}{$att}{$attval}{$label} / $total)";
		$m->{condprob}{$att}{$attval}{$label} /= $total;
	    }
	}
    }

    # For real-valued attributes, we use Gaussian distribution
    # let us collect statistics
    foreach my $att (keys(%{$self->{stat_attributes}})) {
        next unless $self->{attribute_type}{$att} eq 'real';
	print STDERR "Smoothing ignored for real attribute $att!\n" if
	    defined($self->{smoothing}{att}) and $self->{smoothing}{att};
        $m->{real_stat}->{$att} = {};
        foreach my $attval (keys %{$self->{stat_attributes}{$att}}){
            foreach my $label (keys %{$self->{stat_attributes}{$att}{$attval}}){
                $m->{real_stat}{$att}{$label}{sum}
                += $attval * $self->{stat_attributes}{$att}{$attval}{$label};

                $m->{real_stat}{$att}{$label}{count}
                += $self->{stat_attributes}{$att}{$attval}{$label};

NaiveBayes1.pm  view on Meta::CPAN


sub predict {
  my ($self, %params) = @_;
  my $newattrs = $params{attributes} or die "Missing 'attributes' parameter for predict()";
  my $m = $self->{model};  # For convenience
  
  my %scores;
  my @labels = @{ $self->{labels} };
  $scores{$_} = $m->{labelprob}{$_} foreach (@labels);
  foreach my $att (keys(%{ $newattrs })) {
      if (!defined($self->{attribute_type}{$att})) { die "Unknown attribute: `$att'" }
      next if $self->{attribute_type}{$att} eq 'real';
      die unless exists($self->{stat_attributes}{$att});
      my $attval = $newattrs->{$att};
      die "Unknown value `$attval' for attribute `$att'."
      unless exists($self->{stat_attributes}{$att}{$attval}) or
	  exists($self->{smoothing}{$att});
      foreach my $label (@labels) {
	  if (exists($m->{condprob}{$att}{$attval}) and
	      exists($m->{condprob}{$att}{$attval}{$label}) and
	      $m->{condprob}{$att}{$attval}{$label} > 0 ) {
	      $scores{$label} *=
		  $m->{condprob}{$att}{$attval}{$label};
	  } elsif (exists($self->{smoothing}{$att})) {
	      $scores{$label} *=
                  $m->{condprob}{$att}{'*'}{$label};
	  } else { $scores{$label} = 0 }

      }
  }

  foreach my $att (keys %{$newattrs}){
      next unless $self->{attribute_type}{$att} eq 'real';
      my $sum=0; my %nscores;
      foreach my $label (@labels) {
	  die unless exists $m->{real_stat}{$att}{$label}{mean};
	  $nscores{$label} =
              0.398942280401433 / $m->{real_stat}{$att}{$label}{stddev}*
              exp( -0.5 *
                  ( ( $newattrs->{$att} -
                      $m->{real_stat}{$att}{$label}{mean})
                    / $m->{real_stat}{$att}{$label}{stddev}
                  ) ** 2

NaiveBayes1.pm  view on Meta::CPAN

    $r .= join("\n", @lines) . "\n". $lines[1]. "\n\n";

    # prepare conditional tables
    my @attributes = sort $self->attributes;
    foreach my $att (@attributes) {
	@lines = ( "category ", '-' );
	my @lines1 = ( "$att ", '-' );
	my @lines2 = ( "P( $att | category ) ", '-' );
	my @attvals = sort keys(%{ $m->{condprob}{$att} });
	foreach my $label (@labels) {
	    if ( $self->{attribute_type}{$att} ne 'real' ) {
		foreach my $attval (@attvals) {
		    next unless exists($m->{condprob}{$att}{$attval}{$label});
		    push @lines, "$label ";
		    push @lines1, "$attval ";

		    my $line = $m->{condprob}{$att}{$attval}{$label};
		    if ($withcounts)
		    { $line.= ' '.$m->{condprobe}{$att}{$attval}{$label} }
		    $line .= ' ';
		    push @lines2, $line;

NaiveBayes1.pm  view on Meta::CPAN

=head2 Data Structure

An object contains the following fields:

=over 4

=item C<{attributes}>

List of attribute names.

=item C<{attribute_type}{$a}>

Attribute types - 'real', or not (e.g., 'nominal')

=item C<{labels}>

List of labels.

=item C<{attvals}{$a}>

List of attribute values

=item C<{real_stat}{$a}{$v}{$l}{sum}>

README  view on Meta::CPAN

AI/NaiveBayes1 version 2.011
============================

This is an implementation of the basic Naive Bayes classification
algorithm.  Documentation is a part of the file NaiveBayes1.pm in the
pod format.  To convert it into manual page format, type something
like the following:

	  pod2html NaiveBayes1.pm

INSTALLATION

To install this module type the following:

   perl Makefile.PL
   make
   make test
   make install

DEPENDENCIES

The methods for importing to and exporting from YAML format require
module YAML.

t/5-2.out  view on Meta::CPAN

--- !!perl/hash:AI::NaiveBayes1
attribute_type:
  html: nominal
  morning: nominal
  size: real
attributes:
  - morning
  - html
  - size
attvals:
  html:
    - N



( run in 1.471 second using v1.01-cache-2.11-cpan-df04353d9ac )