AI-NaiveBayes1

 view release on metacpan or  search on metacpan

Makefile.PL  view on Meta::CPAN

  ($ExtUtils::MakeMaker::VERSION ge '6.30_00'? 
   ('LICENSE'         => 'perl', ) : ()),
  );

open(M, ">>Makefile") or die;

if ( -f 'priv.make' ) { print M getfile('priv.make') }

close(M);

sub getfile($) {
    my $f = shift;
    local *F;
    open(F, "<$f") or die "getfile:cannot open $f:$!";
    my @r = <F>;
    close(F);
    return wantarray ? @r : join ('', @r);
}

NaiveBayes1.pm  view on Meta::CPAN

use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
@EXPORT = qw(new);
use vars qw($Version);
$Version = $VERSION = '2.012';

use vars @EXPORT_OK;

# non-exported package globals go here
use vars qw();

sub new {
  my $package = shift;
  return bless {
                attributes => [ ],
		labels     => [ ],
		attvals    => {},
		real_stat  => {},
		numof_instances => 0,
		stat_labels => {},
		stat_attributes => {},
		smoothing => {},
		attribute_type => {},
	       }, $package;
}

sub set_real {
    my ($self, @attr) = @_;
    foreach my $a (@attr) { $self->{attribute_type}{$a} = 'real' }
}

sub import_from_YAML {
    my $package = shift;
    my $yaml = shift;
    my $self = YAML::Load($yaml);
    return bless $self, $package;
}

sub import_from_YAML_file {
    my $package = shift;
    my $yamlf = shift;
    my $self = YAML::LoadFile($yamlf);
    return bless $self, $package;
}

# assume that the last header count means counts
# after optionally removing counts, the last header is label
sub add_table {
    my $self = shift;
    my @atts = (); my $lbl=''; my $cnt = '';
    while (@_) {
	my $table = shift;
	if ($table =~ /^(.*)\n[ \t]*-+\n/) {
	    my $a = $1; $table = $';
	    $a =~ s/^\s+//; $a =~ s/\s+$//;
	    if ($a =~ /\s*\bcount\s*$/) {
		$a=$`; $cnt=1; } else { $cnt='' }
	    @atts = split(/\s+/, $a);

NaiveBayes1.pm  view on Meta::CPAN

	    $self->add_instances(attributes=>\%av,
				 label=>"$lbl=$v[0]",
				 cases=>($cnt?$v[1]:1) );
	}
    }
} # end of add_table

# Simplified; not generally compatible.
# Assume that the last header is label.  The first row contains
# attribute names.
sub add_csv_file {
    my $self = shift; my $fn = shift; local *F;
    open(F,$fn) or die "Cannot open CSV file `$fn': $!";
    local $_ = <F>; my @atts = (); my $lbl=''; my $cnt = '';
    chomp; @atts = split(/\s*,\s*/, $_); $lbl = pop @atts;
    while (<F>) {
	chomp; my @v = split(/\s*,\s*/, $_);
	die "values (#=$#v): {@v}\natts (#=$#atts): @atts, lbl=$lbl,\n".
	    "count: $cnt\n" unless $#v-($cnt?2:1) == $#atts;
	my %av=(); my @a = @atts;
	while (@a) { $av{shift @a} = shift(@v) }
	$self->add_instances(attributes=>\%av,
			     label=>"$lbl=$v[0]",
			     cases=>($cnt?$v[1]:1) );
    }
    close(F);
} # end of add_csv_file

sub drop_attributes {
    my $self = shift;
    foreach my $a (@_) {
	my @tmp = grep { $a ne $_ } @{ $self->{attributes} };
	$self->{attributes} = \@tmp;
	delete($self->{attvals}{$a});
	delete($self->{stat_attributes}{$a});
	delete($self->{attribute_type}{$a});
	delete($self->{real_stat}{$a});
	delete($self->{smoothing}{$a});
    }
} # end of drop_attributes

sub add_instances {
  my ($self, %params) = @_;
  for ('attributes', 'label', 'cases') {
      die "Missing required '$_' parameter" unless exists $params{$_};
  }

  if (scalar(keys(%{ $self->{stat_attributes} })) == 0) {
      foreach my $a (keys(%{$params{attributes}})) {
	  $self->{stat_attributes}{$a} = {};
	  push @{ $self->{attributes} }, $a;
	  $self->{attvals}{$a} = [ ];

NaiveBayes1.pm  view on Meta::CPAN

      { die "attribute $a not given" }
      my $attval = $params{attributes}{$a};
      if (not exists($self->{stat_attributes}{$a}{$attval})) {
	  push @{ $self->{attvals}{$a} }, $attval;
	  $self->{stat_attributes}{$a}{$attval} = {};
      }
      $self->{stat_attributes}{$a}{$attval}{$params{label}} += $params{cases};
  }
}

sub add_instance {
    my ($self, %params) = @_; $params{cases} = 1;
    $self->add_instances(%params);
}

sub train {
    my $self = shift;
    my $m = $self->{model} = {};
    
    $m->{labelprob} = {};
    foreach my $label (keys(%{$self->{stat_labels}}))
    { $m->{labelprob}{$label} = $self->{stat_labels}{$label} /
                                $self->{numof_instances} } 

    $m->{condprob} = {};
    $m->{condprobe} = {};

NaiveBayes1.pm  view on Meta::CPAN

		    $self->{stat_attributes}{$att}{$attval}{$label};
            }
        }
	foreach my $label (keys %{$m->{real_stat}{$att}}) {
	    $m->{real_stat}{$att}{$label}{stddev} =
		sqrt($m->{real_stat}{$att}{$label}{stddev} /
		     ($m->{real_stat}{$att}{$label}{count}-1)
		     );
	}
    }				# foreach real attribute
}				# end of sub train

sub predict {
  my ($self, %params) = @_;
  my $newattrs = $params{attributes} or die "Missing 'attributes' parameter for predict()";
  my $m = $self->{model};  # For convenience
  
  my %scores;
  my @labels = @{ $self->{labels} };
  $scores{$_} = $m->{labelprob}{$_} foreach (@labels);
  foreach my $att (keys(%{ $newattrs })) {
      if (!defined($self->{attribute_type}{$att})) { die "Unknown attribute: `$att'" }
      next if $self->{attribute_type}{$att} eq 'real';

NaiveBayes1.pm  view on Meta::CPAN

	  foreach my $label (@labels) { $scores{$label} *= $nscores{$label} }
      }
  }

  my $sumPx = 0.0;
  $sumPx += $scores{$_} foreach (keys(%scores));
  $scores{$_} /= $sumPx foreach (keys(%scores));
  return \%scores;
}

sub print_model {
    my $self = shift;
    my $withcounts = '';
    if ($#_>-1 && $_[0] eq 'with counts')
    { shift @_; $withcounts = 1; }
    my $m = $self->{model};
    my @labels = $self->labels;
    my $r;

    # prepare table category P(category)
    my @lines;

NaiveBayes1.pm  view on Meta::CPAN

	foreach my $i (0 .. $#lines)
	{ $lines[$i] .= ($lines[$i]=~/-$/?'+-':'| ') . $lines2[$i] }
	@lines = _append_lines(@lines);

	$r .= join("\n", @lines). "\n\n";
    }

    return $r;
}

sub _append_lines {
    my @l = @_;
    my $m = 0;
    foreach (@l) { $m = length($_) if length($_) > $m }
    @l = map 
    { while (length($_) < $m) { $_.=substr($_,length($_)-1) }; $_ }
    @l;
    return @l;
}

sub labels {
  my $self = shift;
  return @{ $self->{labels} };
}

sub attributes {
  my $self = shift;
  return keys %{ $self->{stat_attributes} };
}

sub export_to_YAML {
    my $self = shift;
    require YAML;
    return YAML::Dump($self);
}

sub export_to_YAML_file {
    my $self = shift;
    my $file = shift;
    require YAML;
    YAML::DumpFile($file, $self);
}

1;
__END__

=head1 NAME

t/auxfunctions.pl  view on Meta::CPAN

#!/usr/bin/perl

sub compare_by_line {
    my $got = shift;
    my $file = shift;
    my $testfile = @_ ? shift @_ : '';
    my $testline = @_ ? shift @_ : '';
    my $expected = getfile($file);
    if ($got eq $expected) { pass; return }
    my $flag = '';
    while ($got ne '' or $expected ne '') {
	my $a=$got;      if ($a =~ /\s*\n/) { $a = $`; $got = $'; }
	my $b=$expected; if ($b =~ /\s*\n/) { $b = $`; $expected = $'; }
	if ($a ne $b) {
	    if ($flag eq '')
	    { print STDERR "\n$testfile:$testline: Failed comparison with $file!\n"; $flag = 1; }
	    print STDERR "     Got: $a\n".
                 	 "Expected: $b\n";
	}
    }
    if ($flag eq '') { pass } else { fail }
}

sub shorterdecimals {
    local $_ = shift;
    s/(\d{4}\.\d{10})\d+/$1/g;
    s/(\.\d{12})\d+/$1/g;
    s/---+/---/g;
    return $_;
}

sub getfile($) {
    my $f = shift;
    local *F;
    open(F, "<$f") or die "getfile:cannot open $f:$!";
    my @r = <F>;
    close(F);
    return wantarray ? @r : join ('', @r);
}

sub putfile($@) {
    my $f = shift;
    local *F;
    open(F, ">$f") or die "putfile:cannot open $f:$!";
    print F '' unless @_;
    while (@_) { print F shift(@_) }
    close(F);
}

1;



( run in 0.749 second using v1.01-cache-2.11-cpan-a5abf4f5562 )