AI-NaiveBayes1

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

  - fixed testing problems due to differences in the lowest
    significant digit on different platforms

1.4   Fri Dec 14 11:42:16 AST 2007
  - added test skipping if YAML module is not available
  - removing deprecated "our" reserved word

1.3   Fri Dec  7 07:26:47 AST 2007
  - added reading from a table format (add_table)
  - fixed some warnings reported during testing
  - added option "with counts" to print_model
  - added smoothing option for attributes

1.2   Mon Mar 14 08:03:16 AST 2005
  - addition to documentation
  - fixing a minor bug and a warning

1.1   Tue Apr 20 08:07:33 ADT 2004
  - added several more test
  - implemented optional Gaussian distribution for numerical
    attributes

1.0   Thu Sep  4 08:01:19 ADT 2003
  - bug fix in testing (attributes and labels are sorted now in
    print_model)

0.03  Wed Sep  3 08:25:43 ADT 2003
  - bug fix
  - import and export to/from string and file (using YAML)

0.02  Fri May  9 15:53:53 ADT 2003
  - bug fix

0.01  Fri May  9 14:48:53 ADT 2003
  - original version;

Makefile.PL  view on Meta::CPAN

      AUTHOR => 'Vlado Keselj https://web.cs.dal.ca/~vlado')
   #-
   : ()
   ),
  ($ExtUtils::MakeMaker::VERSION ge '6.30_00'? 
   ('LICENSE'         => 'perl', ) : ()),
  );

open(M, ">>Makefile") or die;

if ( -f 'priv.make' ) { print M getfile('priv.make') }

close(M);

sub getfile($) {
    my $f = shift;
    local *F;
    open(F, "<$f") or die "getfile:cannot open $f:$!";
    my @r = <F>;
    close(F);
    return wantarray ? @r : join ('', @r);

NaiveBayes1.pm  view on Meta::CPAN

		    "(= $m->{condprob}{$att}{$attval}{$label} / $total)";
		$m->{condprob}{$att}{$attval}{$label} /= $total;
	    }
	}
    }

    # For real-valued attributes, we use Gaussian distribution
    # let us collect statistics
    foreach my $att (keys(%{$self->{stat_attributes}})) {
        next unless $self->{attribute_type}{$att} eq 'real';
	print STDERR "Smoothing ignored for real attribute $att!\n" if
	    defined($self->{smoothing}{att}) and $self->{smoothing}{att};
        $m->{real_stat}->{$att} = {};
        foreach my $attval (keys %{$self->{stat_attributes}{$att}}){
            foreach my $label (keys %{$self->{stat_attributes}{$att}{$attval}}){
                $m->{real_stat}{$att}{$label}{sum}
                += $attval * $self->{stat_attributes}{$att}{$attval}{$label};

                $m->{real_stat}{$att}{$label}{count}
                += $self->{stat_attributes}{$att}{$attval}{$label};
            }

NaiveBayes1.pm  view on Meta::CPAN

	  $nscores{$label} =
              0.398942280401433 / $m->{real_stat}{$att}{$label}{stddev}*
              exp( -0.5 *
                  ( ( $newattrs->{$att} -
                      $m->{real_stat}{$att}{$label}{mean})
                    / $m->{real_stat}{$att}{$label}{stddev}
                  ) ** 2
		 );
	  $sum += $nscores{$label};
      }
      if ($sum==0) { print STDERR "Ignoring all Gaussian probabilities: all=0!\n" }
      else {
	  foreach my $label (@labels) { $scores{$label} *= $nscores{$label} }
      }
  }

  my $sumPx = 0.0;
  $sumPx += $scores{$_} foreach (keys(%scores));
  $scores{$_} /= $sumPx foreach (keys(%scores));
  return \%scores;
}

sub print_model {
    my $self = shift;
    my $withcounts = '';
    if ($#_>-1 && $_[0] eq 'with counts')
    { shift @_; $withcounts = 1; }
    my $m = $self->{model};
    my @labels = $self->labels;
    my $r;

    # prepare table category P(category)
    my @lines;

NaiveBayes1.pm  view on Meta::CPAN

     N     Y     Y     N    18   
     N     Y     N     Y    16   
     N     Y     N     N    16   
     N     N     Y     Y     2   
     N     N     Y     N     9   
     N     N     N     Y    11   
     N     N     N     N    91   
  -------------------------------
  ");
  $nb->train;
  print "Model:\n" . $nb->print_model;
  print "Model (with counts):\n" . $nb->print_model('with counts');

  $nb = AI::NaiveBayes1->new;
  $nb->add_instances(attributes=>{model=>'H',place=>'B'},
		     label=>'repairs=Y',cases=>30);
  $nb->add_instances(attributes=>{model=>'H',place=>'B'},
		     label=>'repairs=N',cases=>10);
  $nb->add_instances(attributes=>{model=>'H',place=>'N'},
		     label=>'repairs=Y',cases=>18);
  $nb->add_instances(attributes=>{model=>'H',place=>'N'},
		     label=>'repairs=N',cases=>16);

NaiveBayes1.pm  view on Meta::CPAN

		     label=>'repairs=Y',cases=>22);
  $nb->add_instances(attributes=>{model=>'T',place=>'B'},
		     label=>'repairs=N',cases=>14);
  $nb->add_instances(attributes=>{model=>'T',place=>'N'},
		     label=>'repairs=Y',cases=> 6);
  $nb->add_instances(attributes=>{model=>'T',place=>'N'},
		     label=>'repairs=N',cases=>84);

  $nb->train;

  print "Model:\n" . $nb->print_model;
  
  # Find results for unseen instances
  my $result = $nb->predict
     (attributes => {model=>'T', place=>'N'});

  foreach my $k (keys(%{ $result })) {
      print "for label $k P = " . $result->{$k} . "\n";
  }

  # export the model into a string
  my $string = $nb->export_to_YAML();

  # create the same model from the string
  my $nb1 = AI::NaiveBayes1->import_from_YAML($string);

  # write the model to a file (shorter than model->string->file)
  $nb->export_to_YAML_file('t/tmp1');

NaiveBayes1.pm  view on Meta::CPAN

=item export_to_YAML()

Returns a C<YAML> string representation of an C<AI::NaiveBayes1>
object.  Requires YAML module.

=item C<export_to_YAML_file( $file_name )>

Writes a C<YAML> string representation of an C<AI::NaiveBayes1>
object to a file.  Requires YAML module.

=item C<print_model( OPTIONAL 'with counts' )>

Returns a string, human-friendly representation of the model.
The model is supposed to be trained before calling this method.
One argument 'with counts' can be supplied, in which case explanatory
expressions with counts are printed as well.

=item train()

Calculates the probabilities that will be necessary for categorization
using the C<predict()> method.

=item C<predict( attributes =E<gt> HASH )>

Use this method to predict the label of an unknown instance.  The
attributes should be of the same format as you passed to

NaiveBayes1.pm  view on Meta::CPAN

 $nb->add_instance(attributes=>{outlook=>'sunny',temperature=>72,humidity=>95,windy=>'FALSE'},label=>'play=no');
 $nb->add_instance(attributes=>{outlook=>'sunny',temperature=>69,humidity=>70,windy=>'FALSE'},label=>'play=yes');
 $nb->add_instance(attributes=>{outlook=>'rainy',temperature=>75,humidity=>80,windy=>'FALSE'},label=>'play=yes');
 $nb->add_instance(attributes=>{outlook=>'sunny',temperature=>75,humidity=>70,windy=>'TRUE'},label=>'play=yes');
 $nb->add_instance(attributes=>{outlook=>'overcast',temperature=>72,humidity=>90,windy=>'TRUE'},label=>'play=yes');
 $nb->add_instance(attributes=>{outlook=>'overcast',temperature=>81,humidity=>75,windy=>'FALSE'},label=>'play=yes');
 $nb->add_instance(attributes=>{outlook=>'rainy',temperature=>71,humidity=>91,windy=>'TRUE'},label=>'play=no');
 
 $nb->train;
 
 my $printedmodel =  "Model:\n" . $nb->print_model;
 my $p = $nb->predict(attributes=>{outlook=>'sunny',temperature=>66,humidity=>90,windy=>'TRUE'});

 YAML::DumpFile('file', $p);
 die unless (abs($p->{'play=no'}  - 0.792) < 0.001);
 die unless(abs($p->{'play=yes'} - 0.208) < 0.001);

=head1 HISTORY

L<Algorithm::NaiveBayes> by Ken Williams was not what I needed so I
wrote this one.  L<Algorithm::NaiveBayes> is oriented towards text

t/1.t  view on Meta::CPAN

$nb->add_instances(attributes=>{model=>'H',place=>'B'},label=>'repairs=N',cases=>10);
$nb->add_instances(attributes=>{model=>'H',place=>'N'},label=>'repairs=Y',cases=>18);
$nb->add_instances(attributes=>{model=>'H',place=>'N'},label=>'repairs=N',cases=>16);
$nb->add_instances(attributes=>{model=>'T',place=>'B'},label=>'repairs=Y',cases=>22);
$nb->add_instances(attributes=>{model=>'T',place=>'B'},label=>'repairs=N',cases=>14);
$nb->add_instances(attributes=>{model=>'T',place=>'N'},label=>'repairs=Y',cases=> 6);
$nb->add_instances(attributes=>{model=>'T',place=>'N'},label=>'repairs=N',cases=>84);

$nb->train;

my $printedmodel =  "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/1-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/1-1.out');

#putfile('t/1-2.out', $nb->export_to_YAML());
#is($nb->export_to_YAML(), getfile('t/1-2.out'));

eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 1.t" if $@;

$nb->export_to_YAML_file('t/tmp1');

my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp1');

$printedmodel = &shorterdecimals($nb1->print_model);
#is("Model:\n" . $printedmodel, getfile('t/1-1.out'));
&compare_by_line("Model:\n" . $printedmodel, 't/1-1.out');

my $tmp = $nb->export_to_YAML();
my $nb2 = AI::NaiveBayes1->import_from_YAML($tmp);
$printedmodel = &shorterdecimals($nb2->print_model);
#is("Model:\n" . $printedmodel, getfile('t/1-1.out'));
&compare_by_line("Model:\n" . $printedmodel, 't/1-1.out');

t/2.t  view on Meta::CPAN

$nb->add_instance(attributes=>{morning=>'N',html=>'Y',size1=>'L'},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size1=>'L'},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'Y',size1=>'S'},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'Y',size1=>'S'},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size1=>'L'},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size1=>'S'},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'N',size1=>'S'},label=>'spam=Y');

$nb->train;

my $printedmodel =  "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/2-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/2-1.out', __FILE__ , __LINE__);

#putfile('t/2-2.out', $nb->export_to_YAML());
#is($nb->export_to_YAML(), getfile('t/2-2.out'));

eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 2.t" if $@;

$nb->export_to_YAML_file('t/tmp2');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp2');
$printedmodel = "Model:\n" . $nb1->print_model;
$printedmodel = &shorterdecimals($printedmodel);
&compare_by_line($printedmodel, 't/2-1.out', __FILE__ , __LINE__);
#is("Model:\n" . $nb1->print_model, getfile('t/2-1.out'));

my $tmp = $nb->export_to_YAML();
my $nb2 = AI::NaiveBayes1->import_from_YAML($tmp);
$printedmodel = "Model:\n" . $nb2->print_model;
$printedmodel = &shorterdecimals($printedmodel);
&compare_by_line($printedmodel, 't/2-1.out', __FILE__ , __LINE__);
#is("Model:\n" . $nb2->print_model, getfile('t/2-1.out'));

my $p = $nb->predict(attributes=>{morning=>'Y',html=>'Y',size1=>'L'});

#putfile('t/2-3.out', YAML::Dump($p));
like($p->{'spam=N'}, qr/0\.0627/);
like($p->{'spam=Y'}, qr/0\.9372/);

t/3.t  view on Meta::CPAN

$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>'mild',humidity=>'high',windy=>'FALSE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>'cool',humidity=>'normal',windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>'mild',humidity=>'normal',windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>'mild',humidity=>'normal',windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>'mild',humidity=>'high',windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>'hot',humidity=>'normal',windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>'mild',humidity=>'high',windy=>'TRUE'},label=>'play=no');

$nb->train;

my $printedmodel =  "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);

#putfile('t/3-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/3-1.out', __FILE__, __LINE__);

#putfile('t/3-2.out', $nb->export_to_YAML());
#is($nb->export_to_YAML(), getfile('t/3-2.out'));

eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 3.t" if $@;

$nb->export_to_YAML_file('t/tmp3');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp3');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
		 't/3-1.out', __FILE__, __LINE__);

my $tmp = $nb->export_to_YAML();
my $nb2 = AI::NaiveBayes1->import_from_YAML($tmp);
&compare_by_line("Model:\n" . &shorterdecimals($nb2->print_model),
		 't/3-1.out', __FILE__, __LINE__);

my $p = $nb->predict(attributes=>{outlook=>'sunny',temperature=>'cool',humidity=>'high',windy=>'TRUE'});

#putfile('t/3-3.out', YAML::Dump($p));
ok(abs($p->{'play=no'}  - 0.795) < 0.001);
ok(abs($p->{'play=yes'} - 0.205) < 0.001);

t/4.t  view on Meta::CPAN

$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>72,humidity=>95,windy=>'FALSE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>69,humidity=>70,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>75,humidity=>80,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>75,humidity=>70,windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>72,humidity=>90,windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>81,humidity=>75,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>71,humidity=>91,windy=>'TRUE'},label=>'play=no');

$nb->train;

my $printedmodel =  "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);

#putfile('t/4-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/4-1.out', __FILE__, __LINE__);

#putfile('t/4-2.out', $nb->export_to_YAML());
#is($nb->export_to_YAML(), getfile('t/4-2.out'));

eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 4.t" if $@;

$nb->export_to_YAML_file('t/tmp4');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp4');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
		 't/4-1.out', __FILE__, __LINE__);

my $tmp = $nb->export_to_YAML();
my $nb2 = AI::NaiveBayes1->import_from_YAML($tmp);
&compare_by_line("Model:\n" . &shorterdecimals($nb2->print_model),
		 't/4-1.out', __FILE__, __LINE__);

my $p = $nb->predict(attributes=>{outlook=>'sunny',temperature=>66,humidity=>90,windy=>'TRUE'});

#putfile('t/4-3.out', YAML::Dump($p));
ok(abs($p->{'play=no'}  - 0.792) < 0.001);
ok(abs($p->{'play=yes'} - 0.208) < 0.001);

t/5.t  view on Meta::CPAN

$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size=>2176},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'Y',size=>877},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'Y',size=>272},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size=>2740},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size=>514},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'N',size=>1321},label=>'spam=Y');

$nb->set_real('size');
$nb->train;

my $printedmodel =  "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);

#putfile('t/5-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/5-1.out', __FILE__, __LINE__);

#putfile('t/5-2.out', $nb->export_to_YAML());
#is($nb->export_to_YAML(), getfile('t/5-2.out'));

eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 5.t" if $@;

$nb->export_to_YAML_file('t/tmp5');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp5');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
		 't/5-1.out', __FILE__, __LINE__);

my $tmp = $nb->export_to_YAML();
my $nb2 = AI::NaiveBayes1->import_from_YAML($tmp);
&compare_by_line("Model:\n" . &shorterdecimals($nb2->print_model),
		 't/5-1.out', __FILE__, __LINE__);

my $p = $nb->predict(attributes=>{morning=>'Y',html=>'Y',size=>4749});

#putfile('t/5-3.out', YAML::Dump($p));
ok(abs($p->{'spam=N'} - 0.043) < 0.001);
ok(abs($p->{'spam=Y'} - 0.957) < 0.001);

t/6.t  view on Meta::CPAN

$nb->add_instances(attributes=>{C=>'Y',F=>2},label=>'S=N',cases=>10);
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=Y',cases=>18);
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=N',cases=>16);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=Y',cases=>22);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=N',cases=>14);
$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=Y',cases=> 6);
$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=N',cases=>84);

$nb->train;

my $printedmodel =  "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);

#putfile('t/6-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/6-1.out', __FILE__, __LINE__);

eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 6.t" if $@;

$nb->export_to_YAML_file('t/tmp6');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp6');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
		 't/6-1.out', __FILE__, __LINE__);

my $p = $nb->predict(attributes=>{C=>'Y',F=>0});

#putfile('t/6-2.out', YAML::Dump($p));
ok(abs($p->{'S=N'} - 0.580) < 0.001);
ok(abs($p->{'S=Y'} - 0.420) < 0.001);

# Continual

t/6.t  view on Meta::CPAN

$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=Y',cases=>18);
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=N',cases=>16);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=Y',cases=>22);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=N',cases=>14);
$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=Y',cases=> 6);
$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=N',cases=>84);

$nb->set_real('F');
$nb->train;

$printedmodel =  "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);

#putfile('t/6-3.out', $printedmodel);
&compare_by_line($printedmodel, 't/6-3.out', __FILE__, __LINE__);

$nb->export_to_YAML_file('t/tmp6-2');
$nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp6-2');

&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
		 't/6-3.out', __FILE__, __LINE__);

$p = $nb->predict(attributes=>{C=>'Y',F=>1});

#putfile('t/6-4.out', YAML::Dump($p));
ok(abs($p->{'S=N'} - 0.339) < 0.001);
ok(abs($p->{'S=Y'} - 0.661) < 0.001);

t/7.t  view on Meta::CPAN

$nb->add_instances(attributes=>{C=>'Y',F=>2},label=>'S=N',cases=>10);
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=Y',cases=>18);
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=N',cases=>16);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=Y',cases=>22);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=N',cases=>14);
$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=Y',cases=> 6);
$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=N',cases=>84);

$nb->train;

my $printedmodel =  "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);

#putfile('t/7-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/7-1.out', __FILE__, __LINE__);

eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 7.t" if $@;

$nb->export_to_YAML_file('t/tmp7');
my $nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp7');
&compare_by_line("Model:\n" . &shorterdecimals($nb1->print_model),
		 't/7-1.out', __FILE__, __LINE__);

my $p = $nb->predict(attributes=>{C=>'Y',F=>0});

#putfile('t/7-2.out', YAML::Dump($p));
ok(abs($p->{'S=N'} - 0.580) < 0.001);
ok(abs($p->{'S=Y'} - 0.420) < 0.001);

# Continual

t/7.t  view on Meta::CPAN

#$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=N',cases=>84);
$nb->add_table("  C   F   S  count\n".
               "------------------\n".
	       "  N   0   Y    6  \n".
	       "  N   0   N   84  \n".
	       '');

$nb->set_real('F');
$nb->train;

$printedmodel =  &shorterdecimals("Model:\n" . $nb->print_model);

#putfile('t/7-3.out', $printedmodel);
&compare_by_line($printedmodel, 't/7-3.out', __FILE__, __LINE__);

$nb->export_to_YAML_file('t/tmp7-2');
$nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp7-2');

&compare_by_line(&shorterdecimals("Model:\n" . $nb1->print_model),
		 't/7-3.out', __FILE__, __LINE__);

$p = $nb->predict(attributes=>{C=>'Y',F=>1});

#putfile('t/7-4.out', YAML::Dump($p));
ok(abs($p->{'S=N'} - 0.339) < 0.001);
ok(abs($p->{'S=Y'} - 0.661) < 0.001);

$nb = AI::NaiveBayes1->new;
$nb->add_table(

t/7.t  view on Meta::CPAN

   N     Y     Y     N    18   
   N     Y     N     Y    16   
   N     Y     N     N    16   
   N     N     Y     Y     2   
   N     N     Y     N     9   
   N     N     N     Y    11   
   N     N     N     N    91   
-------------------------------
");
$nb->train;
$printedmodel =  "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/7-5.out', $printedmodel);
&compare_by_line($printedmodel, 't/7-5.out', __FILE__, __LINE__);

$p = $nb->predict(attributes=>{Html=>'N',Caps=>'N',Free=>'Y'});
# putfile('t/7-2.out', YAML::Dump($p));

ok(abs($p->{'Spam=N'} - 0.6580) < 0.001);
ok(abs($p->{'Spam=Y'} - 0.3420) < 0.001);

t/8.t  view on Meta::CPAN

   N    fly   fly   END   V   1
   PREV PREV  fly   duck  V   2
   V    fly   duck  END   N   2
  -------------------------------
");

$nb->{smoothing}{W}  = 'unseen count=0.5';
$nb->{smoothing}{Wp} = 'unseen count=0.5';
$nb->{smoothing}{Wf} = 'unseen count=0.5';
$nb->train;
my $printedmodel =  "Model:\n" . $nb->print_model('with counts');

putfile('t/8-1.out', $printedmodel);
is($printedmodel, getfile('t/8-1.out'));

eval "require YAML;";
plan skip_all => "YAML module required for the remaining tests in 8.t" if $@;

use YAML;

# duck ducks fly flies
my $p = $nb->predict(attributes=>{Tp=>'PREV',Wp=>'PREV',W=>'duck',Wf=>'ducks'});
putfile('t/8-2.out', YAML::Dump($p));
#ok(abs($p->{'S=N'} - 0.580) < 0.001);

t/auxfunctions.pl  view on Meta::CPAN

    my $testfile = @_ ? shift @_ : '';
    my $testline = @_ ? shift @_ : '';
    my $expected = getfile($file);
    if ($got eq $expected) { pass; return }
    my $flag = '';
    while ($got ne '' or $expected ne '') {
	my $a=$got;      if ($a =~ /\s*\n/) { $a = $`; $got = $'; }
	my $b=$expected; if ($b =~ /\s*\n/) { $b = $`; $expected = $'; }
	if ($a ne $b) {
	    if ($flag eq '')
	    { print STDERR "\n$testfile:$testline: Failed comparison with $file!\n"; $flag = 1; }
	    print STDERR "     Got: $a\n".
                 	 "Expected: $b\n";
	}
    }
    if ($flag eq '') { pass } else { fail }
}

sub shorterdecimals {
    local $_ = shift;
    s/(\d{4}\.\d{10})\d+/$1/g;
    s/(\.\d{12})\d+/$1/g;

t/auxfunctions.pl  view on Meta::CPAN

    open(F, "<$f") or die "getfile:cannot open $f:$!";
    my @r = <F>;
    close(F);
    return wantarray ? @r : join ('', @r);
}

sub putfile($@) {
    my $f = shift;
    local *F;
    open(F, ">$f") or die "putfile:cannot open $f:$!";
    print F '' unless @_;
    while (@_) { print F shift(@_) }
    close(F);
}

1;



( run in 0.774 second using v1.01-cache-2.11-cpan-de7293f3b23 )