AI-NaiveBayes1
view release on metacpan or search on metacpan
NaiveBayes1.pm view on Meta::CPAN
attvals => {},
real_stat => {},
numof_instances => 0,
stat_labels => {},
stat_attributes => {},
smoothing => {},
attribute_type => {},
}, $package;
}
sub set_real {
my ($self, @attr) = @_;
foreach my $a (@attr) { $self->{attribute_type}{$a} = 'real' }
}
sub import_from_YAML {
my $package = shift;
my $yaml = shift;
my $self = YAML::Load($yaml);
return bless $self, $package;
}
NaiveBayes1.pm view on Meta::CPAN
=head2 Attribute Smoothing
For an attribute A one can specify:
$nb->{smoothing}{A} = 'unseen count=0.5';
to provide a count for unseen data. The count is taken into
consideration in training and prediction, when any unseen attribute
values are observed. Zero probabilities can be prevented in this way.
A count other than 0.5 can be provided, but if it is <=0 it will be
set to 0.5. The method is similar to add-one smoothing. A special
attribute value '*' is used for all unseen data.
=head1 METHODS
=head2 Constructor Methods
=over 4
=item new()
NaiveBayes1.pm view on Meta::CPAN
=item add_csv_file($filename)
Add instances from a CSV file. Primitive format implementation (e.g.,
no commas allowed in attribute names or values).
=item drop_attributes(@attributes)
Delete attributes after adding instances.
=item set_real(list_of_attributes)
Delares a list of attributes to be real-valued. During training,
their conditional probabilities will be modeled with Gaussian (normal)
distributions.
=item C<add_instance(attributes=E<gt>HASH,label=E<gt>STRING|ARRAY)>
Adds a training instance to the categorizer.
=item C<add_instances(attributes=E<gt>HASH,label=E<gt>STRING|ARRAY,cases=E<gt>NUMBER)>
NaiveBayes1.pm view on Meta::CPAN
# rainy,65,70,TRUE,no
# overcast,64,65,TRUE,yes
# sunny,72,95,FALSE,no
# sunny,69,70,FALSE,yes
# rainy,75,80,FALSE,yes
# sunny,75,70,TRUE,yes
# overcast,72,90,TRUE,yes
# overcast,81,75,FALSE,yes
# rainy,71,91,TRUE,no
$nb->set_real('temperature', 'humidity');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>85,humidity=>85,windy=>'FALSE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>80,humidity=>90,windy=>'TRUE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>83,humidity=>86,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>70,humidity=>96,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>68,humidity=>80,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>65,humidity=>70,windy=>'TRUE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>64,humidity=>65,windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>72,humidity=>95,windy=>'FALSE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>69,humidity=>70,windy=>'FALSE'},label=>'play=yes');
# overcast,64,65,TRUE,yes
# sunny,72,95,FALSE,no
# sunny,69,70,FALSE,yes
# rainy,75,80,FALSE,yes
# sunny,75,70,TRUE,yes
# overcast,72,90,TRUE,yes
# overcast,81,75,FALSE,yes
# rainy,71,91,TRUE,no
#
$nb->set_real('temperature', 'humidity');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>85,humidity=>85,windy=>'FALSE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>80,humidity=>90,windy=>'TRUE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>83,humidity=>86,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>70,humidity=>96,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>68,humidity=>80,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'rainy',temperature=>65,humidity=>70,windy=>'TRUE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'overcast',temperature=>64,humidity=>65,windy=>'TRUE'},label=>'play=yes');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>72,humidity=>95,windy=>'FALSE'},label=>'play=no');
$nb->add_instance(attributes=>{outlook=>'sunny',temperature=>69,humidity=>70,windy=>'FALSE'},label=>'play=yes');
$nb->add_instance(attributes=>{morning=>'N',html=>'Y',size=>2042},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size=>3893},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'Y',size=>3601},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size=>2176},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'Y',size=>877},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'Y',size=>272},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size=>2740},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'Y',html=>'Y',size=>514},label=>'spam=Y');
$nb->add_instance(attributes=>{morning=>'N',html=>'N',size=>1321},label=>'spam=Y');
$nb->set_real('size');
$nb->train;
my $printedmodel = "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/5-1.out', $printedmodel);
&compare_by_line($printedmodel, 't/5-1.out', __FILE__, __LINE__);
#putfile('t/5-2.out', $nb->export_to_YAML());
#is($nb->export_to_YAML(), getfile('t/5-2.out'));
$nb->add_instances(attributes=>{C=>'Y',F=>2},label=>'S=Y',cases=>30);
$nb->add_instances(attributes=>{C=>'Y',F=>2},label=>'S=N',cases=>10);
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=Y',cases=>18);
$nb->add_instances(attributes=>{C=>'Y',F=>0},label=>'S=N',cases=>16);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=Y',cases=>22);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=N',cases=>14);
$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=Y',cases=> 6);
$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=N',cases=>84);
$nb->set_real('F');
$nb->train;
$printedmodel = "Model:\n" . $nb->print_model;
$printedmodel = &shorterdecimals($printedmodel);
#putfile('t/6-3.out', $printedmodel);
&compare_by_line($printedmodel, 't/6-3.out', __FILE__, __LINE__);
$nb->export_to_YAML_file('t/tmp6-2');
$nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp6-2');
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=Y',cases=>22);
$nb->add_instances(attributes=>{C=>'N',F=>2},label=>'S=N',cases=>14);
#$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=Y',cases=> 6);
#$nb->add_instances(attributes=>{C=>'N',F=>0},label=>'S=N',cases=>84);
$nb->add_table(" C F S count\n".
"------------------\n".
" N 0 Y 6 \n".
" N 0 N 84 \n".
'');
$nb->set_real('F');
$nb->train;
$printedmodel = &shorterdecimals("Model:\n" . $nb->print_model);
#putfile('t/7-3.out', $printedmodel);
&compare_by_line($printedmodel, 't/7-3.out', __FILE__, __LINE__);
$nb->export_to_YAML_file('t/tmp7-2');
$nb1 = AI::NaiveBayes1->import_from_YAML_file('t/tmp7-2');
( run in 1.562 second using v1.01-cache-2.11-cpan-49f99fa48dc )