AI-DecisionTree
view release on metacpan or search on metacpan
t/02-noisy.t view on Meta::CPAN
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl t/02-noisy.t'
#########################
use strict;
use warnings;
use Test;
BEGIN { plan tests => 5 };
use AI::DecisionTree;
ok(1); # If we made it this far, we're ok.
#########################
# Tree under test, constructed with noise_mode => 'pick_best'.
my $dtree = AI::DecisionTree->new(noise_mode => 'pick_best');
ok $dtree;
# The first DATA line is the ", "-separated list of attribute names.
# Fail with a clear message instead of an obscure array-subscript error
# when the DATA section is missing or empty.
my $header = <DATA>;
die "No header line found in the DATA section\n" unless defined $header;
my @names = split /, /, $header;
# Only the last field can still carry the line terminator.
chomp $names[-1];
# Train on the first 600 instances.  The lines are counted explicitly so
# that exactly $train_size lines are read: a `$.'-based range test only
# turns false after the line following the training set has already been
# read, which silently drops that instance from both training and testing.
my $train_size = 600;
printf "Loading %d training instances with %d attribute types each\n",
    $train_size, scalar @names;
for (1 .. $train_size) {
    my $line = <DATA>;
    last unless defined $line;    # DATA ran out before $train_size lines
    chomp $line;
    my @values = split /, /, $line;
    my $result = pop @values;     # last field is passed as the result
    # Pair each attribute name with the value in the same column.
    my %pairs;
    @pairs{@names} = @values[0 .. $#names];
    $dtree->add_instance(attributes => \%pairs,
                         result     => $result,
                        );
}
print "Building decision tree\n";
$dtree->train;
ok(1);
# Test on rest of data, get at least 80%
print "Testing on remainder of data\n";
my ($good, $bad) = (0, 0);
while (my $line = <DATA>) {
    chomp $line;
    my @values = split /, /, $line;
    my $result = pop @values;     # last field is the expected result
    my %pairs = map { ($names[$_], $values[$_]) } 0 .. $#names;
    my ($guess, $confidence) = $dtree->get_result(attributes => \%pairs);
    # Replace only a missing answer; a defined-but-false label such as "0"
    # must be compared as is.
    $guess      = '' unless defined $guess;
    $confidence = '' unless defined $confidence;
    if ($guess eq $result) {
        $good++;
    }
    else {
        $bad++;
    }
    #print "$guess : $result : $confidence\n";
}
# With no test instances left, report an accuracy of 0 (which fails the
# check below) instead of dying on a division by zero.
my $total    = $good + $bad;
my $accuracy = $total ? $good / $total : 0;
ok $accuracy > .8;
print "Accuracy=$accuracy\n";
#use YAML; print Dump($dtree->rule_tree);
#print map "$_\n", $dtree->rule_statements;
# GraphViz is optional: as_graphviz() is exercised only when the module
# can be loaded; otherwise the remaining planned test is reported as
# skipped.
if (eval { require GraphViz; 1 }) {
    my $graphviz = $dtree->as_graphviz;
    ok $graphviz;
    if (0) {
        # Disabled by default; flip the condition to view the tree.
        # Only works on Mac OS X
        my $file = '/tmp/tree2.png';
        open my $fh, '>', $file or die "$file: $!";
        binmode $fh;    # PNG output is binary data
        print $fh $graphviz->as_png;
        close $fh or die "$file: $!";
        system('open', $file);
    }
} else {
    skip("Skipping: GraphViz is not installed", 0);
}
# The following data comes from the "C4.5" software package, in the
# "soybean.data" data file. It is somewhat noisy. I chose it because
# it was a pretty big data set, and because there are published
# results on it that I can compare to. Since the data seemed to be in
# order from most-information to least-information or something in the
# C4.5 distribution, I randomized the order of the instances. Note
# also that I'm treating the '?' value as a normal string value.
# It looks like the original data source is
# (a) Michalski, R.S. "Learning by being told and learning from
# examples: an experimental comparison of the two methods of knowledge
# acquisition in the context of developing an expert system for soybean
# disease diagnosis", International Journal of Policy Analysis and
# Information Systems, 1980, 4(2), 125-161.
# The "C4.5" package is written by J.R. Quinlan and may be downloaded
# and used for free, but it is not supported and may not be
# redistributed.
__DATA__
date, plant-stand, precip, temp, hail, crop-hist, area-damaged, severity, seed-tmt, germination, plant-growth, leaves, leafspots-halo, leafspots-marg, leafspot-size, leaf-shread, leaf-malf, leaf-mild, stem, lodging, stem-cankers, canker-lesion, fruit...
june, normal, gt-norm, norm, yes, same-lst-two-yrs, upper-areas, pot-severe, none, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, present, absent, absent, abnorm, yes, above-sec-nde, brown, present, absent, absent, none, absent, norm, absen...
july, lt-normal, norm, norm, ?, same-lst-yr, low-areas, ?, ?, ?, abnorm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, ?, above-sec-nde, dk-brown-blk, ?, absent, absent, none, absent, ?, ?, ?, ?, ?, ?, ?, rotted, phytophthora-rot
september, lt-normal, gt-norm, gt-norm, yes, same-lst-sev-yrs, whole-field, minor, fungicide, lt-80%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent,...
june, normal, gt-norm, norm, yes, same-lst-two-yrs, upper-areas, pot-severe, none, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, brown, present, absent, absent, none, absent, norm, absent...
may, normal, gt-norm, norm, yes, same-lst-two-yrs, whole-field, pot-severe, none, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, present, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, norm, abse...
august, normal, gt-norm, norm, yes, same-lst-yr, upper-areas, pot-severe, none, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, norm, absent,...
september, normal, gt-norm, norm, yes, same-lst-two-yrs, upper-areas, pot-severe, none, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, brown, present, absent, absent, none, absent, norm, a...
august, normal, gt-norm, norm, yes, same-lst-yr, low-areas, severe, none, 90-100%, norm, norm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, absent, absent, none, absent, diseased, brown-w/blk-specks, nor...
june, lt-normal, norm, lt-norm, yes, same-lst-sev-yrs, low-areas, severe, none, lt-80%, abnorm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, below-soil, dk-brown-blk, absent, absent, absent, none, absent, dna, dna, norm, absent, abs...
october, normal, lt-norm, gt-norm, no, same-lst-sev-yrs, whole-field, pot-severe, fungicide, 90-100%, abnorm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, absent, tan, absent, absent, absent, black, present, norm, dna, norm, absent,...
may, normal, gt-norm, gt-norm, yes, same-lst-yr, low-areas, minor, fungicide, 80-89%, abnorm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, norm, absent,...
august, normal, gt-norm, gt-norm, yes, same-lst-yr, upper-areas, pot-severe, none, 90-100%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, firm-and-dry, absent, none, absent,...
august, normal, gt-norm, gt-norm, yes, same-lst-two-yrs, whole-field, pot-severe, fungicide, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, firm-and-dry, absent, none...
april, lt-normal, ?, lt-norm, ?, same-lst-yr, whole-field, ?, ?, ?, abnorm, abnorm, no-yellow-halos, no-w-s-marg, gt-1/8, absent, present, ?, abnorm, ?, ?, ?, ?, ?, ?, ?, ?, dna, ?, ?, ?, ?, ?, ?, rotted, herbicide-injury
september, lt-normal, gt-norm, gt-norm, no, same-lst-two-yrs, upper-areas, pot-severe, none, 80-89%, norm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-sec-nde, brown, present, firm-and-dry, absent, none, absent, diseased, bro...
september, normal, lt-norm, gt-norm, yes, same-lst-sev-yrs, upper-areas, pot-severe, none, lt-80%, abnorm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, absent, tan, absent, absent, absent, black, present, norm, dna, norm, absent, ab...
june, lt-normal, gt-norm, gt-norm, ?, same-lst-yr, low-areas, ?, ?, ?, abnorm, abnorm, ?, ?, ?, ?, ?, ?, abnorm, ?, above-soil, dk-brown-blk, ?, absent, absent, none, absent, ?, ?, ?, ?, ?, ?, ?, rotted, phytophthora-rot
october, lt-normal, gt-norm, gt-norm, no, diff-lst-year, scattered, pot-severe, none, 80-89%, norm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-sec-nde, brown, present, firm-and-dry, absent, none, absent, diseased, brown-w/bl...
june, lt-normal, norm, norm, ?, same-lst-sev-yrs, low-areas, ?, ?, ?, abnorm, abnorm, ?, ?, ?, ?, ?, ?, abnorm, ?, above-sec-nde, dk-brown-blk, ?, absent, absent, none, absent, ?, ?, ?, ?, ?, ?, ?, rotted, phytophthora-rot
august, normal, gt-norm, gt-norm, yes, same-lst-two-yrs, low-areas, pot-severe, fungicide, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, firm-and-dry, absent, none, ...
july, ?, ?, ?, ?, ?, upper-areas, ?, ?, ?, ?, abnorm, absent, dna, dna, ?, present, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 2-4-d-injury
may, lt-normal, ?, lt-norm, ?, diff-lst-year, scattered, ?, ?, ?, abnorm, abnorm, absent, dna, dna, absent, present, ?, abnorm, ?, ?, ?, ?, ?, ?, ?, ?, dna, ?, ?, ?, ?, ?, ?, rotted, herbicide-injury
august, normal, gt-norm, norm, no, same-lst-yr, low-areas, minor, fungicide, lt-80%, norm, norm, absent, dna, dna, absent, absent, absent, norm, yes, absent, tan, absent, absent, absent, none, absent, norm, absent, abnorm, absent, present, norm, abse...
september, normal, gt-norm, norm, no, same-lst-two-yrs, upper-areas, minor, none, 90-100%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, brown, absent, firm-and-dry, absent, none, absent, disease...
july, normal, gt-norm, norm, no, same-lst-two-yrs, low-areas, pot-severe, fungicide, 90-100%, norm, abnorm, no-yellow-halos, w-s-marg, lt-1/8, present, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, norm, ...
october, ?, ?, ?, ?, ?, low-areas, ?, ?, ?, ?, abnorm, absent, dna, dna, ?, present, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 2-4-d-injury
september, normal, lt-norm, norm, yes, same-lst-yr, scattered, pot-severe, fungicide, 90-100%, norm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, absent, dna, absent, absent, absent, brown, absent, norm, absent, norm, absent, absent...
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, abnorm, absent, dna, dna, ?, present, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 2-4-d-injury
august, lt-normal, lt-norm, lt-norm, yes, same-lst-two-yrs, upper-areas, severe, none, 80-89%, abnorm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, absent, tan, absent, absent, absent, brown, absent, norm, dna, norm...
july, normal, norm, lt-norm, no, same-lst-sev-yrs, whole-field, minor, none, 80-89%, norm, abnorm, yellow-halos, no-w-s-marg, lt-1/8, absent, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, abnorm, present,...
may, lt-normal, gt-norm, lt-norm, yes, diff-lst-year, low-areas, pot-severe, none, 80-89%, abnorm, norm, absent, dna, dna, absent, absent, absent, abnorm, yes, below-soil, brown, absent, firm-and-dry, absent, none, absent, dna, dna, norm, absent, abs...
september, normal, gt-norm, norm, yes, same-lst-two-yrs, upper-areas, pot-severe, none, lt-80%, norm, norm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, present, absent, absent, none, absent, diseased, brown-w/b...
september, lt-normal, gt-norm, norm, no, same-lst-yr, low-areas, pot-severe, fungicide, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, norm,...
august, lt-normal, gt-norm, norm, yes, same-lst-two-yrs, low-areas, minor, fungicide, lt-80%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, firm-and-dry, absent, none, absen...
june, lt-normal, gt-norm, norm, no, same-lst-sev-yrs, low-areas, severe, fungicide, lt-80%, abnorm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-soil, dk-brown-blk, absent, firm-and-dry, absent, none, absent, dna, dna, norm, a...
july, lt-normal, norm, norm, ?, same-lst-two-yrs, low-areas, ?, ?, ?, abnorm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, ?, above-sec-nde, dk-brown-blk, ?, absent, absent, none, absent, ?, ?, ?, ?, ?, ?, ?, rotted, phytophthora-rot
september, normal, gt-norm, norm, yes, same-lst-yr, low-areas, pot-severe, none, 90-100%, norm, norm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, present, absent, absent, none, absent, diseased, brown-w/blk-spe...
july, normal, gt-norm, norm, yes, same-lst-two-yrs, upper-areas, pot-severe, none, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, brown, present, absent, absent, none, absent, norm, absent...
september, lt-normal, gt-norm, gt-norm, no, same-lst-two-yrs, upper-areas, minor, fungicide, lt-80%, norm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-sec-nde, brown, present, firm-and-dry, absent, none, absent, diseased, bro...
september, normal, gt-norm, norm, yes, same-lst-yr, low-areas, pot-severe, none, 80-89%, norm, norm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, present, absent, absent, none, absent, diseased, brown-w/blk-spec...
may, normal, gt-norm, norm, no, same-lst-sev-yrs, upper-areas, pot-severe, none, 80-89%, norm, abnorm, yellow-halos, w-s-marg, gt-1/8, absent, absent, lower-surf, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, abnorm, pre...
august, normal, gt-norm, norm, yes, same-lst-two-yrs, upper-areas, pot-severe, none, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, firm-and-dry, absent, none, absent...
june, normal, gt-norm, norm, yes, same-lst-two-yrs, whole-field, pot-severe, none, 90-100%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, present, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, norm, ab...
may, lt-normal, gt-norm, norm, ?, same-lst-yr, low-areas, ?, ?, ?, abnorm, abnorm, ?, ?, ?, ?, ?, ?, abnorm, ?, below-soil, dk-brown-blk, ?, absent, absent, none, absent, ?, ?, ?, ?, ?, ?, ?, rotted, phytophthora-rot
september, lt-normal, gt-norm, norm, yes, same-lst-sev-yrs, scattered, minor, none, lt-80%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, firm-and-dry, absent, none, absent,...
august, normal, gt-norm, norm, yes, same-lst-yr, upper-areas, pot-severe, none, 90-100%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, firm-and-dry, absent, none, absent, di...
june, lt-normal, gt-norm, gt-norm, ?, same-lst-two-yrs, low-areas, ?, ?, ?, abnorm, abnorm, ?, ?, ?, ?, ?, ?, abnorm, ?, above-soil, dk-brown-blk, ?, absent, absent, none, absent, ?, ?, ?, ?, ?, ?, ?, rotted, phytophthora-rot
june, lt-normal, ?, lt-norm, ?, same-lst-yr, whole-field, ?, ?, ?, abnorm, abnorm, no-yellow-halos, no-w-s-marg, gt-1/8, absent, present, ?, abnorm, ?, ?, ?, ?, ?, ?, ?, ?, dna, ?, ?, ?, ?, ?, ?, rotted, herbicide-injury
( run in 1.697 second using v1.01-cache-2.11-cpan-39bf76dae61 )