AI-DecisionTree

 view release on metacpan or  search on metacpan

t/02-noisy.t  view on Meta::CPAN

# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'

#########################

# Enable strictures so typos in variable names and accidental use of
# undefined values are reported instead of silently skewing the results.
use strict;
use warnings;

use Test;
BEGIN { plan tests => 5 };
use AI::DecisionTree;
ok(1); # If we made it this far, we're ok.

#########################

# Construct the tree under test with noise_mode => 'pick_best'.
my $dtree = AI::DecisionTree->new(noise_mode => 'pick_best');
ok $dtree;

# The first DATA line holds the ", "-separated column names.
my @names = split /, /, <DATA>;
chomp $names[-1];

# Train on first 600 instances
printf "Loading 600 training instances with %d attribute types each\n", scalar @names;
my $trained = 0;
while ($trained < 600) {
  # Check the quota *before* reading: a line-number test made after the
  # read would consume the first test row and silently throw it away.
  my $line = <DATA>;
  last unless defined $line;   # ran out of data early

  chomp $line;
  next unless $line =~ /\S/;   # a blank row is not an instance

  # The last field of a row is the expected result; the rest are attributes.
  my @values = split /, /, $line;
  my $result = pop @values;
  # NOTE(review): this walks every header name after the result was popped;
  # if the header also names the result column, that key maps to undef.
  my %pairs = map {$names[$_], $values[$_]} 0..$#names;

  $dtree->add_instance(attributes => \%pairs,
                       result     => $result,
                      );
  $trained++;
}
print "Building decision tree\n";
$dtree->train;
ok(1);

# Test on rest of data, get at least 80%
print "Testing on remainder of data\n";
my ($good, $bad) = (0,0);
while (defined(my $line = <DATA>)) {
  chomp $line;
  next unless $line =~ /\S/;   # a blank row is not an instance; don't score it

  # The last field of a row is the expected result; the rest are attributes.
  my @values = split /, /, $line;
  my $result = pop @values;
  my %pairs = map {$names[$_], $values[$_]} 0..$#names;

  my ($guess, $confidence) = $dtree->get_result(attributes => \%pairs);
  # Normalise false/undefined returns to '' so the comparison below and the
  # debugging print stay well-defined.
  $guess ||= '';  $confidence ||= '';
  ($guess eq $result ? $good : $bad)++;

  #print "$guess : $result : $confidence\n";
}

# Guard the denominator: with no test rows the division would die and abort
# the script before the remaining planned tests could report.  An empty test
# set is scored as 0 so the accuracy check fails visibly instead.
my $total = $good + $bad;
my $accuracy = $total ? $good/$total : 0;
ok $accuracy > .8;
print "Accuracy=$accuracy\n";

#use YAML; print Dump($dtree->rule_tree);
#print map "$_\n", $dtree->rule_statements;

# Only exercise as_graphviz when the GraphViz module can be loaded; otherwise
# record a skip so the planned test count still adds up.  A block eval with
# require avoids compiling a string at run time.
if (eval { require GraphViz; 1 }) {
  my $graphviz = $dtree->as_graphviz;
  ok $graphviz;

  # Manual debugging aid, deliberately disabled: writes the rendered tree to
  # a PNG and opens it in the default viewer.
  if (0) {
    # Only works on Mac OS X
    my $file = '/tmp/tree2.png';
    open my $fh, '>', $file or die "$file: $!";
    binmode $fh;                      # PNG output is binary
    print $fh $graphviz->as_png;
    close $fh or die "$file: $!";     # buffered write errors surface here
    system('open', $file);
  }
} else {
  skip("Skipping: GraphViz is not installed", 0);
}

# The following data comes from the "C4.5" software package, in the
# "soybean.data" data file.  It is somewhat noisy.  I chose it because
# it was a pretty big data set, and because there are published
# results on it that I can compare to.  Since the data seemed to be in
# order from most-information to least-information or something in the
# C4.5 distribution, I randomized the order of the instances.  Note
# also that I'm treating the '?' value as a normal string value.

# It looks like the original data source is
#     (a) Michalski, R.S. "Learning by being told and learning from
#         examples: an experimental comparison of the two methods of knowledge
#         acquisition in the context of developing an expert system for soybean
#         disease diagnosis", International Journal of Policy Analysis and
#         Information Systems, 1980, 4(2), 125-161.

# The "C4.5" package is written by J.R. Quinlan and may be downloaded
# and used for free, but it is not supported and may not be
# redistributed.

__DATA__
date, plant-stand, precip, temp, hail, crop-hist, area-damaged, severity, seed-tmt, germination, plant-growth, leaves, leafspots-halo, leafspots-marg, leafspot-size, leaf-shread, leaf-malf, leaf-mild, stem, lodging, stem-cankers, canker-lesion, fruit...
june, normal, gt-norm, norm, yes, same-lst-two-yrs, upper-areas, pot-severe, none, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, present, absent, absent, abnorm, yes, above-sec-nde, brown, present, absent, absent, none, absent, norm, absen...
july, lt-normal, norm, norm, ?, same-lst-yr, low-areas, ?, ?, ?, abnorm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, ?, above-sec-nde, dk-brown-blk, ?, absent, absent, none, absent, ?, ?, ?, ?, ?, ?, ?, rotted, phytophthora-rot
september, lt-normal, gt-norm, gt-norm, yes, same-lst-sev-yrs, whole-field, minor, fungicide, lt-80%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent,...
june, normal, gt-norm, norm, yes, same-lst-two-yrs, upper-areas, pot-severe, none, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, brown, present, absent, absent, none, absent, norm, absent...
may, normal, gt-norm, norm, yes, same-lst-two-yrs, whole-field, pot-severe, none, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, present, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, norm, abse...
august, normal, gt-norm, norm, yes, same-lst-yr, upper-areas, pot-severe, none, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, norm, absent,...
september, normal, gt-norm, norm, yes, same-lst-two-yrs, upper-areas, pot-severe, none, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, brown, present, absent, absent, none, absent, norm, a...
august, normal, gt-norm, norm, yes, same-lst-yr, low-areas, severe, none, 90-100%, norm, norm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, absent, absent, none, absent, diseased, brown-w/blk-specks, nor...
june, lt-normal, norm, lt-norm, yes, same-lst-sev-yrs, low-areas, severe, none, lt-80%, abnorm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, below-soil, dk-brown-blk, absent, absent, absent, none, absent, dna, dna, norm, absent, abs...
october, normal, lt-norm, gt-norm, no, same-lst-sev-yrs, whole-field, pot-severe, fungicide, 90-100%, abnorm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, absent, tan, absent, absent, absent, black, present, norm, dna, norm, absent,...
may, normal, gt-norm, gt-norm, yes, same-lst-yr, low-areas, minor, fungicide, 80-89%, abnorm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, norm, absent,...
august, normal, gt-norm, gt-norm, yes, same-lst-yr, upper-areas, pot-severe, none, 90-100%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, firm-and-dry, absent, none, absent,...
august, normal, gt-norm, gt-norm, yes, same-lst-two-yrs, whole-field, pot-severe, fungicide, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, firm-and-dry, absent, none...
april, lt-normal, ?, lt-norm, ?, same-lst-yr, whole-field, ?, ?, ?, abnorm, abnorm, no-yellow-halos, no-w-s-marg, gt-1/8, absent, present, ?, abnorm, ?, ?, ?, ?, ?, ?, ?, ?, dna, ?, ?, ?, ?, ?, ?, rotted, herbicide-injury
september, lt-normal, gt-norm, gt-norm, no, same-lst-two-yrs, upper-areas, pot-severe, none, 80-89%, norm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-sec-nde, brown, present, firm-and-dry, absent, none, absent, diseased, bro...
september, normal, lt-norm, gt-norm, yes, same-lst-sev-yrs, upper-areas, pot-severe, none, lt-80%, abnorm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, absent, tan, absent, absent, absent, black, present, norm, dna, norm, absent, ab...
june, lt-normal, gt-norm, gt-norm, ?, same-lst-yr, low-areas, ?, ?, ?, abnorm, abnorm, ?, ?, ?, ?, ?, ?, abnorm, ?, above-soil, dk-brown-blk, ?, absent, absent, none, absent, ?, ?, ?, ?, ?, ?, ?, rotted, phytophthora-rot
october, lt-normal, gt-norm, gt-norm, no, diff-lst-year, scattered, pot-severe, none, 80-89%, norm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-sec-nde, brown, present, firm-and-dry, absent, none, absent, diseased, brown-w/bl...
june, lt-normal, norm, norm, ?, same-lst-sev-yrs, low-areas, ?, ?, ?, abnorm, abnorm, ?, ?, ?, ?, ?, ?, abnorm, ?, above-sec-nde, dk-brown-blk, ?, absent, absent, none, absent, ?, ?, ?, ?, ?, ?, ?, rotted, phytophthora-rot
august, normal, gt-norm, gt-norm, yes, same-lst-two-yrs, low-areas, pot-severe, fungicide, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, firm-and-dry, absent, none, ...
july, ?, ?, ?, ?, ?, upper-areas, ?, ?, ?, ?, abnorm, absent, dna, dna, ?, present, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 2-4-d-injury
may, lt-normal, ?, lt-norm, ?, diff-lst-year, scattered, ?, ?, ?, abnorm, abnorm, absent, dna, dna, absent, present, ?, abnorm, ?, ?, ?, ?, ?, ?, ?, ?, dna, ?, ?, ?, ?, ?, ?, rotted, herbicide-injury
august, normal, gt-norm, norm, no, same-lst-yr, low-areas, minor, fungicide, lt-80%, norm, norm, absent, dna, dna, absent, absent, absent, norm, yes, absent, tan, absent, absent, absent, none, absent, norm, absent, abnorm, absent, present, norm, abse...
september, normal, gt-norm, norm, no, same-lst-two-yrs, upper-areas, minor, none, 90-100%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, brown, absent, firm-and-dry, absent, none, absent, disease...
july, normal, gt-norm, norm, no, same-lst-two-yrs, low-areas, pot-severe, fungicide, 90-100%, norm, abnorm, no-yellow-halos, w-s-marg, lt-1/8, present, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, norm, ...
october, ?, ?, ?, ?, ?, low-areas, ?, ?, ?, ?, abnorm, absent, dna, dna, ?, present, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 2-4-d-injury
september, normal, lt-norm, norm, yes, same-lst-yr, scattered, pot-severe, fungicide, 90-100%, norm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, absent, dna, absent, absent, absent, brown, absent, norm, absent, norm, absent, absent...
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, abnorm, absent, dna, dna, ?, present, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 2-4-d-injury
august, lt-normal, lt-norm, lt-norm, yes, same-lst-two-yrs, upper-areas, severe, none, 80-89%, abnorm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, absent, tan, absent, absent, absent, brown, absent, norm, dna, norm...
july, normal, norm, lt-norm, no, same-lst-sev-yrs, whole-field, minor, none, 80-89%, norm, abnorm, yellow-halos, no-w-s-marg, lt-1/8, absent, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, abnorm, present,...
may, lt-normal, gt-norm, lt-norm, yes, diff-lst-year, low-areas, pot-severe, none, 80-89%, abnorm, norm, absent, dna, dna, absent, absent, absent, abnorm, yes, below-soil, brown, absent, firm-and-dry, absent, none, absent, dna, dna, norm, absent, abs...
september, normal, gt-norm, norm, yes, same-lst-two-yrs, upper-areas, pot-severe, none, lt-80%, norm, norm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, present, absent, absent, none, absent, diseased, brown-w/b...
september, lt-normal, gt-norm, norm, no, same-lst-yr, low-areas, pot-severe, fungicide, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, norm,...
august, lt-normal, gt-norm, norm, yes, same-lst-two-yrs, low-areas, minor, fungicide, lt-80%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, firm-and-dry, absent, none, absen...
june, lt-normal, gt-norm, norm, no, same-lst-sev-yrs, low-areas, severe, fungicide, lt-80%, abnorm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-soil, dk-brown-blk, absent, firm-and-dry, absent, none, absent, dna, dna, norm, a...
july, lt-normal, norm, norm, ?, same-lst-two-yrs, low-areas, ?, ?, ?, abnorm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, ?, above-sec-nde, dk-brown-blk, ?, absent, absent, none, absent, ?, ?, ?, ?, ?, ?, ?, rotted, phytophthora-rot
september, normal, gt-norm, norm, yes, same-lst-yr, low-areas, pot-severe, none, 90-100%, norm, norm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, present, absent, absent, none, absent, diseased, brown-w/blk-spe...
july, normal, gt-norm, norm, yes, same-lst-two-yrs, upper-areas, pot-severe, none, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, brown, present, absent, absent, none, absent, norm, absent...
september, lt-normal, gt-norm, gt-norm, no, same-lst-two-yrs, upper-areas, minor, fungicide, lt-80%, norm, abnorm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-sec-nde, brown, present, firm-and-dry, absent, none, absent, diseased, bro...
september, normal, gt-norm, norm, yes, same-lst-yr, low-areas, pot-severe, none, 80-89%, norm, norm, absent, dna, dna, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, present, absent, absent, none, absent, diseased, brown-w/blk-spec...
may, normal, gt-norm, norm, no, same-lst-sev-yrs, upper-areas, pot-severe, none, 80-89%, norm, abnorm, yellow-halos, w-s-marg, gt-1/8, absent, absent, lower-surf, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, abnorm, pre...
august, normal, gt-norm, norm, yes, same-lst-two-yrs, upper-areas, pot-severe, none, 80-89%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, firm-and-dry, absent, none, absent...
june, normal, gt-norm, norm, yes, same-lst-two-yrs, whole-field, pot-severe, none, 90-100%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, present, absent, absent, norm, yes, absent, dna, absent, absent, absent, none, absent, norm, absent, norm, ab...
may, lt-normal, gt-norm, norm, ?, same-lst-yr, low-areas, ?, ?, ?, abnorm, abnorm, ?, ?, ?, ?, ?, ?, abnorm, ?, below-soil, dk-brown-blk, ?, absent, absent, none, absent, ?, ?, ?, ?, ?, ?, ?, rotted, phytophthora-rot
september, lt-normal, gt-norm, norm, yes, same-lst-sev-yrs, scattered, minor, none, lt-80%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, firm-and-dry, absent, none, absent,...
august, normal, gt-norm, norm, yes, same-lst-yr, upper-areas, pot-severe, none, 90-100%, norm, abnorm, no-yellow-halos, w-s-marg, gt-1/8, absent, absent, absent, abnorm, yes, above-sec-nde, dk-brown-blk, absent, firm-and-dry, absent, none, absent, di...
june, lt-normal, gt-norm, gt-norm, ?, same-lst-two-yrs, low-areas, ?, ?, ?, abnorm, abnorm, ?, ?, ?, ?, ?, ?, abnorm, ?, above-soil, dk-brown-blk, ?, absent, absent, none, absent, ?, ?, ?, ?, ?, ?, ?, rotted, phytophthora-rot
june, lt-normal, ?, lt-norm, ?, same-lst-yr, whole-field, ?, ?, ?, abnorm, abnorm, no-yellow-halos, no-w-s-marg, gt-1/8, absent, present, ?, abnorm, ?, ?, ?, ?, ?, ?, ?, ?, dna, ?, ?, ?, ?, ?, ?, rotted, herbicide-injury



( run in 1.697 second using v1.01-cache-2.11-cpan-39bf76dae61 )