AI-DecisionTree

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

   do_purge() method.

 - Added the set_results() and copy_instances() methods, which let you
   re-use training instances from one tree to another.

 - Added the instances() and purge() accessor methods.

0.05  Thu Sep 12 01:22:34 AEST 2002

 - Fixed a concurrency problem that occurred when making more than one
   decision tree.  All tree data is now stored as member data, not
   class data.

 - DecisionTree.pm is now pure-perl again (though Instance.pm still
   has an XS component).

 - Fixed an off-by-one bug in the Instance.xs code that could create
   garbage data.

 - Handles "sparse" data better.  Sparse data means that not every
   attribute has to be defined for every training/test instance.
   This can now be a meaningful property - the absence of a value is
   currently equivalent to a special "<undef>" value.

 - Don't trigger warnings when undefined attribute values are
   encountered (as happens with sparse data).

 - Added documentation for the 'prune' parameter to new()

 - More consistent with memory allocation in Instance.xs - uses the
   perl memory macros/functions from `perldoc perlclib` instead of raw
   malloc/realloc/free.

 - Catches possible infinite loop situations when growing the tree
   (which shouldn't usually happen, but other mistakes can cause it)

eg/example.pl  view on Meta::CPAN

					     wind => 'strong',
					    } );
print "Result 2: $result\n";  # yes



# Show the created tree structure as rules, one rule statement per line
print map "$_\n", $dtree->rule_statements;


# Will barf on inconsistent data: both instances below have identical
# attributes but different results.
# The constructor is called with direct method syntax rather than the
# indirect-object form ("new Class"), which perl can mis-parse.
my $t2 = AI::DecisionTree->new;
$t2->add_instance( attributes => { foo => 'bar' },
		   result => 1 );
$t2->add_instance( attributes => { foo => 'bar' },
		   result => 0 );
# Trap the exception raised by train() and print its message
eval {$t2->train};
print "$@\n";

lib/AI/DecisionTree.pm  view on Meta::CPAN

  
  foreach my $attr (keys %{$self->{attribute_values}}) {
    my $h = $self->{attribute_values}{$attr};
    $self->{attribute_values_reverse}{$attr} = [ undef, sort {$h->{$a} <=> $h->{$b}} keys %$h ];
  }
}

sub train {
  my ($self, %args) = @_;
  if (not @{ $self->{instances} }) {
    croak "Training data has been purged, can't re-train" if $self->{tree};
    croak "Must add training instances before calling train()";
  }
  
  $self->_create_lookup_hashes;
  local $self->{curr_depth} = 0;
  local $self->{max_depth} = $args{max_depth} if exists $args{max_depth};
  $self->{depth} = 0;
  $self->{tree} = $self->_expand_node( instances => $self->{instances} );
  $self->{total_instances} = @{$self->{instances}};
  

t/01-simple.t  view on Meta::CPAN


{
  # Retrain with max_depth => 1, then check both the number of rule
  # statements produced and the depth the tree reports.
  $dtree->train(max_depth => 1);
  my @statements = $dtree->rule_statements;

  # scalar() spells out that the rule *count* is being compared
  ok(scalar(@statements), 3);
  ok($dtree->depth, 1);
}

{
  # Should barf on inconsistent data: the two instances share the same
  # attributes but carry different results, so train() is expected to
  # die with a message matching /Inconsistent data/.
  # Direct method syntax replaces the indirect-object "new Class" form.
  my $t2 = AI::DecisionTree->new;
  $t2->add_instance( attributes => { foo => 'bar' },
		     result => 1 );
  $t2->add_instance( attributes => { foo => 'bar' },
		     result => 0 );
  eval {$t2->train};
  # The expected value is a '/regex/' string, matched against the error text
  ok( "$@", '/Inconsistent data/' );
}

{
  # Make sure two trees can be trained concurrently
  my $t1 = new AI::DecisionTree;
  my $t2 = new AI::DecisionTree;
  
  my @train = (
	       [farming => 'sheep very valuable farming'],
	       [farming => 'farming requires many kinds animals'],

t/02-noisy.t  view on Meta::CPAN

  my %pairs = map {$names[$_], $values[$_]} 0..$#names;

  $dtree->add_instance(attributes => \%pairs,
		       result => $result,
		      );
}
# Build the tree from the instances added above; reaching ok(1) means
# train() returned without dying.
print "Building decision tree\n";
$dtree->train();
ok(1);

# Running tallies of correct and incorrect guesses over the rest of the
# data (per the original note, the target is at least 80% correct).
print "Testing on remainder of data\n";
my $good = 0;
my $bad  = 0;
while (<DATA>) {
  chomp;
  my @values = split /, /, $_;
  my $result = pop @values;
  my %pairs = map {$names[$_], $values[$_]} 0..$#names;

  my ($guess, $confidence) = $dtree->get_result(attributes => \%pairs);
  $guess ||= '';  $confidence ||= '';
  ($guess eq $result ? $good : $bad)++;

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 1.496 second using v1.00-cache-2.02-grep-82fe00e-cpan-4673cadbf75 )