AI-DecisionTree
view release on metacpan or search on metacpan
lib/AI/DecisionTree.pm view on Meta::CPAN
($best_attr, $best_score) = ($attr, $score) if $score < $best_score;
}
return $best_attr;
}
# Computes the Shannon entropy, in bits, of a distribution supplied as
# pre-tallied counts.
#
# Called as a method; the invocant is discarded.
#   $counts - hash reference whose values are occurrence counts
#   $total  - the sum of those counts
# Returns the entropy in bits.  An empty distribution ($total of 0 or
# undef) yields 0, and zero counts contribute nothing (the usual
# 0*log(0) == 0 convention) rather than dying inside log().
sub entropy2 {
  shift;
  my ($counts, $total) = @_;

  # Nothing observed means no uncertainty; log(0) would otherwise die.
  return 0 unless $total;

  # Entropy is defined with log base 2 - we just divide by log(2) at the end to adjust.
  # With N = $total and n_i the counts: H = log(N) - (sum of n_i*log(n_i))/N.
  my $sum = 0;
  foreach my $n (values %$counts) {
    next unless $n;  # a zero count adds nothing, and log(0) is fatal in Perl
    $sum += $n * log($n);
  }
  return +(log($total) - $sum/$total)/log(2);
}
# Computes the Shannon entropy, in bits, of a list of result values.
#
# Called as a method; the invocant is discarded.  The remaining arguments
# are the individual result values (used as hash keys to tally them).
# Returns the entropy in bits.  An empty list yields 0 instead of dying
# inside log(0).
sub entropy {
  shift;

  # Sample size; taken once so the scalar-context use of @_ is explicit.
  my $total = @_;

  # No observations means no uncertainty; log(0) would otherwise die.
  return 0 unless $total;

  # Tally how often each distinct value occurs.
  my %count;
  $count{$_}++ foreach @_;

  # Entropy is defined with log base 2 - we just divide by log(2) at the end to adjust.
  # With N = $total and n_i the tallies: H = log(N) - (sum of n_i*log(n_i))/N.
  my $sum = 0;
  $sum += $_ * log($_) foreach values %count;
  return +(log($total) - $sum/$total)/log(2);
}
sub prune_tree {
my $self = shift;
# We use a minimum-description-length approach. We calculate the
# score of each node:
# n = number of nodes below
# r = number of results (categories) in the entire tree
# i = number of instances in the entire tree
# e = number of errors below this node
# Hypothesis description length (MML):
# describe tree: number of nodes + number of edges
# describe exceptions: num_exceptions * log2(total_num_instances) * log2(total_num_results)
my $r = keys %{ $self->{results} };
my $i = $self->{tree}{instances};
my $exception_cost = log($r) * log($i) / log(2)**2;
# Pruning can turn a branch into a leaf
my $maybe_prune = sub {
my ($self, $node) = @_;
return unless $node->{children}; # Can't prune leaves
my $nodes_below = $self->nodes_below($node);
my $tree_cost = 2 * $nodes_below - 1; # $edges_below == $nodes_below - 1
my $exceptions = $self->exceptions( $node );
( run in 0.471 second using v1.01-cache-2.11-cpan-a5abf4f5562 )