Algorithm-DecisionTree
view release on metacpan or search on metacpan
lib/Algorithm/RandomizedTreesForBigData.pm view on Meta::CPAN
my $self = shift;
die "You must first call 'classify_with_all_trees()' before invoking 'get_majority_vote_classifiction()'" unless $self->{_classifications};
my @classifications = @{$self->{_classifications}};
my %decision_classes = map { $_ => 0 } @{$self->{_all_trees}->{0}->{_class_names}};
foreach my $t (0 .. $self->{_how_many_trees}-1) {
my $classification = $classifications[$t];
delete $classification->{'solution_path'} if exists $classification->{'solution_path'};
my @sorted_classes = sort {$classification->{$b} <=> $classification->{$a}} keys %$classification;
$decision_classes{$sorted_classes[0]}++;
}
my @sorted_by_votes_decision_classes = sort {$decision_classes{$b} <=> $decision_classes{$a}} keys %decision_classes;
return $sorted_by_votes_decision_classes[0];
}
######################################## Utility Routines ##############################################
# Extracts the index part of a sample name: everything that follows the
# first underscore in the string (e.g. 'sample_23' yields '23').
#
# Argument: a sample-name string.
# Returns:  the text after the first underscore, or undef when the string
#           has no underscore followed by at least one character.
sub sample_index {
    my ($arg) = @_;
    # Capture in list context so that a failed match produces undef rather
    # than whatever $1 still holds from some earlier successful match.
    my ($index) = $arg =~ /_(.+)$/;
    return $index;
}
# Checks whether an element is in an array.
#
# Arguments: the element to look for, followed by the list to search.
# Returns:   the number of list entries that are string-equal (eq) to the
#            element; 0 (false) when there is none.
sub contained_in {
    my ($ele, @array) = @_;
    # grep in scalar context yields the match count directly, avoiding a
    # void-context map that exists only to bump a counter.
    return scalar grep { $ele eq $_ } @array;
}
# Finds the smallest and the largest value in an array of numbers.
#
# Argument: a reference to an array of numeric values.
# Returns:  the two-element list (minimum, maximum); both entries are undef
#           when the array is empty.
sub minmax {
    my ($numbers) = @_;
    my @values = @{$numbers};
    my ($lowest, $highest);
    for my $value (@values) {
        # The first value seeds both extremes; later values replace an
        # extreme only when they are strictly beyond it.
        $lowest  = $value if !defined($lowest)  || $value < $lowest;
        $highest = $value if !defined($highest) || $value > $highest;
    }
    return ($lowest, $highest);
}
# Verifies that every supplied parameter name is one of the names in the
# legal list below.
#
# Arguments: a flat list of parameter-name strings.
# Returns:   1 when every name is legal (an empty list is trivially legal),
#            0 as soon as a name is found that is not in the legal list.
sub check_for_illegal_params {
    my @params = @_;
    my @legal_params = qw(
        training_datafile
        entropy_threshold
        max_depth_desired
        csv_class_column_index
        csv_columns_for_features
        symbolic_to_numeric_cardinality_threshold
        number_of_histogram_bins
        how_many_trees
        how_many_training_samples_per_tree
        looking_for_needles_in_haystack
        csv_cleanup_needed
        debug1
    );
    # A lookup table gives a single membership test per parameter instead
    # of a nested scan over the legal list.
    my %is_legal = map { $_ => 1 } @legal_params;
    foreach my $param (@params) {
        return 0 unless defined $param && exists $is_legal{$param};
    }
    # Reached for an all-legal list and also for an empty one, so the result
    # is always a defined 0 or 1 and never undef.
    return 1;
}
# Normalises one line of CSV text so that each field after the first becomes
# a single whitespace-free token.
#
# Steps, in order:
#   1. The characters / : ? ( ) [ ] { } ' are each replaced by a space.
#   2. In the part of the line starting at the first comma, every
#      double-quoted string "..." (possibly empty) loses its quotes and its
#      embedded commas, and its words are joined with underscores.
#   3. Every field that follows a comma has its runs of whitespace turned
#      into underscores, with leading/trailing underscores trimmed.
#   4. Every field that is empty (or only whitespace) after a comma becomes
#      'NA'.
# The text before the first comma is only affected by step 1.
#
# Argument: the raw line (string).
# Returns:  the cleaned-up line (string).
sub cleanup_csv {
    my $line = shift;
    $line =~ tr/\/:?()[]{}'/ /;

    # Turns the inside of a quoted string into one underscore-joined token.
    my $flatten_quoted = sub {
        my $item = shift;
        $item =~ s/^\s+|,|\s+$//g;
        return join '_', split /\s+/, $item;
    };
    # Collapses whitespace inside an unquoted field into underscores.
    my $underscore_field = sub {
        my $item = shift;
        $item =~ s/\s+/_/g;
        $item =~ s/^\s*_|_\s*$//g;
        return $item;
    };

    # Substitutions are done in place with s///ge so that the field that
    # actually matched is the one rewritten; searching the line for the
    # matched text would hit an earlier identical substring instead.
    my $first_comma = index($line, ',');
    if ($first_comma >= 0) {
        my $head = substr($line, 0, $first_comma);
        my $tail = substr($line, $first_comma);
        $tail =~ s/"([^"]*)"/$flatten_quoted->($1)/ge;
        $line = $head . $tail;
    }
    $line =~ s/,(\s*[^,]+)(?=,|$)/',' . $underscore_field->($1)/ge;
    $line =~ s/,\s*(?=,|$)/,NA/g;
    return $line;
}
1;
( run in 0.668 second using v1.01-cache-2.11-cpan-6b5c3043376 )