Algorithm-DecisionTree
view release on metacpan or search on metacpan
lib/Algorithm/DecisionTreeWithBagging.pm view on Meta::CPAN
$decision_classes{$sorted_classes[0]}++;
}
my @sorted_by_votes_decision_classes = sort {$decision_classes{$b} <=> $decision_classes{$a}} keys %decision_classes;
return $sorted_by_votes_decision_classes[0];
}
sub get_all_class_names {
my $self = shift;
return $self->{_all_trees}->{0}->{_class_names};
}
######################################### Utility Routimes #############################################
# Returns an array of two values, the min and the max, of an array of floats
sub minmax {
my $arr = shift;
my ($min, $max);
foreach my $i (0..@{$arr}-1) {
if ( (!defined $min) || ($arr->[$i] < $min) ) {
$min = $arr->[$i];
}
if ( (!defined $max) || ($arr->[$i] > $max) ) {
$max = $arr->[$i];
}
}
return ($min, $max);
}
sub sample_index {
my $arg = shift;
$arg =~ /_(.+)$/;
return $1;
}
sub bags {
my ($l,$n) = @_;
my @bags;
my $i;
for ($i=0; $i < int(@$l/$n); $i++) {
push @bags, [@{$l}[$i*$n..($i+1)*$n-1]];
}
push @{$bags[-1]}, @{$l}[$i*$n..@{$l}-1];
return \@bags;
}
sub check_for_illegal_params {
my @params = @_;
my @legal_params = qw / how_many_bags
bag_overlap_fraction
training_datafile
entropy_threshold
max_depth_desired
csv_class_column_index
csv_columns_for_features
symbolic_to_numeric_cardinality_threshold
number_of_histogram_bins
csv_cleanup_needed
debug1
debug2
debug3
/;
my $found_match_flag;
foreach my $param (@params) {
foreach my $legal (@legal_params) {
$found_match_flag = 0;
if ($param eq $legal) {
$found_match_flag = 1;
last;
}
}
last if $found_match_flag == 0;
}
return $found_match_flag;
}
# from perl docs:
sub fisher_yates_shuffle {
my $arr = shift;
my $i = @$arr;
while (--$i) {
my $j = int rand( $i + 1 );
@$arr[$i, $j] = @$arr[$j, $i];
}
}
sub cleanup_csv {
my $line = shift;
$line =~ tr/\/:?()[]{}'/ /;
# my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
for (@double_quoted) {
my $item = $_;
$item = substr($item, 1, -1);
$item =~ s/^\s+|,|\s+$//g;
$item = join '_', split /\s+/, $item;
substr($line, index($line, $_), length($_)) = $item;
}
my @white_spaced = $line =~ /,(\s*[^,]+)(?=,|$)/g;
for (@white_spaced) {
my $item = $_;
$item =~ s/\s+/_/g;
$item =~ s/^\s*_|_\s*$//g;
substr($line, index($line, $_), length($_)) = $item;
}
$line =~ s/,\s*(?=,|$)/,NA/g;
return $line;
}
1;
( run in 1.105 second using v1.01-cache-2.11-cpan-6b5c3043376 )