Algorithm-DecisionTree
view release on metacpan or search on metacpan
lib/Algorithm/DecisionTreeWithBagging.pm view on Meta::CPAN
my ($l,$n) = @_;
my @bags;
my $i;
for ($i=0; $i < int(@$l/$n); $i++) {
push @bags, [@{$l}[$i*$n..($i+1)*$n-1]];
}
push @{$bags[-1]}, @{$l}[$i*$n..@{$l}-1];
return \@bags;
}
sub check_for_illegal_params {
my @params = @_;
my @legal_params = qw / how_many_bags
bag_overlap_fraction
training_datafile
entropy_threshold
max_depth_desired
csv_class_column_index
csv_columns_for_features
symbolic_to_numeric_cardinality_threshold
number_of_histogram_bins
csv_cleanup_needed
debug1
debug2
debug3
/;
my $found_match_flag;
foreach my $param (@params) {
foreach my $legal (@legal_params) {
$found_match_flag = 0;
if ($param eq $legal) {
$found_match_flag = 1;
last;
}
}
last if $found_match_flag == 0;
}
return $found_match_flag;
}
# from perl docs:
sub fisher_yates_shuffle {
my $arr = shift;
my $i = @$arr;
while (--$i) {
my $j = int rand( $i + 1 );
@$arr[$i, $j] = @$arr[$j, $i];
}
}
sub cleanup_csv {
my $line = shift;
$line =~ tr/\/:?()[]{}'/ /;
# my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
for (@double_quoted) {
my $item = $_;
$item = substr($item, 1, -1);
$item =~ s/^\s+|,|\s+$//g;
$item = join '_', split /\s+/, $item;
substr($line, index($line, $_), length($_)) = $item;
}
my @white_spaced = $line =~ /,(\s*[^,]+)(?=,|$)/g;
for (@white_spaced) {
my $item = $_;
$item =~ s/\s+/_/g;
$item =~ s/^\s*_|_\s*$//g;
substr($line, index($line, $_), length($_)) = $item;
}
$line =~ s/,\s*(?=,|$)/,NA/g;
return $line;
}
1;
( run in 1.005 second using v1.01-cache-2.11-cpan-5837b0d9d2c )