Algorithm-DecisionTree

 view release on metacpan or  search on metacpan

lib/Algorithm/DecisionTreeWithBagging.pm  view on Meta::CPAN

    my ($l,$n) = @_;
    my @bags;
    my $i;
    for ($i=0;  $i < int(@$l/$n); $i++) {
        push @bags, [@{$l}[$i*$n..($i+1)*$n-1]];
    }
    push @{$bags[-1]}, @{$l}[$i*$n..@{$l}-1];
    return \@bags;
}

sub check_for_illegal_params {
    my @params = @_;
    my @legal_params = qw / how_many_bags
                            bag_overlap_fraction
                            training_datafile
                            entropy_threshold
                            max_depth_desired
                            csv_class_column_index
                            csv_columns_for_features
                            symbolic_to_numeric_cardinality_threshold
                            number_of_histogram_bins
                            csv_cleanup_needed
                            debug1
                            debug2
                            debug3
                          /;
    my $found_match_flag;
    foreach my $param (@params) {
        foreach my $legal (@legal_params) {
            $found_match_flag = 0;
            if ($param eq $legal) {
                $found_match_flag = 1;
                last;
            }
        }
        last if $found_match_flag == 0;
    }
    return $found_match_flag;
}

# from perl docs:                                                                         
sub fisher_yates_shuffle {
    my $arr =  shift;
    my $i = @$arr;
    while (--$i) {
        my $j = int rand( $i + 1 );
        @$arr[$i, $j] = @$arr[$j, $i];
    }
}

sub cleanup_csv {
    my $line = shift;
    $line =~ tr/\/:?()[]{}'/          /;
#    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
    for (@double_quoted) {
        my $item = $_;
        $item = substr($item, 1, -1);
        $item =~ s/^\s+|,|\s+$//g;
        $item = join '_',  split /\s+/, $item;
        substr($line, index($line, $_), length($_)) = $item;
    }
    my @white_spaced = $line =~ /,(\s*[^,]+)(?=,|$)/g;
    for (@white_spaced) {
        my $item = $_;
        $item =~ s/\s+/_/g;
        $item =~ s/^\s*_|_\s*$//g;
        substr($line, index($line, $_), length($_)) = $item;
    }
    $line =~ s/,\s*(?=,|$)/,NA/g;
    return $line;
}

1;



( run in 1.005 second using v1.01-cache-2.11-cpan-5837b0d9d2c )