Algorithm-DecisionTree

 view release on metacpan or  search on metacpan

lib/Algorithm/DecisionTreeWithBagging.pm  view on Meta::CPAN

        $decision_classes{$sorted_classes[0]}++;
    }
    my @sorted_by_votes_decision_classes = sort {$decision_classes{$b} <=> $decision_classes{$a}} keys %decision_classes;
    return $sorted_by_votes_decision_classes[0];
}

sub get_all_class_names {
    my $self = shift;    
    return $self->{_all_trees}->{0}->{_class_names};
}

#########################################  Utility Routimes  #############################################
# Returns an array of two values, the min and the max, of an array of floats
sub minmax {
    my $arr = shift;
    my ($min, $max);
    foreach my $i (0..@{$arr}-1) {
        if ( (!defined $min) || ($arr->[$i] < $min) ) {
            $min = $arr->[$i];
        }
        if ( (!defined $max) || ($arr->[$i] > $max) ) {
            $max = $arr->[$i];
        }
    }
    return ($min, $max);
}

sub sample_index {
    my $arg = shift;
    $arg =~ /_(.+)$/;
    return $1;
}    

sub bags {
    my ($l,$n) = @_;
    my @bags;
    my $i;
    for ($i=0;  $i < int(@$l/$n); $i++) {
        push @bags, [@{$l}[$i*$n..($i+1)*$n-1]];
    }
    push @{$bags[-1]}, @{$l}[$i*$n..@{$l}-1];
    return \@bags;
}

sub check_for_illegal_params {
    my @params = @_;
    my @legal_params = qw / how_many_bags
                            bag_overlap_fraction
                            training_datafile
                            entropy_threshold
                            max_depth_desired
                            csv_class_column_index
                            csv_columns_for_features
                            symbolic_to_numeric_cardinality_threshold
                            number_of_histogram_bins
                            csv_cleanup_needed
                            debug1
                            debug2
                            debug3
                          /;
    my $found_match_flag;
    foreach my $param (@params) {
        foreach my $legal (@legal_params) {
            $found_match_flag = 0;
            if ($param eq $legal) {
                $found_match_flag = 1;
                last;
            }
        }
        last if $found_match_flag == 0;
    }
    return $found_match_flag;
}

# from perl docs:                                                                         
sub fisher_yates_shuffle {
    my $arr =  shift;
    my $i = @$arr;
    while (--$i) {
        my $j = int rand( $i + 1 );
        @$arr[$i, $j] = @$arr[$j, $i];
    }
}

sub cleanup_csv {
    my $line = shift;
    $line =~ tr/\/:?()[]{}'/          /;
#    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
    for (@double_quoted) {
        my $item = $_;
        $item = substr($item, 1, -1);
        $item =~ s/^\s+|,|\s+$//g;
        $item = join '_',  split /\s+/, $item;
        substr($line, index($line, $_), length($_)) = $item;
    }
    my @white_spaced = $line =~ /,(\s*[^,]+)(?=,|$)/g;
    for (@white_spaced) {
        my $item = $_;
        $item =~ s/\s+/_/g;
        $item =~ s/^\s*_|_\s*$//g;
        substr($line, index($line, $_), length($_)) = $item;
    }
    $line =~ s/,\s*(?=,|$)/,NA/g;
    return $line;
}

1;



( run in 1.105 second using v1.01-cache-2.11-cpan-6b5c3043376 )