Algorithm-DecisionTree

 view release on metacpan or  search on metacpan

Examples/get_indexes_associated_with_fields.pl  view on Meta::CPAN


sub cleanup_csv {
    my $line = shift;
    $line =~ tr/\/:?()[]{}'/          /;
    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
    for (@double_quoted) {
        my $item = $_;
        $item = substr($item, 1, -1);
        $item =~ s/^s+|,|\s+$//g;
        $item = join '_',  split /\s+/, $item;
        substr($line, index($line, $_), length($_)) = $item;
    }
    my @white_spaced = $line =~ /,(\s*[^,]+)(?=,|$)/g;
    for (@white_spaced) {
        my $item = $_;
        $item =~ s/\s+/_/g;
        $item =~ s/^\s*_|_\s*$//g;
        substr($line, index($line, $_), length($_)) = $item;
    }
    $line =~ s/,\s*(?=,|$)/,NA/g;
    return $line;
}

ExamplesRandomizedTrees/classify_database_records.pl  view on Meta::CPAN

##  and the same field in all other records is an ID number for the record.
sub cleanup_csv {
    my $line = shift;
    $line =~ tr/()[]{}/      /;
    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
    for (@double_quoted) {
        my $item = $_;
        $item = substr($item, 1, -1);
        $item =~ s/^s+|,|\s+$//g;
        $item = join '_',  split /\s+/, $item;
        substr($line, index($line, $_), length($_)) = $item;
    }
    my @white_spaced = $line =~ /,\s*[^,]+\s+[^,]+\s*,/g;
    for (@white_spaced) {
        my $item = $_;
        $item = substr($item, 0, -1);
        $item = join '_',  split /\s+/, $item unless $item =~ /,\s+$/;
        substr($line, index($line, $_), length($_)) = "$item,";
    }
    $line =~ s/,\s*(?=,)/,NA/g;
    return $line;
}

# checks whether an element is in an array:
sub contained_in {
    my $ele = shift;
    my @array = @_;
    my $count = 0;

lib/Algorithm/BoostedDecisionTree.pm  view on Meta::CPAN

sub cleanup_csv {
    my $line = shift;
    $line =~ tr/\/:?()[]{}'/          /;
#    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
    for (@double_quoted) {
        my $item = $_;
        $item = substr($item, 1, -1);
        $item =~ s/^\s+|,|\s+$//g;
        $item = join '_',  split /\s+/, $item;
        substr($line, index($line, $_), length($_)) = $item;
    }
    my @white_spaced = $line =~ /,(\s*[^,]+)(?=,|$)/g;
    for (@white_spaced) {
        my $item = $_;
        $item =~ s/\s+/_/g;
        $item =~ s/^\s*_|_\s*$//g;
        substr($line, index($line, $_), length($_)) = $item;
    }
    $line =~ s/,\s*(?=,|$)/,NA/g;
    return $line;
}

1;

lib/Algorithm/DecisionTree.pm  view on Meta::CPAN

sub cleanup_csv {
    my $line = shift;
    $line =~ tr/\/:?()[]{}'/          /;
#    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
    for (@double_quoted) {
        my $item = $_;
        $item = substr($item, 1, -1);
        $item =~ s/^\s+|,|\s+$//g;
        $item = join '_',  split /\s+/, $item;
        substr($line, index($line, $_), length($_)) = $item;
    }
    my @white_spaced = $line =~ /,(\s*[^,]+)(?=,|$)/g;
    for (@white_spaced) {
        my $item = $_;
        $item =~ s/\s+/_/g;
        $item =~ s/^\s*_|_\s*$//g;
        substr($line, index($line, $_), length($_)) = $item;
    }
    $line =~ s/,\s*(?=,|$)/,NA/g;
    return $line;
}

######################################### Class EvalTrainingData  ########################################

##  This subclass of the DecisionTree class is used to evaluate the quality of your
##  training data by running a 10-fold cross-validation test on it. This test divides
##  all of the training data into ten parts, with nine parts used for training a

lib/Algorithm/DecisionTree.pm  view on Meta::CPAN

        my $feature_at_node = $self->get_feature() || " ";
        my $node_creation_entropy_at_node = $self->get_node_entropy();
        my $print_node_creation_entropy_at_node = sprintf("%.3f", $node_creation_entropy_at_node);
        my @branch_features_and_values_or_thresholds = @{$self->get_branch_features_and_values_or_thresholds()};
        my @class_probabilities = @{$self->get_class_probabilities()};
        my @print_class_probabilities = map {sprintf("%0.3f", $_)} @class_probabilities;
        my @class_names = @{$self->get_class_names()};
        my @print_class_probabilities_with_class =
            map {"$class_names[$_]" . '=>' . $print_class_probabilities[$_]} 0..@class_names-1;
        print "NODE $serial_num: $offset BRANCH TESTS TO NODE: @branch_features_and_values_or_thresholds\n";
        my $second_line_offset = "$offset" . " " x (8 + length("$serial_num"));
        print "$second_line_offset" . "Decision Feature: $feature_at_node    Node Creation Entropy: " ,
              "$print_node_creation_entropy_at_node   Class Probs: @print_class_probabilities_with_class\n\n";
        $offset .= "   ";
        foreach my $child (@{$self->get_children()}) {
            $child->display_decision_tree($offset);
        }
    } else {
        my $node_creation_entropy_at_node = $self->get_node_entropy();
        my $print_node_creation_entropy_at_node = sprintf("%.3f", $node_creation_entropy_at_node);
        my @branch_features_and_values_or_thresholds = @{$self->get_branch_features_and_values_or_thresholds()};
        my @class_probabilities = @{$self->get_class_probabilities()};
        my @print_class_probabilities = map {sprintf("%0.3f", $_)} @class_probabilities;
        my @class_names = @{$self->get_class_names()};
        my @print_class_probabilities_with_class =
            map {"$class_names[$_]" . '=>' . $print_class_probabilities[$_]} 0..@class_names-1;
        print "NODE $serial_num: $offset BRANCH TESTS TO LEAF NODE: @branch_features_and_values_or_thresholds\n";
        my $second_line_offset = "$offset" . " " x (8 + length("$serial_num"));
        print "$second_line_offset" . "Node Creation Entropy: $print_node_creation_entropy_at_node   " .
              "Class Probs: @print_class_probabilities_with_class\n\n";
    }
}


##############################  Generate Your Own Numeric Training Data  #################################
#############################      Class TrainingDataGeneratorNumeric     ################################

##  See the script generate_training_data_numeric.pl in the examples

lib/Algorithm/DecisionTree.pm  view on Meta::CPAN

    my $output_file_testing = $self->{_output_test_datafile};
    my @all_params;
    my $param_string;
    open INPUT, $input_parameter_file || "unable to open parameter file: $!";
    @all_params = <INPUT>;
    @all_params = grep { $_ !~ /^[ ]*#/ } @all_params;
    @all_params = grep { $_ =~ s/\r?\n?$//} @all_params;
    $param_string = join ' ', @all_params;
    my ($class_names, $class_priors, $rest_param) = 
              $param_string =~ /^\s*class names:(.*?)\s*class priors:(.*?)(feature: .*)/;
    my @class_names = grep {defined($_) && length($_) > 0} split /\s+/, $1;
    push @{$self->{_class_names}}, @class_names;
    my @class_priors =   grep {defined($_) && length($_) > 0} split /\s+/, $2;
    push @{$self->{_class_priors}}, @class_priors;    
    my ($feature_string, $bias_string) = $rest_param =~ /(feature:.*?) (bias:.*)/;
    my %features_and_values_hash;
    my @features = split /(feature[:])/, $feature_string;
    @features = grep {defined($_) && length($_) > 0} @features;
    foreach my $item (@features) {
        next if $item =~ /feature/;
        my @splits = split / /, $item;
        @splits = grep {defined($_) && length($_) > 0} @splits;
        foreach my $i (0..@splits-1) {
            if ($i == 0) {
                $features_and_values_hash{$splits[0]} = [];
            } else {
                next if $splits[$i] =~ /values/;
                push @{$features_and_values_hash{$splits[0]}}, $splits[$i];
            }
        }
    }
    $self->{_features_and_values_hash} = \%features_and_values_hash;
    my %bias_hash = %{$self->{_bias_hash}};
    my @biases = split /(bias[:]\s*class[:])/, $bias_string;
    @biases = grep {defined($_) && length($_) > 0} @biases;
    foreach my $item (@biases) {
        next if $item =~ /bias/;
        my @splits = split /\s+/, $item;
        @splits = grep {defined($_) && length($_) > 0} @splits;
        my $feature_name;
        foreach my $i (0..@splits-1) {
            if ($i == 0) {
                $bias_hash{$splits[0]} = {};
            } elsif ($splits[$i] =~ /(^.+)[:]$/) {
                $feature_name = $1;
                $bias_hash{$splits[0]}->{$feature_name} = [];
            } else {
                next if !defined $feature_name;
                push @{$bias_hash{$splits[0]}->{$feature_name}}, $splits[$i]

lib/Algorithm/DecisionTree.pm  view on Meta::CPAN

            my @values = @{$features_and_values_hash{$feature}};
            my $bias_string = $bias_hash{$class_label}->{$feature}->[0];
            my $no_bias = 1.0 / @values;
            $bias_string = "$values[0]" . "=$no_bias" if !defined $bias_string;
            my %value_priors_to_unit_interval_map;
            my @splits = split /\s*=\s*/, $bias_string;
            my $chosen_for_bias_value = $splits[0];
            my $chosen_bias = $splits[1];
            my $remaining_bias = 1 - $chosen_bias;
            my $remaining_portion_bias = $remaining_bias / (@values -1);
            @splits = grep {defined($_) && length($_) > 0} @splits;
            my $accumulated = 0;
            foreach my $i (0..@values-1) {
                if ($values[$i] eq $chosen_for_bias_value) {
                    $value_priors_to_unit_interval_map{$values[$i]} 
                        = [$accumulated, $accumulated + $chosen_bias];
                    $accumulated += $chosen_bias;
                } else {
                    $value_priors_to_unit_interval_map{$values[$i]} 
                      = [$accumulated, $accumulated + $remaining_portion_bias];
                    $accumulated += $remaining_portion_bias;           

lib/Algorithm/DecisionTreeWithBagging.pm  view on Meta::CPAN

sub cleanup_csv {
    my $line = shift;
    $line =~ tr/\/:?()[]{}'/          /;
#    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
    for (@double_quoted) {
        my $item = $_;
        $item = substr($item, 1, -1);
        $item =~ s/^\s+|,|\s+$//g;
        $item = join '_',  split /\s+/, $item;
        substr($line, index($line, $_), length($_)) = $item;
    }
    my @white_spaced = $line =~ /,(\s*[^,]+)(?=,|$)/g;
    for (@white_spaced) {
        my $item = $_;
        $item =~ s/\s+/_/g;
        $item =~ s/^\s*_|_\s*$//g;
        substr($line, index($line, $_), length($_)) = $item;
    }
    $line =~ s/,\s*(?=,|$)/,NA/g;
    return $line;
}

1;

lib/Algorithm/RandomizedTreesForBigData.pm  view on Meta::CPAN

sub cleanup_csv {
    my $line = shift;
    $line =~ tr/\/:?()[]{}'/          /;
#    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
    for (@double_quoted) {
        my $item = $_;
        $item = substr($item, 1, -1);
        $item =~ s/^\s+|,|\s+$//g;
        $item = join '_',  split /\s+/, $item;
        substr($line, index($line, $_), length($_)) = $item;
    }
    my @white_spaced = $line =~ /,(\s*[^,]+)(?=,|$)/g;
    for (@white_spaced) {
        my $item = $_;
        $item =~ s/\s+/_/g;
        $item =~ s/^\s*_|_\s*$//g;
        substr($line, index($line, $_), length($_)) = $item;
    }
    $line =~ s/,\s*(?=,|$)/,NA/g;
    return $line;
}


1;

lib/Algorithm/RegressionTree.pm  view on Meta::CPAN

sub cleanup_csv {
    my $line = shift;
    $line =~ tr/\/:?()[]{}'/          /;
#    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
    for (@double_quoted) {
        my $item = $_;
        $item = substr($item, 1, -1);
        $item =~ s/^\s+|,|\s+$//g;
        $item = join '_',  split /\s+/, $item;
        substr($line, index($line, $_), length($_)) = $item;
    }
    my @white_spaced = $line =~ /,(\s*[^,]+)(?=,|$)/g;
    for (@white_spaced) {
        my $item = $_;
        $item =~ s/\s+/_/g;
        $item =~ s/^\s*_|_\s*$//g;
        substr($line, index($line, $_), length($_)) = $item;
    }
    $line =~ s/,\s*(?=,|$)/,NA/g;
    return $line;
}

sub transpose {
    my $matrix = shift;
    my $num_rows = $matrix->rows();
    my $num_cols = $matrix->cols();
    my $transpose = Math::GSL::Matrix->new($num_cols, $num_rows);

lib/Algorithm/RegressionTree.pm  view on Meta::CPAN

}

sub display_regression_tree {
    my $self = shift;
    my $offset = shift;
    my $serial_num = $self->get_serial_num();
    if (@{$self->get_children()} > 0) {
        my $feature_at_node = $self->get_feature() || " ";
        my @branch_features_and_values_or_thresholds = @{$self->get_branch_features_and_values_or_thresholds()};
        print "NODE $serial_num: $offset BRANCH TESTS TO NODE: @branch_features_and_values_or_thresholds\n";
        my $second_line_offset = "$offset" . " " x (8 + length("$serial_num"));
        print "$second_line_offset" . "Decision Feature: $feature_at_node\n\n";
        $offset .= "   ";
        foreach my $child (@{$self->get_children()}) {
            $child->display_regression_tree($offset);
        }
    } else {
        my @branch_features_and_values_or_thresholds = @{$self->get_branch_features_and_values_or_thresholds()};
        print "NODE $serial_num: $offset BRANCH TESTS TO LEAF NODE: @branch_features_and_values_or_thresholds\n";
        my $second_line_offset = "$offset" . " " x (8 + length("$serial_num"));
    }
}

1;



( run in 0.661 second using v1.01-cache-2.11-cpan-65fba6d93b7 )