view release on metacpan or search on metacpan
Examples/get_indexes_associated_with_fields.pl view on Meta::CPAN
## Normalize one line of a CSV record: punctuation characters become spaces,
## quoted multi-word fields and whitespace-bearing fields are underscore-joined,
## and empty fields are filled in with the string "NA".
sub cleanup_csv {
    my $line = shift;
    # Replace slash, colon, question mark, parens/brackets/braces and single
    # quotes with spaces so they cannot interfere with the parsing below.
    $line =~ tr/\/:?()[]{}'/ /;
    # Collect the double-quoted tokens occurring after the first comma (the
    # first field is left untouched).  Using [^\"]* rather than [^\"]+ so an
    # empty quoted field "" is also caught and later turned into NA --
    # consistent with the lib/Algorithm/DecisionTree.pm version of this sub.
    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
    for (@double_quoted) {
        my $item = $_;
        $item = substr($item, 1, -1);            # strip the surrounding quotes
        # BUGFIX: was s/^s+|,|\s+$//g -- the missing backslash made it strip
        # leading literal 's' characters instead of leading whitespace.
        $item =~ s/^\s+|,|\s+$//g;
        $item = join '_', split /\s+/, $item;    # inner whitespace -> underscores
        substr($line, index($line, $_), length($_)) = $item;
    }
    # Underscore-join whatever whitespace remains inside non-first fields.
    my @white_spaced = $line =~ /,(\s*[^,]+)(?=,|$)/g;
    for (@white_spaced) {
        my $item = $_;
        $item =~ s/\s+/_/g;
        $item =~ s/^\s*_|_\s*$//g;               # drop underscores created at the edges
        substr($line, index($line, $_), length($_)) = $item;
    }
    # Any field that is now empty (or pure whitespace) becomes NA.
    $line =~ s/,\s*(?=,|$)/,NA/g;
    return $line;
}
ExamplesRandomizedTrees/classify_database_records.pl view on Meta::CPAN
## and the same field in all other records is an ID number for the record.
## CSV cleanup for database records.  Per the surrounding commentary, the
## first field of the first record is a label and the same field in other
## records is an ID, so only the portion after the first comma is scanned
## for double-quoted tokens.
sub cleanup_csv {
    my $line = shift;
    # Only parens/brackets/braces are blanked out in this variant.
    $line =~ tr/()[]{}/ /;
    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
    for (@double_quoted) {
        my $item = $_;
        $item = substr($item, 1, -1);            # strip the surrounding quotes
        # BUGFIX: was s/^s+|,|\s+$//g -- the missing backslash made it strip
        # leading literal 's' characters instead of leading whitespace.
        $item =~ s/^\s+|,|\s+$//g;
        $item = join '_', split /\s+/, $item;    # inner whitespace -> underscores
        substr($line, index($line, $_), length($_)) = $item;
    }
    # Fields containing internal whitespace, bounded by commas on both sides.
    my @white_spaced = $line =~ /,\s*[^,]+\s+[^,]+\s*,/g;
    for (@white_spaced) {
        my $item = $_;
        $item = substr($item, 0, -1);            # drop the trailing comma
        $item = join '_', split /\s+/, $item unless $item =~ /,\s+$/;
        substr($line, index($line, $_), length($_)) = "$item,";
    }
    # NOTE(review): unlike the lib/ versions this pattern has no (?=,|$), so a
    # trailing empty field is NOT converted to NA -- presumably intentional
    # for this variant; confirm against callers before changing.
    $line =~ s/,\s*(?=,)/,NA/g;
    return $line;
}
# checks whether an element is in an array:
sub contained_in {
my $ele = shift;
my @array = @_;
my $count = 0;
lib/Algorithm/BoostedDecisionTree.pm view on Meta::CPAN
## Normalize one CSV line: punctuation becomes spaces, multi-word fields are
## underscore-joined, and empty fields are filled with "NA".
sub cleanup_csv {
    my $record = shift;
    # Blank out slashes, colons, question marks, brackets and single quotes.
    $record =~ tr/\/:?()[]{}'/ /;
    # Double-quoted tokens occurring after the first comma (the first field
    # is left alone).
    my @quoted_tokens = substr($record, index($record, ',')) =~ /\"[^\"]*\"/g;
    foreach my $token (@quoted_tokens) {
        my $cleaned = substr($token, 1, -1);          # remove the quotes
        $cleaned =~ s/^\s+|,|\s+$//g;                 # trim ends, drop embedded commas
        $cleaned = join '_', split /\s+/, $cleaned;   # whitespace -> underscores
        substr($record, index($record, $token), length($token)) = $cleaned;
    }
    # Underscore-join whitespace left inside the remaining fields.
    my @spacey_fields = $record =~ /,(\s*[^,]+)(?=,|$)/g;
    foreach my $field (@spacey_fields) {
        my $joined = $field;
        $joined =~ s/\s+/_/g;
        $joined =~ s/^\s*_|_\s*$//g;                  # no underscores at the field edges
        substr($record, index($record, $field), length($field)) = $joined;
    }
    # Empty (or all-whitespace) fields become NA.
    $record =~ s/,\s*(?=,|$)/,NA/g;
    return $record;
}
1;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
## Clean one line of CSV input: strip troublesome punctuation, collapse the
## whitespace inside quoted and unquoted fields into underscores, and mark
## empty fields with the string "NA".
sub cleanup_csv {
    my $csv_line = shift;
    # Map slash, colon, question mark, brackets/braces/parens and single
    # quotes to spaces.
    $csv_line =~ tr/\/:?()[]{}'/ /;
    # Every double-quoted run found after the first comma; the leading field
    # is deliberately skipped.
    my @quoted = substr($csv_line, index($csv_line, ',')) =~ /\"[^\"]*\"/g;
    foreach my $quoted_frag (@quoted) {
        my $inner = substr($quoted_frag, 1, -1);   # contents between the quotes
        $inner =~ s/^\s+|,|\s+$//g;                # edge-trim and remove commas
        $inner = join '_', split /\s+/, $inner;    # join words with underscores
        substr($csv_line, index($csv_line, $quoted_frag), length($quoted_frag)) = $inner;
    }
    # Second pass: fields that still carry whitespace.
    my @with_spaces = $csv_line =~ /,(\s*[^,]+)(?=,|$)/g;
    foreach my $frag (@with_spaces) {
        my $underscored = $frag;
        $underscored =~ s/\s+/_/g;
        $underscored =~ s/^\s*_|_\s*$//g;          # strip edge underscores
        substr($csv_line, index($csv_line, $frag), length($frag)) = $underscored;
    }
    # Fill empty fields with NA.
    $csv_line =~ s/,\s*(?=,|$)/,NA/g;
    return $csv_line;
}
######################################### Class EvalTrainingData ########################################
## This subclass of the DecisionTree class is used to evaluate the quality of your
## training data by running a 10-fold cross-validation test on it. This test divides
## all of the training data into ten parts, with nine parts used for training a
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
my $feature_at_node = $self->get_feature() || " ";
my $node_creation_entropy_at_node = $self->get_node_entropy();
my $print_node_creation_entropy_at_node = sprintf("%.3f", $node_creation_entropy_at_node);
my @branch_features_and_values_or_thresholds = @{$self->get_branch_features_and_values_or_thresholds()};
my @class_probabilities = @{$self->get_class_probabilities()};
my @print_class_probabilities = map {sprintf("%0.3f", $_)} @class_probabilities;
my @class_names = @{$self->get_class_names()};
my @print_class_probabilities_with_class =
map {"$class_names[$_]" . '=>' . $print_class_probabilities[$_]} 0..@class_names-1;
print "NODE $serial_num: $offset BRANCH TESTS TO NODE: @branch_features_and_values_or_thresholds\n";
my $second_line_offset = "$offset" . " " x (8 + length("$serial_num"));
print "$second_line_offset" . "Decision Feature: $feature_at_node Node Creation Entropy: " ,
"$print_node_creation_entropy_at_node Class Probs: @print_class_probabilities_with_class\n\n";
$offset .= " ";
foreach my $child (@{$self->get_children()}) {
$child->display_decision_tree($offset);
}
} else {
my $node_creation_entropy_at_node = $self->get_node_entropy();
my $print_node_creation_entropy_at_node = sprintf("%.3f", $node_creation_entropy_at_node);
my @branch_features_and_values_or_thresholds = @{$self->get_branch_features_and_values_or_thresholds()};
my @class_probabilities = @{$self->get_class_probabilities()};
my @print_class_probabilities = map {sprintf("%0.3f", $_)} @class_probabilities;
my @class_names = @{$self->get_class_names()};
my @print_class_probabilities_with_class =
map {"$class_names[$_]" . '=>' . $print_class_probabilities[$_]} 0..@class_names-1;
print "NODE $serial_num: $offset BRANCH TESTS TO LEAF NODE: @branch_features_and_values_or_thresholds\n";
my $second_line_offset = "$offset" . " " x (8 + length("$serial_num"));
print "$second_line_offset" . "Node Creation Entropy: $print_node_creation_entropy_at_node " .
"Class Probs: @print_class_probabilities_with_class\n\n";
}
}
############################## Generate Your Own Numeric Training Data #################################
############################# Class TrainingDataGeneratorNumeric ################################
## See the script generate_training_data_numeric.pl in the examples
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
my $output_file_testing = $self->{_output_test_datafile};
my @all_params;
my $param_string;
open INPUT, $input_parameter_file || "unable to open parameter file: $!";
@all_params = <INPUT>;
@all_params = grep { $_ !~ /^[ ]*#/ } @all_params;
@all_params = grep { $_ =~ s/\r?\n?$//} @all_params;
$param_string = join ' ', @all_params;
my ($class_names, $class_priors, $rest_param) =
$param_string =~ /^\s*class names:(.*?)\s*class priors:(.*?)(feature: .*)/;
my @class_names = grep {defined($_) && length($_) > 0} split /\s+/, $1;
push @{$self->{_class_names}}, @class_names;
my @class_priors = grep {defined($_) && length($_) > 0} split /\s+/, $2;
push @{$self->{_class_priors}}, @class_priors;
my ($feature_string, $bias_string) = $rest_param =~ /(feature:.*?) (bias:.*)/;
my %features_and_values_hash;
my @features = split /(feature[:])/, $feature_string;
@features = grep {defined($_) && length($_) > 0} @features;
foreach my $item (@features) {
next if $item =~ /feature/;
my @splits = split / /, $item;
@splits = grep {defined($_) && length($_) > 0} @splits;
foreach my $i (0..@splits-1) {
if ($i == 0) {
$features_and_values_hash{$splits[0]} = [];
} else {
next if $splits[$i] =~ /values/;
push @{$features_and_values_hash{$splits[0]}}, $splits[$i];
}
}
}
$self->{_features_and_values_hash} = \%features_and_values_hash;
my %bias_hash = %{$self->{_bias_hash}};
my @biases = split /(bias[:]\s*class[:])/, $bias_string;
@biases = grep {defined($_) && length($_) > 0} @biases;
foreach my $item (@biases) {
next if $item =~ /bias/;
my @splits = split /\s+/, $item;
@splits = grep {defined($_) && length($_) > 0} @splits;
my $feature_name;
foreach my $i (0..@splits-1) {
if ($i == 0) {
$bias_hash{$splits[0]} = {};
} elsif ($splits[$i] =~ /(^.+)[:]$/) {
$feature_name = $1;
$bias_hash{$splits[0]}->{$feature_name} = [];
} else {
next if !defined $feature_name;
push @{$bias_hash{$splits[0]}->{$feature_name}}, $splits[$i]
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
my @values = @{$features_and_values_hash{$feature}};
my $bias_string = $bias_hash{$class_label}->{$feature}->[0];
my $no_bias = 1.0 / @values;
$bias_string = "$values[0]" . "=$no_bias" if !defined $bias_string;
my %value_priors_to_unit_interval_map;
my @splits = split /\s*=\s*/, $bias_string;
my $chosen_for_bias_value = $splits[0];
my $chosen_bias = $splits[1];
my $remaining_bias = 1 - $chosen_bias;
my $remaining_portion_bias = $remaining_bias / (@values -1);
@splits = grep {defined($_) && length($_) > 0} @splits;
my $accumulated = 0;
foreach my $i (0..@values-1) {
if ($values[$i] eq $chosen_for_bias_value) {
$value_priors_to_unit_interval_map{$values[$i]}
= [$accumulated, $accumulated + $chosen_bias];
$accumulated += $chosen_bias;
} else {
$value_priors_to_unit_interval_map{$values[$i]}
= [$accumulated, $accumulated + $remaining_portion_bias];
$accumulated += $remaining_portion_bias;
lib/Algorithm/DecisionTreeWithBagging.pm view on Meta::CPAN
## Sanitize a CSV line for downstream parsing: punctuation is spaced out,
## whitespace inside fields becomes underscores, empty fields become "NA".
sub cleanup_csv {
    my $row = shift;
    # Replace /, :, ?, parens, brackets, braces and ' with spaces.
    $row =~ tr/\/:?()[]{}'/ /;
    # Quoted tokens after the first comma (first field is not scanned).
    my @in_quotes = substr($row, index($row, ',')) =~ /\"[^\"]*\"/g;
    foreach my $quoted (@in_quotes) {
        my $body = substr($quoted, 1, -1);     # text inside the quotes
        $body =~ s/^\s+|,|\s+$//g;             # trim edges, remove commas
        $body = join '_', split /\s+/, $body;  # underscore-join the words
        substr($row, index($row, $quoted), length($quoted)) = $body;
    }
    # Any remaining fields that contain whitespace.
    my @spaced = $row =~ /,(\s*[^,]+)(?=,|$)/g;
    foreach my $piece (@spaced) {
        my $fixed = $piece;
        $fixed =~ s/\s+/_/g;
        $fixed =~ s/^\s*_|_\s*$//g;            # no leading/trailing underscores
        substr($row, index($row, $piece), length($piece)) = $fixed;
    }
    # Empty fields are marked NA.
    $row =~ s/,\s*(?=,|$)/,NA/g;
    return $row;
}
1;
lib/Algorithm/RandomizedTreesForBigData.pm view on Meta::CPAN
## Scrub one CSV line: certain punctuation characters are turned into spaces,
## multi-word field values are joined with underscores, and fields left empty
## are set to the string "NA".
sub cleanup_csv {
    my $data_line = shift;
    # Space out slash, colon, question mark, brackets and single quotes.
    $data_line =~ tr/\/:?()[]{}'/ /;
    # Grab double-quoted segments located after the first comma only.
    my @segments = substr($data_line, index($data_line, ',')) =~ /\"[^\"]*\"/g;
    foreach my $seg (@segments) {
        my $text = substr($seg, 1, -1);        # drop the enclosing quotes
        $text =~ s/^\s+|,|\s+$//g;             # edge whitespace and commas out
        $text = join '_', split /\s+/, $text;  # internal whitespace -> _
        substr($data_line, index($data_line, $seg), length($seg)) = $text;
    }
    # Now handle unquoted fields that still contain whitespace.
    my @loose = $data_line =~ /,(\s*[^,]+)(?=,|$)/g;
    foreach my $chunk (@loose) {
        my $tight = $chunk;
        $tight =~ s/\s+/_/g;
        $tight =~ s/^\s*_|_\s*$//g;            # clip underscores at the edges
        substr($data_line, index($data_line, $chunk), length($chunk)) = $tight;
    }
    # Represent empty fields as NA.
    $data_line =~ s/,\s*(?=,|$)/,NA/g;
    return $data_line;
}
1;
lib/Algorithm/RegressionTree.pm view on Meta::CPAN
## Prepare one CSV line for parsing: neutralize punctuation, underscore-join
## whitespace inside field values, and substitute "NA" for empty fields.
sub cleanup_csv {
    my $line_in = shift;
    # Punctuation that would confuse field splitting is replaced by spaces.
    $line_in =~ tr/\/:?()[]{}'/ /;
    # All quoted tokens that appear after the first comma.
    my @quoted_parts = substr($line_in, index($line_in, ',')) =~ /\"[^\"]*\"/g;
    foreach my $part (@quoted_parts) {
        my $value = substr($part, 1, -1);        # unquote
        $value =~ s/^\s+|,|\s+$//g;              # trim and de-comma
        $value = join '_', split /\s+/, $value;  # words joined by underscores
        substr($line_in, index($line_in, $part), length($part)) = $value;
    }
    # Remaining whitespace-bearing fields get the same underscore treatment.
    my @ws_fields = $line_in =~ /,(\s*[^,]+)(?=,|$)/g;
    foreach my $ws (@ws_fields) {
        my $compact = $ws;
        $compact =~ s/\s+/_/g;
        $compact =~ s/^\s*_|_\s*$//g;            # trim underscores at the edges
        substr($line_in, index($line_in, $ws), length($ws)) = $compact;
    }
    # Finally, empty fields become NA.
    $line_in =~ s/,\s*(?=,|$)/,NA/g;
    return $line_in;
}
sub transpose {
my $matrix = shift;
my $num_rows = $matrix->rows();
my $num_cols = $matrix->cols();
my $transpose = Math::GSL::Matrix->new($num_cols, $num_rows);
lib/Algorithm/RegressionTree.pm view on Meta::CPAN
}
## Recursively print this node and its subtree to STDOUT.  Interior nodes
## print their decision feature on a second line; leaf nodes print only the
## branch tests.  $offset is the indentation accumulated so far and grows
## for each level of recursion.
sub display_regression_tree {
    my $self = shift;
    my $offset = shift;
    my $serial_num = $self->get_serial_num();
    if (@{$self->get_children()} > 0) {
        my $feature_at_node = $self->get_feature() || " ";
        my @branch_features_and_values_or_thresholds = @{$self->get_branch_features_and_values_or_thresholds()};
        print "NODE $serial_num: $offset BRANCH TESTS TO NODE: @branch_features_and_values_or_thresholds\n";
        # Presumably indents the second line past the "NODE <num>: " prefix
        # so it lines up with the branch-test text -- confirm visually.
        my $second_line_offset = "$offset" . " " x (8 + length("$serial_num"));
        print "$second_line_offset" . "Decision Feature: $feature_at_node\n\n";
        $offset .= " ";
        foreach my $child (@{$self->get_children()}) {
            $child->display_regression_tree($offset);
        }
    } else {
        my @branch_features_and_values_or_thresholds = @{$self->get_branch_features_and_values_or_thresholds()};
        print "NODE $serial_num: $offset BRANCH TESTS TO LEAF NODE: @branch_features_and_values_or_thresholds\n";
        # Removed an unused $second_line_offset that was computed here but
        # never printed (cf. display_decision_tree, whose leaf branch does
        # print a second line with entropy/probabilities).
    }
}
1;