view release on metacpan or search on metacpan
Examples/classify_test_data_in_a_file.pl view on Meta::CPAN
@all_class_names = grep {$_ if !$seen{$_}++} values %class_for_sample_hash;
print "\n All class names: @all_class_names\n" if $debug;
%numeric_features_valuerange_hash = ();
my %feature_values_how_many_uniques_hash = ();
%features_and_unique_values_hash = ();
# For each feature in %features_and_values_hash record: its distinct values,
# how many distinct values there are, and -- when every distinct value looks
# like a decimal number -- whatever minmax() returns for those values.
foreach my $feature (keys %features_and_values_hash) {
    my %seen1 = ();
    # Keep the first occurrence of every value other than 'NA', sorted as
    # strings.  The grep block yields $_ itself, so values that are false in
    # Perl ("0" and the empty string) are filtered out as well.
    my @unique_values_for_feature = sort grep {$_ if $_ ne 'NA' && !$seen1{$_}++}
                                         @{$features_and_values_hash{$feature}};
    $feature_values_how_many_uniques_hash{$feature} = scalar @unique_values_for_feature;

    # The feature is treated as numeric only if every distinct value is
    # optional digits, a decimal point, then digits (no sign, no exponent,
    # no bare integers).  Stop scanning at the first value that is not.
    my $not_all_values_float = 0;
    foreach my $value (@unique_values_for_feature) {
        next if $value =~ /^\d*\.\d+$/;
        $not_all_values_float = 1;
        last;
    }
    if ($not_all_values_float == 0) {
        # minmax() is defined elsewhere; presumably returns (min, max).
        my @minmaxvalues = minmax(\@unique_values_for_feature);
        $numeric_features_valuerange_hash{$feature} = \@minmaxvalues;
    }
    $features_and_unique_values_hash{$feature} = \@unique_values_for_feature;
}
if ($debug) {
print "\nAll class names: @all_class_names\n";
print "\nEach sample data record:\n";
foreach my $sample (sort {sample_index($a) <=> sample_index($b)} keys %feature_values_for_samples_hash) {
print "$sample => @{$feature_values_for_samples_hash{$sample}}\n";
ExamplesBagging/bagging_for_bulk_classification.pl view on Meta::CPAN
@all_class_names = grep {$_ if !$seen{$_}++} values %class_for_sample_hash;
print "\n All class names: @all_class_names\n" if $debug;
%numeric_features_valuerange_hash = ();
my %feature_values_how_many_uniques_hash = ();
%features_and_unique_values_hash = ();
# For each feature in %features_and_values_hash record: its distinct values,
# how many distinct values there are, and -- when every distinct value looks
# like a decimal number -- whatever minmax() returns for those values.
foreach my $feature (keys %features_and_values_hash) {
    my %seen1 = ();
    # Keep the first occurrence of every value other than 'NA', sorted as
    # strings.  The grep block yields $_ itself, so values that are false in
    # Perl ("0" and the empty string) are filtered out as well.
    my @unique_values_for_feature = sort grep {$_ if $_ ne 'NA' && !$seen1{$_}++}
                                         @{$features_and_values_hash{$feature}};
    $feature_values_how_many_uniques_hash{$feature} = scalar @unique_values_for_feature;

    # The feature is treated as numeric only if every distinct value is
    # optional digits, a decimal point, then digits (no sign, no exponent,
    # no bare integers).  Stop scanning at the first value that is not.
    my $not_all_values_float = 0;
    foreach my $value (@unique_values_for_feature) {
        next if $value =~ /^\d*\.\d+$/;
        $not_all_values_float = 1;
        last;
    }
    if ($not_all_values_float == 0) {
        # minmax() is defined elsewhere; presumably returns (min, max).
        my @minmaxvalues = minmax(\@unique_values_for_feature);
        $numeric_features_valuerange_hash{$feature} = \@minmaxvalues;
    }
    $features_and_unique_values_hash{$feature} = \@unique_values_for_feature;
}
if ($debug) {
print "\nAll class names: @all_class_names\n";
print "\nEach sample data record:\n";
foreach my $sample (sort {sample_index($a) <=> sample_index($b)} keys %feature_values_for_samples_hash) {
print "$sample => @{$feature_values_for_samples_hash{$sample}}\n";
ExamplesBoosting/boosting_for_bulk_classification.pl view on Meta::CPAN
@all_class_names = grep {$_ if !$seen{$_}++} values %class_for_sample_hash;
print "\n All class names: @all_class_names\n" if $debug;
%numeric_features_valuerange_hash = ();
my %feature_values_how_many_uniques_hash = ();
%features_and_unique_values_hash = ();
# For each feature in %features_and_values_hash record: its distinct values,
# how many distinct values there are, and -- when every distinct value looks
# like a decimal number -- whatever minmax() returns for those values.
foreach my $feature (keys %features_and_values_hash) {
    my %seen1 = ();
    # Keep the first occurrence of every value other than 'NA', sorted as
    # strings.  The grep block yields $_ itself, so values that are false in
    # Perl ("0" and the empty string) are filtered out as well.
    my @unique_values_for_feature = sort grep {$_ if $_ ne 'NA' && !$seen1{$_}++}
                                         @{$features_and_values_hash{$feature}};
    $feature_values_how_many_uniques_hash{$feature} = scalar @unique_values_for_feature;

    # The feature is treated as numeric only if every distinct value is
    # optional digits, a decimal point, then digits (no sign, no exponent,
    # no bare integers).  Stop scanning at the first value that is not.
    my $not_all_values_float = 0;
    foreach my $value (@unique_values_for_feature) {
        next if $value =~ /^\d*\.\d+$/;
        $not_all_values_float = 1;
        last;
    }
    if ($not_all_values_float == 0) {
        # minmax() is defined elsewhere; presumably returns (min, max).
        my @minmaxvalues = minmax(\@unique_values_for_feature);
        $numeric_features_valuerange_hash{$feature} = \@minmaxvalues;
    }
    $features_and_unique_values_hash{$feature} = \@unique_values_for_feature;
}
if ($debug) {
print "\nAll class names: @all_class_names\n";
print "\nEach sample data record:\n";
foreach my $sample (sort {sample_index($a) <=> sample_index($b)} keys %feature_values_for_samples_hash) {
print "$sample => @{$feature_values_for_samples_hash{$sample}}\n";
lib/Algorithm/BoostedDecisionTree.pm view on Meta::CPAN
}
}
my %features_and_unique_values_hash = ();
my %feature_values_how_many_uniques_hash = ();
my %numeric_features_valuerange_hash = ();
my $numregex = '[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?';
# For each feature in %features_and_values_hash record: its distinct values
# (in first-seen order), how many there are, and -- when every distinct value
# matches $numregex in full -- whatever minmax() returns for those values.
foreach my $feature (keys %features_and_values_hash) {
    my %seen = ();
    # Keep the first occurrence of every value other than 'NA'.  The grep
    # block yields $_ itself, so values that are false in Perl ("0" and the
    # empty string) are filtered out as well.
    my @unique_values_for_feature = grep {$_ if $_ ne 'NA' && !$seen{$_}++} @{$features_and_values_hash{$feature}};
    $feature_values_how_many_uniques_hash{$feature} = scalar @unique_values_for_feature;

    # A single value that is not a number per $numregex makes the feature
    # non-numeric, so stop scanning at the first one.
    my $not_all_values_float = 0;
    foreach my $value (@unique_values_for_feature) {
        next if $value =~ /^$numregex$/;
        $not_all_values_float = 1;
        last;
    }
    if ($not_all_values_float == 0) {
        # minmax() is defined elsewhere; presumably returns (min, max).
        my @minmaxvalues = minmax(\@unique_values_for_feature);
        $numeric_features_valuerange_hash{$feature} = \@minmaxvalues;
    }
    $features_and_unique_values_hash{$feature} = \@unique_values_for_feature;
}
$self->{_all_trees}->{0}->{_class_names} = \@all_class_names;
$self->{_all_trees}->{0}->{_feature_names} = \@feature_names;
$self->{_all_trees}->{0}->{_samples_class_label_hash} = \%class_for_sample_hash;
$self->{_all_trees}->{0}->{_training_data_hash} = \%feature_values_for_samples_hash;
$self->{_all_trees}->{0}->{_features_and_values_hash} = \%features_and_values_hash;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
die "\n\nError in the names you have used for features and/or values. " .
"Try using the csv_cleanup_needed option in the constructor call."
unless $self->check_names_used(\@features_and_values);
my @new_features_and_values = ();
my $pattern = '(\S+)\s*=\s*(\S+)';
# Rebuild each "feature=value" string into @new_features_and_values.  For a
# feature whose known unique values all match $numregex, and which is not a
# key of _prob_distribution_numeric_features_hash, the value is replaced by
# the result of closest_sampling_point(); otherwise it is kept unchanged.
foreach my $feature_and_value (@features_and_values) {
    # NOTE(review): the match result is not checked, so on a failed match
    # $1/$2 keep whatever an earlier successful match left in them --
    # presumably check_names_used() has already ruled that out; confirm.
    $feature_and_value =~ /$pattern/;
    my ($feature, $value) = ($1, $2);
    my $newvalue = $value;
    my @unique_values_for_feature = @{$self->{_features_and_unique_values_hash}->{$feature}};

    # One value that fails $numregex (defined outside this loop) is enough
    # to mark the feature as non-numeric, so stop at the first one.
    my $not_all_values_float = 0;
    foreach my $unique_value (@unique_values_for_feature) {
        next if $unique_value =~ /^$numregex$/;
        $not_all_values_float = 1;
        last;
    }
    if (! contained_in($feature, keys %{$self->{_prob_distribution_numeric_features_hash}}) &&
                                                               $not_all_values_float == 0) {
        $newvalue = closest_sampling_point($value, \@unique_values_for_feature);
    }
    push @new_features_and_values, "$feature" . '=' . "$newvalue";
}
@features_and_values = @new_features_and_values;
print "\nCL1 New feature and values: @features_and_values\n" if $self->{_debug3};
my %answer = ();
foreach my $class_name (@{$self->{_class_names}}) {
$answer{$class_name} = undef;
}
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
my $pattern3 = '(.+)>(.+)';
# Symbolic features: every feature name that is not a key of
# _prob_distribution_numeric_features_hash.
my @all_symbolic_features = ();
foreach my $candidate (@{$self->{_feature_names}}) {
    next if exists $self->{_prob_distribution_numeric_features_hash}->{$candidate};
    push @all_symbolic_features, $candidate;
}
# First capture group of $pattern1 for every branch entry that matches it.
my @symbolic_features_already_used =
    map { $_ =~ /$pattern1/ ? $1 : () } @features_and_values_or_thresholds_on_branch;
# Symbolic features that have not appeared on the branch so far.
my @symbolic_features_not_yet_used =
    grep { ! contained_in($_, @symbolic_features_already_used) } @all_symbolic_features;
my @true_numeric_types = ();
my @symbolic_types = ();
my @true_numeric_types_feature_names = ();
my @symbolic_types_feature_names = ();
foreach my $item (@features_and_values_or_thresholds_on_branch) {
if ($item =~ /$pattern2/) {
push @true_numeric_types, $item;
push @true_numeric_types_feature_names, $1;
} elsif ($item =~ /$pattern3/) {
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
# Build a hash from feature name to an array ref holding that column's value
# from every record in %data_hash, one entry per column index listed in
# $self->{_csv_columns_for_features}.  Values made up only of digits are
# rewritten with one decimal place via sprintf("%.1f"); all other values are
# kept verbatim.
# NOTE(review): the feature name is read at index $a while the record value is
# read at index $a-1 -- presumably @all_feature_names and the data rows are
# offset by one; confirm.  The lexical `my $a` / `my $b` shadow sort's package
# variables inside these blocks.
my %features_and_values_hash = map { my $a = $_; {$all_feature_names[$a] => [ map {my $b = $_; $b =~ /^\d+$/ ? sprintf("%.1f",$b) : $b} map {$data_hash{$_}->[$a-1]} keys %data_hash ]} } @{$self->{_csv_columns_for_features}};
my %numeric_features_valuerange_hash = ();
my %feature_values_how_many_uniques_hash = ();
my %features_and_unique_values_hash = ();
my $numregex = '[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?';
# For each feature in %features_and_values_hash record: its distinct values,
# how many distinct values there are, and -- when every distinct value matches
# $numregex in full -- whatever minmax() returns for those values.
foreach my $feature (keys %features_and_values_hash) {
    my %seen1 = ();
    # Keep the first occurrence of every value other than 'NA', sorted as
    # strings.  The grep block yields $_ itself, so values that are false in
    # Perl ("0" and the empty string) are filtered out as well.
    my @unique_values_for_feature = sort grep {$_ if $_ ne 'NA' && !$seen1{$_}++}
                                         @{$features_and_values_hash{$feature}};
    $feature_values_how_many_uniques_hash{$feature} = scalar @unique_values_for_feature;

    # A single value that is not a number per $numregex makes the feature
    # non-numeric, so stop scanning at the first one.
    my $not_all_values_float = 0;
    foreach my $value (@unique_values_for_feature) {
        next if $value =~ /^$numregex$/;
        $not_all_values_float = 1;
        last;
    }
    if ($not_all_values_float == 0) {
        # minmax() is defined elsewhere; presumably returns (min, max).
        my @minmaxvalues = minmax(\@unique_values_for_feature);
        $numeric_features_valuerange_hash{$feature} = \@minmaxvalues;
    }
    $features_and_unique_values_hash{$feature} = \@unique_values_for_feature;
}
if ($self->{_debug1}) {
print "\nAll class names: @all_class_names\n";
print "\nEach sample data record:\n";
foreach my $sample (sort {sample_index($a) <=> sample_index($b)} keys %feature_values_for_samples_hash) {
print "$sample => @{$feature_values_for_samples_hash{$sample}}\n";
lib/Algorithm/DecisionTreeWithBagging.pm view on Meta::CPAN
print "all class names: @all_class_names\n" if $self->{_debug2};
my %numeric_features_valuerange_hash_bags = map {$_ => {}} 0 .. $self->{_how_many_bags} - 1;
my %feature_values_how_many_uniques_hash_bags = map {$_ => {}} 0 .. $self->{_how_many_bags} - 1;
my %features_and_unique_values_hash_bags = map {$_ => {}} 0 .. $self->{_how_many_bags} - 1;
my $numregex = '[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?';
# For every bag index and every feature in that bag record: the distinct
# values (in first-seen order), how many there are, and -- when every distinct
# value matches $numregex in full -- whatever minmax() returns for them.
foreach my $i (0 .. $self->{_how_many_bags} - 1) {
    foreach my $feature (keys %{$features_and_values_hash_bags{$i}}) {
        my %seen = ();
        # Keep the first occurrence of every value other than 'NA'.  The grep
        # block yields $_ itself, so values that are false in Perl ("0" and
        # the empty string) are filtered out as well.
        my @unique_values_for_feature_in_bag = grep {$_ if $_ ne 'NA' && !$seen{$_}++} @{$features_and_values_hash_bags{$i}{$feature}};
        $feature_values_how_many_uniques_hash_bags{$i}->{$feature} = scalar @unique_values_for_feature_in_bag;

        # A single value that is not a number per $numregex makes the feature
        # non-numeric in this bag, so stop scanning at the first one.
        my $not_all_values_float = 0;
        foreach my $value (@unique_values_for_feature_in_bag) {
            next if $value =~ /^$numregex$/;
            $not_all_values_float = 1;
            last;
        }
        if ($not_all_values_float == 0) {
            # minmax() is defined elsewhere; presumably returns (min, max).
            my @minmaxvalues = minmax(\@unique_values_for_feature_in_bag);
            $numeric_features_valuerange_hash_bags{$i}->{$feature} = \@minmaxvalues;
        }
        $features_and_unique_values_hash_bags{$i}->{$feature} = \@unique_values_for_feature_in_bag;
    }
}
if ($self->{_debug2}) {
print "\nDisplaying value ranges for numeric features in each bag:\n\n";
foreach my $bag_index (keys %numeric_features_valuerange_hash_bags) {
my %keyval = %{$numeric_features_valuerange_hash_bags{$bag_index}};
lib/Algorithm/RandomizedTreesForBigData.pm view on Meta::CPAN
}
my $numeric_features_valuerange_all_trees = {map {my $t = $_; $t => {}} 0 .. $self->{_how_many_trees} - 1};
my $feature_values_how_many_uniques_all_trees = {map {my $t = $_; $t => {}} 0 .. $self->{_how_many_trees} - 1};
my $features_and_unique_values_all_trees = {map {my $t = $_; $t => {}} 0 .. $self->{_how_many_trees} - 1};
my $numregex = '[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?';
# For every tree index and every feature of that tree (in sorted name order)
# record: the distinct values other than 'NA', how many there are, and -- when
# every distinct value matches $numregex in full -- whatever minmax() returns.
foreach my $t (0 .. $self->{_how_many_trees} - 1) {
    foreach my $feature (sort keys %{$features_and_values_all_trees->{$t}}) {
        # Deduplicate through hash keys; the resulting order is whatever
        # `keys` yields, not the input order.
        my %all_values_for_feature = map {$_ => 1} @{$features_and_values_all_trees->{$t}->{$feature}};
        my @unique_values_for_feature = grep {$_ ne 'NA'} keys %all_values_for_feature;
        $feature_values_how_many_uniques_all_trees->{$t}->{$feature} = scalar @unique_values_for_feature;

        # A single value that is not a number per $numregex makes the feature
        # non-numeric for this tree, so stop scanning at the first one.
        my $not_all_values_float = 0;
        foreach my $value (@unique_values_for_feature) {
            next if $value =~ /^$numregex$/;
            $not_all_values_float = 1;
            last;
        }
        if ($not_all_values_float == 0) {
            # minmax() is defined elsewhere; presumably returns (min, max).
            my @minmaxvalues = minmax(\@unique_values_for_feature);
            $numeric_features_valuerange_all_trees->{$t}->{$feature} = \@minmaxvalues;
        }
        $features_and_unique_values_all_trees->{$t}->{$feature} = \@unique_values_for_feature;
    }
}
if ($self->{_debug1}) {
print "\nDisplaying value ranges for numeric features for all trees:\n\n";
foreach my $tree_index (keys %{$numeric_features_valuerange_all_trees}) {
my %keyval = %{$numeric_features_valuerange_all_trees->{$tree_index}};
lib/Algorithm/RegressionTree.pm view on Meta::CPAN
my %feature_values_for_samples_hash = map {my $sampleID = $_; "sample_" . $sampleID => [map {my $fname = $all_feature_names[$_-1]; $fname . "=" . eval{$data_hash{$sampleID}->[$_-1] =~ /^\d+$/ ? sprintf("%.1f", $data_hash{$sampleID}->[$_-1] ) : ...
# Build a hash from feature name to an array ref holding that column's value
# from every record in %data_hash, one entry per column number listed in
# $self->{_predictor_columns}.  Both the feature name and the record value are
# read at index (column number - 1).  Values made up only of digits are
# rewritten with one decimal place via sprintf("%.1f"); all other values are
# kept verbatim.  The lexical `my $a` / `my $b` shadow sort's package
# variables inside these blocks.
my %features_and_values_hash = map { my $a = $_; {$all_feature_names[$a-1] => [ map {my $b = $_; $b =~ /^\d+$/ ? sprintf("%.1f",$b) : $b} map {$data_hash{$_}->[$a-1]} keys %data_hash ]} } @{$self->{_predictor_columns}};
my %numeric_features_valuerange_hash = ();
my %feature_values_how_many_uniques_hash = ();
my %features_and_unique_values_hash = ();
my $numregex = '[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?';
# For each feature in %features_and_values_hash record: its distinct values
# (in first-seen order), how many there are, and -- when every distinct value
# matches $numregex in full -- whatever minmax() returns for those values.
foreach my $feature (keys %features_and_values_hash) {
    my %seen = ();
    # Keep the first occurrence of every value other than 'NA'.  The grep
    # block yields $_ itself, so values that are false in Perl ("0" and the
    # empty string) are filtered out as well.
    my @unique_values_for_feature = grep {$_ if $_ ne 'NA' && !$seen{$_}++} @{$features_and_values_hash{$feature}};
    $feature_values_how_many_uniques_hash{$feature} = scalar @unique_values_for_feature;

    # A single value that is not a number per $numregex makes the feature
    # non-numeric, so stop scanning at the first one.
    my $not_all_values_float = 0;
    foreach my $value (@unique_values_for_feature) {
        next if $value =~ /^$numregex$/;
        $not_all_values_float = 1;
        last;
    }
    if ($not_all_values_float == 0) {
        # minmax() is defined elsewhere; presumably returns (min, max).
        my @minmaxvalues = minmax(\@unique_values_for_feature);
        $numeric_features_valuerange_hash{$feature} = \@minmaxvalues;
    }
    $features_and_unique_values_hash{$feature} = \@unique_values_for_feature;
}
if ($self->{_debug1_r}) {
print "\nDependent var values: @dependent_var_values\n";
print "\nEach sample data record:\n";
foreach my $kee (sort {sample_index($a) <=> sample_index($b)} keys %feature_values_for_samples_hash) {
print "$kee => @{$feature_values_for_samples_hash{$kee}}\n";