Algorithm-DecisionTree
view release on metacpan or search on metacpan
lib/Algorithm/RandomizedTreesForBigData.pm view on Meta::CPAN
foreach my $t (0 .. $self->{_how_many_trees} - 1) {
my @record_labels = keys %{$data_hash_for_all_trees->{$t}};
print "\n\nFor tree $t: record labels: @record_labels\n";
for my $kee (sort keys %{$data_hash_for_all_trees->{$t}}) {
print "$kee -----> @{$data_hash_for_all_trees->{$t}->{$kee}}\n";
}
}
}
# Split $firstline on commas to obtain the column headings.
# NOTE(review): $firstline is presumably the CSV header row -- it is set outside this view; confirm.
my @all_feature_names = split /,/, $firstline;
# Heading of the column selected by _csv_class_column_index.
my $class_column_heading = $all_feature_names[$self->{_csv_class_column_index}];
# Headings of the columns listed in _csv_columns_for_features, in that list's order.
my @feature_names = map {$all_feature_names[$_]} @{$self->{_csv_columns_for_features}};
# NOTE(review): these two prints are not guarded by _debug1, unlike the other
# diagnostics in this section -- confirm that unconditional output is intended.
print "\n\nclass column heading: $class_column_heading\n";
print "feature names: @feature_names\n";
my $class_for_sample_all_trees = {map {my $t = $_; $t => {map {my $lbl = $_; "sample_$lbl" => "$class_column_heading=$data_hash_for_all_trees->{$t}->{$lbl}->[$self->{_csv_class_column_index} - 1]" } keys %{$data_hash_for_all_trees->{$t}} } } 0 ....
# Debug output: for each tree, list the keys of $class_for_sample_all_trees->{$t}
# and then print every key with its class-label string, in sorted key order.
if ($self->{_debug1}) {
foreach my $t (0 .. $self->{_how_many_trees} - 1) {
my @sample_labels = keys %{$class_for_sample_all_trees->{$t}};
print "\n\nFor tree $t: sample labels: @sample_labels\n";
for my $kee (sort keys %{$class_for_sample_all_trees->{$t}}) {
print "$kee -----> $class_for_sample_all_trees->{$t}->{$kee}\n";
}
}
}
# Map each tree index to an array ref of sample names, formed by prefixing every
# record label of that tree's data hash with "sample_". The order follows keys().
my $sample_names_in_all_trees = {map {my $t = $_; $t => [map {"sample_$_"} keys %{$data_hash_for_all_trees->{$t}}]} 0 .. $self->{_how_many_trees} - 1};
my $feature_values_for_samples_all_trees = {map {my $t = $_; $t => {map {my $key = $_; "sample_$key" => [map {my $feature_name = $all_feature_names[$_]; "$feature_name=$data_hash_for_all_trees->{$t}->{$key}->[$_-1]"} @{$self->{_csv_columns_for_fe...
# Debug output: for each tree, list the keys of $feature_values_for_samples_all_trees->{$t}
# and then print every key followed by the elements of its array, in sorted key order.
if ($self->{_debug1}) {
foreach my $t (0 .. $self->{_how_many_trees} - 1) {
my @sample_labels = keys %{$feature_values_for_samples_all_trees->{$t}};
print "\n\nFor tree $t: sample labels: @sample_labels\n";
for my $kee (sort keys %{$feature_values_for_samples_all_trees->{$t}}) {
print "$kee -----> @{$feature_values_for_samples_all_trees->{$t}->{$kee}}\n";
}
}
}
my $features_and_values_all_trees = {map {my $t = $_; $t => {map {my $i = $_; $all_feature_names[$i] => [map {my $key = $_; $data_hash_for_all_trees->{$t}->{$key}->[$i-1]} keys %{$data_hash_for_all_trees->{$t}}]} @{$self->{_csv_columns_for_featur...
# Debug output: for each tree, list the keys of $features_and_values_all_trees->{$t}
# and then print every key followed by the elements of its array, in sorted key order.
if ($self->{_debug1}) {
foreach my $t (0 .. $self->{_how_many_trees} - 1) {
my @feature_labels = keys %{$features_and_values_all_trees->{$t}};
print "\n\nFor tree $t: feature labels: @feature_labels\n";
for my $kee (sort keys %{$features_and_values_all_trees->{$t}}) {
print "$kee -----> @{$features_and_values_all_trees->{$t}->{$kee}}\n";
}
}
}
# Map each tree index to an array ref holding the distinct values found in
# $class_for_sample_all_trees->{$t}. A temporary hash does the de-duplication, so the
# order of each list is whatever keys() returns for that hash.
my $all_class_names_all_trees = {map {my $t = $_; my %all_class_labels_in_tree = map {$_ => 1} values %{$class_for_sample_all_trees->{$t}}; my @uniques = keys %all_class_labels_in_tree; $t => \@uniques } 0 .. $self->{_how_many_trees} - 1};
# Debug output: print the de-duplicated class names for each tree.
if ($self->{_debug1}) {
foreach my $t (0 .. $self->{_how_many_trees} - 1) {
my @unique_class_names_for_tree = @{$all_class_names_all_trees->{$t}};
print "\n\nFor tree $t: all unique class names: @unique_class_names_for_tree\n";
}
}
# Three per-tree tables, each keyed by tree index and then by feature name. All start
# with an empty inner hash for every tree and are filled in by the loop below:
#   $numeric_features_valuerange_all_trees     -- minmax() result, numeric features only
#   $feature_values_how_many_uniques_all_trees -- count of distinct non-'NA' values
#   $features_and_unique_values_all_trees      -- the distinct non-'NA' values themselves
my $numeric_features_valuerange_all_trees = {map {my $t = $_; $t => {}} 0 .. $self->{_how_many_trees} - 1};
my $feature_values_how_many_uniques_all_trees = {map {my $t = $_; $t => {}} 0 .. $self->{_how_many_trees} - 1};
my $features_and_unique_values_all_trees = {map {my $t = $_; $t => {}} 0 .. $self->{_how_many_trees} - 1};
# Pattern text for a number: optional sign, optional spaces after the sign, then either
# digits with an optional fractional part or a leading-dot fraction, then an optional exponent.
my $numregex = '[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?';
foreach my $t (0 .. $self->{_how_many_trees} - 1) {
    # Anchored form of $numregex, so a value must match in its entirety to count as numeric.
    my $numeric_value_pattern = qr/^$numregex$/;
    foreach my $feature (sort keys %{$features_and_values_all_trees->{$t}}) {
        # De-duplicate the feature's values through a hash, then drop the 'NA' marker.
        my %seen_value = map {$_ => 1} @{$features_and_values_all_trees->{$t}->{$feature}};
        my @unique_values_for_feature = grep {$_ ne 'NA'} keys %seen_value;
        $feature_values_how_many_uniques_all_trees->{$t}->{$feature} = scalar @unique_values_for_feature;
        # grep in scalar context yields the number of values that do NOT look numeric;
        # this replaces a void-context map that was used only to set a flag.
        my $non_numeric_count = grep {$_ !~ $numeric_value_pattern} @unique_values_for_feature;
        if ($non_numeric_count == 0) {
            # NOTE(review): a feature whose values are all 'NA' also reaches this branch,
            # with an empty list -- confirm that minmax() copes with an empty array.
            my @minmaxvalues = minmax(\@unique_values_for_feature);
            $numeric_features_valuerange_all_trees->{$t}->{$feature} = \@minmaxvalues;
        }
        $features_and_unique_values_all_trees->{$t}->{$feature} = \@unique_values_for_feature;
    }
}
# Debug output: dump, tree by tree, the three per-feature tables --
# $numeric_features_valuerange_all_trees, $feature_values_how_many_uniques_all_trees
# and $features_and_unique_values_all_trees. Trees and features are visited in
# keys() order here (no sort), so the listing order is not fixed.
if ($self->{_debug1}) {
print "\nDisplaying value ranges for numeric features for all trees:\n\n";
foreach my $tree_index (keys %{$numeric_features_valuerange_all_trees}) {
my %keyval = %{$numeric_features_valuerange_all_trees->{$tree_index}};
print "\nFor tree $tree_index =>:\n";
foreach my $fname (keys %keyval) {
print " $fname => @{$keyval{$fname}}\n";
}
}
print "\nDisplaying number of unique values for each features for each tree:\n\n";
foreach my $tree_index (keys %{$feature_values_how_many_uniques_all_trees}) {
my %keyval = %{$feature_values_how_many_uniques_all_trees->{$tree_index}};
print "\nFor tree $tree_index =>:\n";
foreach my $fname (keys %keyval) {
print " $fname => $keyval{$fname}\n";
}
}
print "\nDisplaying unique values for all features for all trees:\n\n";
foreach my $tree_index (keys %{$features_and_unique_values_all_trees}) {
my %keyval = %{$features_and_unique_values_all_trees->{$tree_index}};
print "\nFor tree $tree_index =>:\n";
foreach my $fname (keys %keyval) {
print " $fname => @{$keyval{$fname}}\n";
}
}
}
# Attach the computed structures to $self->{_all_trees}->{$t} for every tree index.
# Each tree gets its own per-tree sub-structure for every entry except _feature_names:
# all trees are handed a reference to the one shared @feature_names array.
foreach my $t (0..$self->{_how_many_trees}-1) {
$self->{_all_trees}->{$t}->{_class_names} = $all_class_names_all_trees->{$t};
$self->{_all_trees}->{$t}->{_feature_names} = \@feature_names;
$self->{_all_trees}->{$t}->{_samples_class_label_hash} = $class_for_sample_all_trees->{$t};
$self->{_all_trees}->{$t}->{_training_data_hash} = $feature_values_for_samples_all_trees->{$t};
$self->{_all_trees}->{$t}->{_features_and_values_hash} = $features_and_values_all_trees->{$t};
$self->{_all_trees}->{$t}->{_features_and_unique_values_hash} = $features_and_unique_values_all_trees->{$t};
$self->{_all_trees}->{$t}->{_numeric_features_valuerange_hash} = $numeric_features_valuerange_all_trees->{$t};
$self->{_all_trees}->{$t}->{_feature_values_how_many_uniques_hash} = $feature_values_how_many_uniques_all_trees->{$t};
}
if ($self->{_debug1}) {
foreach my $t (0..$self->{_how_many_trees}-1) {
print "\n\n============================= For Tree $t ==================================\n";
print "\nAll class names: @{$self->{_all_trees}->{$t}->{_class_names}}\n";
print "\nSamples and their feature values for each tree:\n";
foreach my $item (sort {sample_index($a) <=> sample_index($b)} keys %{$self->{_all_trees}->{$t}->{_training_data_hash}}) {
print "$item => @{$self->{_all_trees}->{$t}->{_training_data_hash}->{$item}}\n";
}
print "\nclass label for each data sample for each tree:\n";
foreach my $item (sort {sample_index($a) <=> sample_index($b)} keys %{$self->{_all_trees}->{$t}->{_samples_class_label_hash}} ) {
print "$item => $self->{_all_trees}->{$t}->{_samples_class_label_hash}->{$item}\n";
}
print "\nfeatures and the values taken by them:\n";
foreach my $item (sort keys %{$self->{_all_trees}->{$t}->{_features_and_values_hash}}) {
print "$item => @{$self->{_all_trees}->{$t}->{_features_and_values_hash}->{$item}}\n";
}
print "\nnumeric features and their ranges:\n";
foreach my $item (sort keys %{$self->{_all_trees}->{$t}->{_numeric_features_valuerange_hash}}) {
( run in 1.931 second using v1.01-cache-2.11-cpan-5a3173703d6 )