Algorithm-DecisionTree
view release on metacpan or search on metacpan
lib/Algorithm/DecisionTreeWithBagging.pm view on Meta::CPAN
push @samples_in_other_bags, @{$data_sample_bags[$j]} if $j != $i;
}
print "\n\nin other bags for i=$i: @samples_in_other_bags\n" if $self->{_debug2};
push @{$augmented_data_sample_bags[$i]}, @{$data_sample_bags[$i]};
push @{$augmented_data_sample_bags[$i]}, map $samples_in_other_bags[rand(@samples_in_other_bags)], 0 .. $number_of_samples_needed_from_other_bags -1;
print "\naugmented bage $i: @{$augmented_data_sample_bags[$i]}\n" if $self->{_debug2};
}
}
# From here on the augmented bags replace the original ones; remember how
# many samples each bag ended up with.
@data_sample_bags = @augmented_data_sample_bags;
$self->{_bag_sizes} = [ map { scalar @{$_} } @data_sample_bags ];

# bag index => { sample name => class label }, limited to the samples that
# are members of that bag.
my %class_for_sample_hash_bags;
for my $bag_index (0 .. $self->{_how_many_bags} - 1) {
    my @samples_in_bag = @{$data_sample_bags[$bag_index]};
    my %labels_in_bag;
    # A sample listed more than once in a bag simply maps to the same label.
    @labels_in_bag{@samples_in_bag} = @class_for_sample_hash{@samples_in_bag};
    $class_for_sample_hash_bags{$bag_index} = \%labels_in_bag;
}
if ($self->{_debug2}) {
    for my $bag_index (keys %class_for_sample_hash_bags) {
        my %keyval = %{$class_for_sample_hash_bags{$bag_index}};
        print "\nFor bag $bag_index =>:\n";
        for my $sname (keys %keyval) {
            print " $sname => $keyval{$sname}\n";
        }
    }
}

# bag index => { sample name => feature-value array ref }, again limited to
# the samples that are members of that bag.
my %feature_values_for_samples_hash_bags;
for my $bag_index (0 .. $self->{_how_many_bags} - 1) {
    my @samples_in_bag = @{$data_sample_bags[$bag_index]};
    my %values_in_bag;
    @values_in_bag{@samples_in_bag} = @feature_values_for_samples_hash{@samples_in_bag};
    $feature_values_for_samples_hash_bags{$bag_index} = \%values_in_bag;
}
if ($self->{_debug2}) {
    print "\nDisplaying samples and their values in each bag:\n\n";
    for my $bag_index (keys %feature_values_for_samples_hash_bags) {
        my %keyval = %{$feature_values_for_samples_hash_bags{$bag_index}};
        print "\nFor bag $bag_index =>:\n";
        for my $sname (keys %keyval) {
            print " $sname => @{$keyval{$sname}}\n";
        }
    }
}
# feature name => array ref of every value that feature takes across the
# whole training data.  Each entry of _csv_columns_for_features is used both
# as the index into @all_feature_names and (minus one) as the index into a
# record of %data_hash.  Values made up solely of digits are rewritten in
# "%.1f" form; every other value is kept as-is.
my %features_and_values_hash = map {
    my $column_index = $_;
    my @values_in_column = map {
        my $raw_value = $data_hash{$_}->[$column_index - 1];
        $raw_value =~ /^\d+$/ ? sprintf("%.1f", $raw_value) : $raw_value;
    } keys %data_hash;
    ( $all_feature_names[$column_index] => \@values_in_column );
} @{$self->{_csv_columns_for_features}};
if ($self->{_debug2}) {
    print "\nDisplaying features and their values for entire training data:\n\n";
    for my $fname (keys %features_and_values_hash) {
        print " $fname => @{$features_and_values_hash{$fname}}\n";
    }
}
my %features_and_values_hash_bags = map { my $c = $_; { $c => { map { my $d = $_; {$all_feature_names[$d] => [ sort {$a cmp $b} map {my $f = $_; $f =~ /^\d+$/ ? sprintf("%.1f",$f) : $f} map {$data_hash{sample_index($_)}->[$d-1]} @{$data_sample_...
# Debug-only dump of %features_and_values_hash_bags: for each bag, every
# feature name followed by the list of values stored for it.
if ($self->{_debug2}) {
    # Prints one bag; the bag's hash is received as a flattened copy.
    my $print_one_bag = sub {
        my ($bag_index, %keyval) = @_;
        print "\nFor bag $bag_index =>:\n";
        foreach my $fname (keys %keyval) {
            print " $fname => @{$keyval{$fname}}\n";
        }
    };
    print "\nDisplaying features and their values in each bag:\n\n";
    $print_one_bag->($_, %{ $features_and_values_hash_bags{$_} })
        foreach keys %features_and_values_hash_bags;
}
# Distinct class labels found in %class_for_sample_hash, in sorted order.
my @all_class_names = do {
    my %is_class_name;
    $is_class_name{$_} = 1 for values %class_for_sample_hash;
    sort keys %is_class_name;
};
print "all class names: @all_class_names\n" if $self->{_debug2};

# Per-bag accumulators, each starting out as bag index => empty hash.
my (%numeric_features_valuerange_hash_bags,
    %feature_values_how_many_uniques_hash_bags,
    %features_and_unique_values_hash_bags);
for my $bag_index (0 .. $self->{_how_many_bags} - 1) {
    $numeric_features_valuerange_hash_bags{$bag_index}     = {};
    $feature_values_how_many_uniques_hash_bags{$bag_index} = {};
    $features_and_unique_values_hash_bags{$bag_index}      = {};
}

# Pattern source for a number: optional sign, digits with an optional
# fractional part (or a bare fraction), and an optional exponent.
my $numregex = '[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?';
# For every bag and every feature in that bag:
#   * collect the distinct values of the feature, ignoring 'NA' and
#     missing (undefined or empty) entries, in first-seen order;
#   * record how many distinct values there are;
#   * if every distinct value matches $numregex, record the result of
#     minmax() as the feature's value range for that bag
#     (NOTE(review): minmax() is defined elsewhere; presumably it returns
#     the smallest and largest value -- confirm);
#   * record the distinct values themselves.
foreach my $i (0 .. $self->{_how_many_bags} - 1) {
    foreach my $feature (keys %{$features_and_values_hash_bags{$i}}) {
        my %seen;
        # Filter on an explicit condition instead of on the truthiness of
        # the value itself, so that a literal "0" is kept as a value rather
        # than being discarded together with the missing entries.
        my @unique_values_for_feature_in_bag =
            grep { defined($_) && length($_) && $_ ne 'NA' && !$seen{$_}++ }
            @{$features_and_values_hash_bags{$i}{$feature}};
        $feature_values_how_many_uniques_hash_bags{$i}->{$feature} =
            scalar @unique_values_for_feature_in_bag;

        # Count the values that do not look like numbers (grep in scalar
        # context) rather than running map purely for its side effect.
        my $non_numeric_count =
            grep { $_ !~ /^$numregex$/ } @unique_values_for_feature_in_bag;
        if ($non_numeric_count == 0) {
            my @minmaxvalues = minmax(\@unique_values_for_feature_in_bag);
            $numeric_features_valuerange_hash_bags{$i}->{$feature} = \@minmaxvalues;
        }
        $features_and_unique_values_hash_bags{$i}->{$feature} =
            \@unique_values_for_feature_in_bag;
    }
}
# Debug-only dumps of the three per-bag tables filled in above.
if ($self->{_debug2}) {
    # Prints $title, then every bag of %$per_bag as "name => value" lines.
    # $values_are_lists selects whether each value is an array ref to be
    # interpolated as a list or a plain scalar.
    my $dump_bags = sub {
        my ($title, $per_bag, $values_are_lists) = @_;
        print $title;
        foreach my $bag_index (keys %{$per_bag}) {
            my %keyval = %{ $per_bag->{$bag_index} };
            print "\nFor bag $bag_index =>:\n";
            foreach my $fname (keys %keyval) {
                if ($values_are_lists) {
                    print " $fname => @{$keyval{$fname}}\n";
                }
                else {
                    print " $fname => $keyval{$fname}\n";
                }
            }
        }
    };
    $dump_bags->("\nDisplaying value ranges for numeric features in each bag:\n\n",
                 \%numeric_features_valuerange_hash_bags, 1);
    $dump_bags->("\nDisplaying number of unique values for each features in each bag:\n\n",
                 \%feature_values_how_many_uniques_hash_bags, 0);
    $dump_bags->("\nDisplaying unique values for all features in each bag:\n\n",
                 \%features_and_unique_values_hash_bags, 1);
}
# Give the tree stored under each bag index its data: the class and feature
# name lists are shared by reference across all trees, everything else is
# the slice that was computed for that particular bag.
for my $bag_index (0 .. $self->{_how_many_bags} - 1) {
    @{ $self->{_all_trees}->{$bag_index} }{qw(
        _class_names
        _feature_names
        _samples_class_label_hash
        _training_data_hash
        _features_and_values_hash
        _features_and_unique_values_hash
        _numeric_features_valuerange_hash
        _feature_values_how_many_uniques_hash
    )} = (
        \@all_class_names,
        \@feature_names,
        $class_for_sample_hash_bags{$bag_index},
        $feature_values_for_samples_hash_bags{$bag_index},
        $features_and_values_hash_bags{$bag_index},
        $features_and_unique_values_hash_bags{$bag_index},
        $numeric_features_valuerange_hash_bags{$bag_index},
        $feature_values_how_many_uniques_hash_bags{$bag_index},
    );
}
if ($self->{_debug1}) {
foreach my $i (0..$self->{_how_many_bags}-1) {
print "\n\n============================= For bag $i ==================================\n";
print "\nAll class names: @{$self->{_all_trees}->{$i}->{_class_names}}\n";
print "\nSamples and their feature values in each bag:\n";
foreach my $item (sort {sample_index($a) <=> sample_index($b)} keys %{$self->{_all_trees}->{$i}->{_training_data_hash}}) {
print "$item => @{$self->{_all_trees}->{$i}->{_training_data_hash}->{$item}}\n";
}
print "\nclass label for each data sample in each bag:\n";
foreach my $item (sort {sample_index($a) <=> sample_index($b)} keys %{$self->{_all_trees}->{$i}->{_samples_class_label_hash}} ) {
print "$item => $self->{_all_trees}->{$i}->{_samples_class_label_hash}->{$item}\n";
}
print "\nfeatures and the values taken by them:\n";
foreach my $item (sort keys %{$self->{_all_trees}->{$i}->{_features_and_values_hash}}) {
print "$item => @{$self->{_all_trees}->{$i}->{_features_and_values_hash}->{$item}}\n";
}
print "\nnumeric features and their ranges:\n";
foreach my $item (sort keys %{$self->{_all_trees}->{$i}->{_numeric_features_valuerange_hash}}) {
( run in 1.359 second using v1.01-cache-2.11-cpan-f56aa216473 )