Algorithm-DecisionTree

 view release on metacpan or  search on metacpan

lib/Algorithm/DecisionTreeWithBagging.pm  view on Meta::CPAN

                push @samples_in_other_bags, @{$data_sample_bags[$j]} if $j != $i;
            }
            print "\n\nin other bags for i=$i: @samples_in_other_bags\n" if $self->{_debug2};
            push @{$augmented_data_sample_bags[$i]}, @{$data_sample_bags[$i]};
            push @{$augmented_data_sample_bags[$i]}, map $samples_in_other_bags[rand(@samples_in_other_bags)], 0 .. $number_of_samples_needed_from_other_bags -1;
            print "\naugmented bage $i: @{$augmented_data_sample_bags[$i]}\n" if $self->{_debug2};
        }
    }
    # From here on the augmented bags are the working bags; remember how many
    # sample names each bag holds.
    @data_sample_bags = @augmented_data_sample_bags;
    $self->{_bag_sizes} = [map scalar(@$_), @data_sample_bags];
    # bag index => { sample name => class label }.
    # The outer $_ is the bag index and the inner $_ is a sample name; the list
    # @{$data_sample_bags[$_]} is evaluated with the outer $_ before the inner
    # map starts aliasing.  A sample name listed more than once in a bag ends up
    # as a single hash key.
    my %class_for_sample_hash_bags =  map { $_ => { map { $_ => $class_for_sample_hash{$_} } @{$data_sample_bags[$_]} } } 0 .. $self->{_how_many_bags} - 1;
    if ($self->{_debug2}) {
        # Bags and samples are printed in hash order, i.e. in no particular order.
        foreach my $bag_index (keys  %class_for_sample_hash_bags) {    
            my %keyval = %{$class_for_sample_hash_bags{$bag_index}};
            print "\nFor bag $bag_index  =>:\n";
            foreach my $sname (keys %keyval) {
                print "      $sname    =>  $keyval{$sname}\n";
            }
        }
    }
    # bag index => { sample name => value stored for that sample in
    # %feature_values_for_samples_hash }.  The stored value is shared with the
    # global hash, not copied; it is dereferenced as an array in the debug dump
    # below.
    my %feature_values_for_samples_hash_bags = map { $_ => { map { $_ => $feature_values_for_samples_hash{$_} } @{$data_sample_bags[$_]} } } 0 .. $self->{_how_many_bags} - 1;
    if ($self->{_debug2}) {
        print "\nDisplaying samples and their values in each bag:\n\n";
        foreach my $bag_index (keys  %feature_values_for_samples_hash_bags) {   
            my %keyval = %{$feature_values_for_samples_hash_bags{$bag_index}};
            print "\nFor bag $bag_index  =>:\n";
            foreach my $sname (keys %keyval) {
                print "      $sname    =>  @{$keyval{$sname}}\n";
            }
        }
    }
    # feature name => [ values of that feature over every record in %data_hash ].
    # For each column index in $self->{_csv_columns_for_features} the feature
    # name is $all_feature_names[index] and the value is element index-1 of the
    # record array (presumably because the record arrays omit the first CSV
    # column -- TODO confirm against the code that fills %data_hash).
    # Values made up only of digits are rewritten with one decimal place
    # ("7" becomes "7.0"); everything else is kept as is.
    # The inner braces sit at statement position inside the map block, so they
    # presumably parse as a bare block that yields a flat key/value pair rather
    # than an anonymous hash.
    # NOTE(review): `my $a` / `my $b` shadow the variables used by sort(); that
    # is harmless here because this statement contains no sort.
    my %features_and_values_hash = map { my $a = $_; {$all_feature_names[$a] => [  map {my $b = $_; $b =~ /^\d+$/ ? sprintf("%.1f",$b) : $b} map {$data_hash{$_}->[$a-1]} keys %data_hash ]} } @{$self->{_csv_columns_for_features}};     
    if ($self->{_debug2}) {
        print "\nDisplaying features and their values for entire training data:\n\n";
        # Features are printed in hash order.
        foreach my $fname (keys  %features_and_values_hash) {         
            print "        $fname    =>  @{$features_and_values_hash{$fname}}\n";
        }
    }
    my %features_and_values_hash_bags =  map { my $c = $_; { $c =>  { map { my $d = $_; {$all_feature_names[$d] => [ sort {$a cmp $b} map {my $f = $_; $f =~ /^\d+$/ ? sprintf("%.1f",$f) : $f} map {$data_hash{sample_index($_)}->[$d-1]} @{$data_sample_...
    # Debug dump: for every bag, print each feature name followed by the list
    # of values recorded for it in %features_and_values_hash_bags.  Bags and
    # features come out in hash order.
    if ($self->{_debug2}) {
        print "\nDisplaying features and their values in each bag:\n\n";
        foreach my $bag_index (keys  %features_and_values_hash_bags) {           
            my %keyval = %{$features_and_values_hash_bags{$bag_index}};
            print "\nFor bag $bag_index  =>:\n";
            foreach my $fname (keys %keyval) {
                print "      $fname    =>  @{$keyval{$fname}}\n";
            }
        }
    }
    # Distinct class labels over all samples, in string-sorted order (the
    # temporary hash is only used to remove duplicates).
    my @all_class_names =  sort keys %{ {map {$_ => 1} values %class_for_sample_hash } };
    print "all class names: @all_class_names\n" if $self->{_debug2};
    # Per-bag accumulators, each starting as bag index => {} and filled in by
    # the loop over bags and features that follows.
    my %numeric_features_valuerange_hash_bags   =   map {$_ => {}} 0 .. $self->{_how_many_bags} - 1;
    my %feature_values_how_many_uniques_hash_bags   =   map {$_ => {}} 0 .. $self->{_how_many_bags} - 1;
    my %features_and_unique_values_hash_bags   =   map {$_ => {}} 0 .. $self->{_how_many_bags} - 1;
    # Pattern text for a number: optional sign, optional spaces, digits with an
    # optional fractional part (or a bare ".digits"), and an optional exponent.
    # It is kept as a plain string and anchored with ^...$ at the point of use.
    my $numregex =  '[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?';
    # For every bag and every feature in that bag, record the distinct values,
    # how many there are, and, when all of them look numeric, their value range.
    foreach my $i (0 .. $self->{_how_many_bags} - 1) {
        foreach my $feature (keys %{$features_and_values_hash_bags{$i}}) {
            my %seen = ();
            # Keep the first occurrence of each value other than 'NA', in the
            # order found.  The grep block returns $_ itself, so a value that
            # is false as a string ('' or '0') is dropped as well.
            my @unique_values_for_feature_in_bag =  grep {$_ if $_ ne 'NA' && !$seen{$_}++} @{$features_and_values_hash_bags{$i}{$feature}};
            $feature_values_how_many_uniques_hash_bags{$i}->{$feature} = scalar @unique_values_for_feature_in_bag;
            # The flag is raised as soon as one value fails the number pattern.
            my $not_all_values_float = 0;
            map {$not_all_values_float = 1 if $_ !~ /^$numregex$/} @unique_values_for_feature_in_bag;
            if ($not_all_values_float == 0) {
                # minmax() is defined outside this excerpt; presumably it
                # returns the (min, max) pair of the referenced list -- TODO confirm.
                # NOTE(review): an empty unique-value list (e.g. every value
                # was 'NA') also reaches this branch.
                my @minmaxvalues = minmax(\@unique_values_for_feature_in_bag);
                $numeric_features_valuerange_hash_bags{$i}->{$feature} = \@minmaxvalues; 
            }
            $features_and_unique_values_hash_bags{$i}->{$feature} = \@unique_values_for_feature_in_bag;
        }
    }
    # Debug dump of the three per-bag hashes: numeric value ranges, number of
    # unique values, and the unique values themselves.  Bags and features are
    # printed in hash order.
    if ($self->{_debug2}) {
        print "\nDisplaying value ranges for numeric features in each bag:\n\n";
        foreach my $bag_index (keys  %numeric_features_valuerange_hash_bags) {        
            my %keyval = %{$numeric_features_valuerange_hash_bags{$bag_index}};
            print "\nFor bag $bag_index  =>:\n";
            foreach my $fname (keys %keyval) {
                print "      $fname    =>  @{$keyval{$fname}}\n";
            }
        }
        print "\nDisplaying number of unique values for each features in each bag:\n\n";
        foreach my $bag_index (keys  %feature_values_how_many_uniques_hash_bags) {    
            my %keyval = %{$feature_values_how_many_uniques_hash_bags{$bag_index}};
            print "\nFor bag $bag_index  =>:\n";
            foreach my $fname (keys %keyval) {
                print "      $fname    =>  $keyval{$fname}\n";
            }
        }
        print "\nDisplaying unique values for all features in each bag:\n\n";
        foreach my $bag_index (keys  %features_and_unique_values_hash_bags) {  
            my %keyval = %{$features_and_unique_values_hash_bags{$bag_index}};
            print "\nFor bag $bag_index  =>:\n";
            foreach my $fname (keys %keyval) {
                print "      $fname    =>  @{$keyval{$fname}}\n";
            }
        }
    }
    # Hand each bag's data to the tree entry with the same index in
    # $self->{_all_trees}.
    foreach my $i (0..$self->{_how_many_bags}-1) {
        # Every tree receives a reference to the same @all_class_names and
        # @feature_names arrays, so these two lists are shared, not copied.
        $self->{_all_trees}->{$i}->{_class_names} = \@all_class_names;
        $self->{_all_trees}->{$i}->{_feature_names} = \@feature_names;
        # The remaining entries are the per-bag hash references built above.
        $self->{_all_trees}->{$i}->{_samples_class_label_hash} = $class_for_sample_hash_bags{$i};
        $self->{_all_trees}->{$i}->{_training_data_hash}  =  $feature_values_for_samples_hash_bags{$i};
        $self->{_all_trees}->{$i}->{_features_and_values_hash}    =  $features_and_values_hash_bags{$i};
        $self->{_all_trees}->{$i}->{_features_and_unique_values_hash} = $features_and_unique_values_hash_bags{$i};
        $self->{_all_trees}->{$i}->{_numeric_features_valuerange_hash} = $numeric_features_valuerange_hash_bags{$i}; 
        $self->{_all_trees}->{$i}->{_feature_values_how_many_uniques_hash} = $feature_values_how_many_uniques_hash_bags{$i};
    }
    if ($self->{_debug1}) {
        foreach my $i (0..$self->{_how_many_bags}-1) {
            print "\n\n=============================   For bag $i   ==================================\n";
            print "\nAll class names: @{$self->{_all_trees}->{$i}->{_class_names}}\n";
            print "\nSamples and their feature values in each bag:\n";
            foreach my $item (sort {sample_index($a) <=> sample_index($b)} keys %{$self->{_all_trees}->{$i}->{_training_data_hash}}) {
                print "$item  =>  @{$self->{_all_trees}->{$i}->{_training_data_hash}->{$item}}\n";
            }
            print "\nclass label for each data sample in each bag:\n";
            foreach my $item (sort {sample_index($a) <=> sample_index($b)} keys %{$self->{_all_trees}->{$i}->{_samples_class_label_hash}} ) {
                print "$item  =>  $self->{_all_trees}->{$i}->{_samples_class_label_hash}->{$item}\n";
            }
            print "\nfeatures and the values taken by them:\n";
            foreach my $item (sort keys %{$self->{_all_trees}->{$i}->{_features_and_values_hash}}) {
                print "$item  =>  @{$self->{_all_trees}->{$i}->{_features_and_values_hash}->{$item}}\n";
            }
            print "\nnumeric features and their ranges:\n";            
            foreach my $item (sort keys %{$self->{_all_trees}->{$i}->{_numeric_features_valuerange_hash}}) {



( run in 1.359 second using v1.01-cache-2.11-cpan-f56aa216473 )