Algorithm-DecisionTree

 view release on metacpan or  search on metacpan

lib/Algorithm/RandomizedTreesForBigData.pm  view on Meta::CPAN

        foreach my $t (0 .. $self->{_how_many_trees} - 1) {
            my @record_labels = keys %{$data_hash_for_all_trees->{$t}};
            print "\n\nFor tree $t: record labels: @record_labels\n";
            for my $kee (sort keys %{$data_hash_for_all_trees->{$t}}) {
                print "$kee   ----->   @{$data_hash_for_all_trees->{$t}->{$kee}}\n";
            }
        }
    }
    my @all_feature_names = split /,/, $firstline;
    my $class_column_heading = $all_feature_names[$self->{_csv_class_column_index}];
    my @feature_names = map {$all_feature_names[$_]} @{$self->{_csv_columns_for_features}};
    print "\n\nclass column heading: $class_column_heading\n";
    print "feature names: @feature_names\n";
    my $class_for_sample_all_trees = {map {my $t = $_; $t => {map {my $lbl = $_; "sample_$lbl" => "$class_column_heading=$data_hash_for_all_trees->{$t}->{$lbl}->[$self->{_csv_class_column_index} - 1]" } keys %{$data_hash_for_all_trees->{$t}} } }  0 ....
    if ($self->{_debug1}) {
        foreach my $t (0 .. $self->{_how_many_trees} - 1) {
            my @sample_labels = keys %{$class_for_sample_all_trees->{$t}};
            print "\n\nFor tree $t: sample labels: @sample_labels\n";    
            for my $kee (sort keys %{$class_for_sample_all_trees->{$t}}) {
                print "$kee   ----->   $class_for_sample_all_trees->{$t}->{$kee}\n";
            }
        }
    }
    my $sample_names_in_all_trees = {map {my $t = $_; $t => [map {"sample_$_"} keys %{$data_hash_for_all_trees->{$t}}]}  0 .. $self->{_how_many_trees} - 1};
    my $feature_values_for_samples_all_trees = {map {my $t = $_; $t => {map {my $key = $_; "sample_$key" => [map {my $feature_name = $all_feature_names[$_]; "$feature_name=$data_hash_for_all_trees->{$t}->{$key}->[$_-1]"} @{$self->{_csv_columns_for_fe...
    if ($self->{_debug1}) {
        foreach my $t (0 .. $self->{_how_many_trees} - 1) {
            my @sample_labels = keys %{$feature_values_for_samples_all_trees->{$t}};
            print "\n\nFor tree $t: sample labels: @sample_labels\n";    
            for my $kee (sort keys %{$feature_values_for_samples_all_trees->{$t}}) {
                print "$kee   ----->   @{$feature_values_for_samples_all_trees->{$t}->{$kee}}\n";
            }
        }
    }
    my $features_and_values_all_trees = {map {my $t = $_; $t => {map {my $i = $_; $all_feature_names[$i] => [map {my $key = $_; $data_hash_for_all_trees->{$t}->{$key}->[$i-1]} keys %{$data_hash_for_all_trees->{$t}}]} @{$self->{_csv_columns_for_featur...
    if ($self->{_debug1}) {
        foreach my $t (0 .. $self->{_how_many_trees} - 1) {
            my @feature_labels = keys %{$features_and_values_all_trees->{$t}};
            print "\n\nFor tree $t: feature labels: @feature_labels\n";    
            for my $kee (sort keys %{$features_and_values_all_trees->{$t}}) {
                print "$kee   ----->   @{$features_and_values_all_trees->{$t}->{$kee}}\n";
            }
        }
    }
    my $all_class_names_all_trees = {map {my $t = $_; my %all_class_labels_in_tree = map {$_ => 1} values %{$class_for_sample_all_trees->{$t}}; my @uniques = keys %all_class_labels_in_tree; $t => \@uniques } 0 .. $self->{_how_many_trees} - 1};
    if ($self->{_debug1}) {
        foreach my $t (0 .. $self->{_how_many_trees} - 1) {
            my @unique_class_names_for_tree = @{$all_class_names_all_trees->{$t}};
            print "\n\nFor tree $t: all unique class names: @unique_class_names_for_tree\n";    
        }
    }
    my $numeric_features_valuerange_all_trees = {map {my $t = $_; $t => {}} 0 .. $self->{_how_many_trees} - 1};
    my $feature_values_how_many_uniques_all_trees = {map {my $t = $_; $t => {}} 0 .. $self->{_how_many_trees} - 1};
    my $features_and_unique_values_all_trees = {map {my $t = $_; $t => {}} 0 .. $self->{_how_many_trees} - 1};
    my $numregex =  '[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?';
    foreach my $t (0 .. $self->{_how_many_trees} - 1) {    
        foreach my $feature (sort keys %{$features_and_values_all_trees->{$t}}) {
            my %all_values_for_feature =  map {$_ => 1} @{$features_and_values_all_trees->{$t}->{$feature}};
            my @unique_values_for_feature = grep {$_ ne 'NA'} keys %all_values_for_feature;
            $feature_values_how_many_uniques_all_trees->{$t}->{$feature} = scalar @unique_values_for_feature;
            my $not_all_values_float = 0;
            map {$not_all_values_float = 1 if $_ !~ /^$numregex$/} @unique_values_for_feature;
            if ($not_all_values_float == 0) {
                my @minmaxvalues = minmax(\@unique_values_for_feature);
                $numeric_features_valuerange_all_trees->{$t}->{$feature} = \@minmaxvalues; 
            }
            $features_and_unique_values_all_trees->{$t}->{$feature} = \@unique_values_for_feature;            
        }
    }
    if ($self->{_debug1}) {
        print "\nDisplaying value ranges for numeric features for all trees:\n\n";
        foreach my $tree_index (keys  %{$numeric_features_valuerange_all_trees}) {        
            my %keyval = %{$numeric_features_valuerange_all_trees->{$tree_index}};
            print "\nFor tree $tree_index  =>:\n";
            foreach my $fname (keys %keyval) {
                print "      $fname    =>  @{$keyval{$fname}}\n";
            }
        }
        print "\nDisplaying number of unique values for each features for each tree:\n\n";
        foreach my $tree_index (keys  %{$feature_values_how_many_uniques_all_trees}) {    
            my %keyval = %{$feature_values_how_many_uniques_all_trees->{$tree_index}};
            print "\nFor tree $tree_index  =>:\n";
            foreach my $fname (keys %keyval) {
                print "      $fname    =>  $keyval{$fname}\n";
            }
        }
        print "\nDisplaying unique values for all features for all trees:\n\n";
        foreach my $tree_index (keys  %{$features_and_unique_values_all_trees}) {  
            my %keyval = %{$features_and_unique_values_all_trees->{$tree_index}};
            print "\nFor tree $tree_index  =>:\n";
            foreach my $fname (keys %keyval) {
                print "      $fname    =>  @{$keyval{$fname}}\n";
            }
        }
    }
    foreach my $t (0..$self->{_how_many_trees}-1) {
        $self->{_all_trees}->{$t}->{_class_names} = $all_class_names_all_trees->{$t};
        $self->{_all_trees}->{$t}->{_feature_names} = \@feature_names;
        $self->{_all_trees}->{$t}->{_samples_class_label_hash} = $class_for_sample_all_trees->{$t};
        $self->{_all_trees}->{$t}->{_training_data_hash}  =  $feature_values_for_samples_all_trees->{$t};
        $self->{_all_trees}->{$t}->{_features_and_values_hash}    =  $features_and_values_all_trees->{$t};
        $self->{_all_trees}->{$t}->{_features_and_unique_values_hash} = $features_and_unique_values_all_trees->{$t};
        $self->{_all_trees}->{$t}->{_numeric_features_valuerange_hash} = $numeric_features_valuerange_all_trees->{$t}; 
        $self->{_all_trees}->{$t}->{_feature_values_how_many_uniques_hash} = $feature_values_how_many_uniques_all_trees->{$t};
    }
    if ($self->{_debug1}) {
        foreach my $t (0..$self->{_how_many_trees}-1) {
            print "\n\n=============================   For Tree $t   ==================================\n";
            print "\nAll class names: @{$self->{_all_trees}->{$t}->{_class_names}}\n";
            print "\nSamples and their feature values for each tree:\n";
            foreach my $item (sort {sample_index($a) <=> sample_index($b)} keys %{$self->{_all_trees}->{$t}->{_training_data_hash}}) {
                print "$item  =>  @{$self->{_all_trees}->{$t}->{_training_data_hash}->{$item}}\n";
            }
            print "\nclass label for each data sample for each tree:\n";
            foreach my $item (sort {sample_index($a) <=> sample_index($b)} keys %{$self->{_all_trees}->{$t}->{_samples_class_label_hash}} ) {
                print "$item  =>  $self->{_all_trees}->{$t}->{_samples_class_label_hash}->{$item}\n";
            }
            print "\nfeatures and the values taken by them:\n";
            foreach my $item (sort keys %{$self->{_all_trees}->{$t}->{_features_and_values_hash}}) {
                print "$item  =>  @{$self->{_all_trees}->{$t}->{_features_and_values_hash}->{$item}}\n";
            }
            print "\nnumeric features and their ranges:\n";            
            foreach my $item (sort keys %{$self->{_all_trees}->{$t}->{_numeric_features_valuerange_hash}}) {



( run in 1.931 second using v1.01-cache-2.11-cpan-5a3173703d6 )