Algorithm-DecisionTree

 View release on MetaCPAN or search on MetaCPAN

ExamplesRandomizedTrees/classify_database_records.pl  view on Meta::CPAN

        my @interleaved = ( @fields_of_interest, @feature_vals_of_interest )[ map { $_, $_ + @fields_of_interest } ( 0 .. $#fields_of_interest ) ];
        my @features_and_vals = pairmap { "$a=$b" } @interleaved;
        $record_ids_with_features_and_vals{$record_lbl} = \@features_and_vals;
    }
    last if @records_to_classify_local == 0;
    $record_index++; 
}
close FILEIN;

# Now classify all the records extracted from the database file:    
my %original_classifications;
my %calculated_classifications;
foreach my $record_index (sort {$a <=> $b} keys %record_ids_with_features_and_vals) {
    my @test_sample = @{$record_ids_with_features_and_vals{$record_index}};
    # Skip any record containing a feature=NA combo: the classifier cannot
    # use a sample with an unknown feature value.  (grep in boolean context
    # replaces the original void-context map used only for its side effect.)
    next if grep { /=NA$/ } @test_sample;
    $rt->classify_with_all_trees( \@test_sample );
    my $classification = $rt->get_majority_vote_classification();
    printf("\nclassification for %5d: %10s       original classification: %s", $record_index, $classification, $record_ids_with_class_labels{$record_index});
    $original_classifications{$record_index} = $record_ids_with_class_labels{$record_index};
    # The majority-vote label comes back as "classname=label"; keep just the
    # label part.  Guard the match so a failed match can never store a stale
    # $1 left over from an earlier pattern match.
    if ($classification =~ /=(.+)$/) {
        $calculated_classifications{$record_index} = $1;
    }
}

my $total_errors = 0;
# Row 1 tallies samples whose true class is $class_names_used_in_database[1];
# row 2 tallies samples whose true class is $class_names_used_in_database[0].
# Column 0 counts predictions of class [1], column 1 everything else.
my @confusion_matrix_row1 = (0,0);
my @confusion_matrix_row2 = (0,0);

print "\n\nCalculating the error rate and the confusion matrix:\n";
# Record indices are numeric, so sort numerically (consistent with the
# classification loop above); the lexical sort here was only cosmetic but
# misordered multi-digit indices.
foreach my $record_index (sort {$a <=> $b} keys %calculated_classifications) {
    $total_errors += 1 if $original_classifications{$record_index} ne $calculated_classifications{$record_index};    
    if ($original_classifications{$record_index} eq $class_names_used_in_database[1]) {
        if ($calculated_classifications{$record_index} eq $class_names_used_in_database[1]) {
            $confusion_matrix_row1[0] += 1;
        } else {
            $confusion_matrix_row1[1] += 1;
        }
    }
    if ($original_classifications{$record_index} eq $class_names_used_in_database[0]) {
        if ($calculated_classifications{$record_index} eq $class_names_used_in_database[1]) {
            $confusion_matrix_row2[0] += 1;
        } else {
            $confusion_matrix_row2[1] += 1;
        }
    }
}

# Guard against division by zero: every record may have been skipped above
# because it contained an unknown ('NA') feature value.
my $num_classified = scalar keys %calculated_classifications;
if ($num_classified > 0) {
    my $percentage_errors = ($total_errors * 100.0) / $num_classified;
    print "\n\nClassification error rate: $percentage_errors\n";
} else {
    print "\n\nNo records could be classified (all contained unknown feature values).\n";
}

ExamplesRandomizedTrees/classify_database_records.pl  view on Meta::CPAN

                }
                last if @records_to_classify_local == 0;
                $record_index++; 
            }
            close FILEIN;
            # Now classify all the records extracted from the database file:    
            foreach my $record_index (keys %record_ids_with_features_and_vals) {
                my $test_sample = $record_ids_with_features_and_vals{$record_index};
                $rt->classify_with_all_trees( $test_sample );
                my $classification = $rt->get_majority_vote_classification();
                printf("\nclassification for %5d: %10s       original classification: %s", $record_index, $classification, $record_ids_with_class_labels{$record_index});
            }
        } else {
            print "\nYou are allowed to enter only 'y' or 'n'.  Try again.";
        }
    }
}

####################################### support functions #################################

sub all_record_labels_in_database {

lib/Algorithm/BoostedDecisionTree.pm  view on Meta::CPAN

    }
}

sub evaluate_one_stage_of_cascade {
    my $self = shift;
    my $trainingDT = shift;
    my $root_node = shift;
    my @misclassified_samples = ();
    foreach my $test_sample_name (@{$self->{_all_sample_names}}) {
        my @test_sample_data = @{$self->{_all_trees}->{0}->{_training_data_hash}->{$test_sample_name}};
        print "original data in $test_sample_name:@test_sample_data\n" if $self->{_stagedebug};
        @test_sample_data = map {$_ if $_ !~ /=NA$/} @test_sample_data;
        print "$test_sample_name: @test_sample_data\n" if $self->{_stagedebug}; 
        my %classification = %{$trainingDT->classify($root_node, \@test_sample_data)};
        my @solution_path = @{$classification{'solution_path'}};                                  
        delete $classification{'solution_path'};                                              
        my @which_classes = keys %classification;
        @which_classes = sort {$classification{$b} <=> $classification{$a}} @which_classes;
        my $most_likely_class_label = $which_classes[0];
        if ($self->{_stagedebug}) {
            print "\nClassification:\n\n";

lib/Algorithm/DecisionTree.pm  view on Meta::CPAN

            $trainingDT->{_feature_values_how_many_uniques_hash}->{$feature} = 
                scalar @{$trainingDT->{_features_and_unique_values_hash}->{$feature}};
        }
        $trainingDT->{_debug2} = 1 if $evaldebug;
        $trainingDT->calculate_first_order_probabilities();
        $trainingDT->calculate_class_priors();
        my $root_node = $trainingDT->construct_decision_tree_classifier();
        $root_node->display_decision_tree("     ") if $evaldebug;
        foreach my $test_sample_name (@testing_samples) {
            my @test_sample_data = @{$all_training_data{$test_sample_name}};
            print "original data in test sample: @test_sample_data\n" if $evaldebug;
            @test_sample_data = grep {$_ if $_ && $_ !~ /=NA$/} @test_sample_data;
            print "filtered data in test sample: @test_sample_data\n" if $evaldebug;
            my %classification = %{$trainingDT->classify($root_node, \@test_sample_data)};
            my @solution_path = @{$classification{'solution_path'}};
            delete $classification{'solution_path'};
            my @which_classes = keys %classification;
            @which_classes = sort {$classification{$b} <=> $classification{$a}} @which_classes;
            my $most_likely_class_label = $which_classes[0];
            if ($evaldebug) {
                print "\nClassification:\n\n";



( run in 0.240 second using v1.01-cache-2.11-cpan-069f9db706d )