Algorithm-DecisionTree
view release on metacpan or search on metacpan
Examples/classify_test_data_in_a_file.pl view on Meta::CPAN
### UNCOMMENT THE NEXT STATEMENT if you would like to see
### the decision tree displayed in your terminal window:
#$root_node->display_decision_tree(" ");
# NOW YOU ARE READY TO CLASSIFY THE FILE BASED TEST DATA:
# File-scoped containers for the test data.  They are declared ahead of the
# call to get_test_data_from_csv() so that the sub can see them.
# NOTE(review): presumably that sub fills %feature_values_for_samples_hash,
# which is the only one of these read below -- confirm against its body.
my (@all_class_names, @feature_names, %class_for_sample_hash, %feature_values_for_samples_hash,
    %features_and_values_hash, %features_and_unique_values_hash,
    %numeric_features_valuerange_hash, %feature_values_how_many_uniques_hash);

get_test_data_from_csv();

# Three-argument open: the mode is passed separately, so no character in
# $outputfile can be interpreted as part of the open mode.  The bareword
# handle name is kept because code outside this section may still refer to it.
open OUTPUTHANDLE, '>', $outputfile
    or die "Unable to open the file $outputfile for writing out the classification results: $!";

# An output name ending in ".csv" (any letter case) selects the CSV layout;
# anything else gets the human-readable text layout.
my $csv_output_requested = $outputfile =~ /\.csv$/i;

# Samples are reported in increasing order of their sample index.
my @samples_in_index_order = sort { sample_index($a) <=> sample_index($b) }
                             keys %feature_values_for_samples_hash;

if ($csv_output_requested) {
    # Header row: "sample_index" followed by the class names in sorted order.
    my $class_names_csv = join ',', sort @{$dt->{_class_names}};
    print OUTPUTHANDLE "sample_index,$class_names_csv\n";

    foreach my $sample (@samples_in_index_order) {
        my @test_sample    = @{$feature_values_for_samples_hash{$sample}};
        my %classification = %{$dt->classify($root_node, \@test_sample)};
        # 'solution_path' is not a per-class entry, so it is dropped before
        # the remaining values are written out.
        delete $classification{'solution_path'};
        # One row per sample: the sample index, then the values in sorted-key
        # order (presumably so that they line up with the sorted header).
        my @row_values = map { $classification{$_} } sort keys %classification;
        print OUTPUTHANDLE join(',', sample_index($sample), @row_values), "\n";
    }
} else {
    if ($show_hard_classifications) {
        print OUTPUTHANDLE "\nOnly the most probable class shown for each test sample\n\n";
    } else {
        print OUTPUTHANDLE "\nThe classification result for each sample ordered in decreasing order of probability\n\n";
    }

    foreach my $sample (@samples_in_index_order) {
        my @test_sample    = @{$feature_values_for_samples_hash{$sample}};
        my %classification = %{$dt->classify($root_node, \@test_sample)};
        delete $classification{'solution_path'};

        # Rank the entries by decreasing value; a hard classification
        # reports only the top-ranked one.
        my @ranked_classes = sort { $classification{$b} <=> $classification{$a} }
                             keys %classification;
        @ranked_classes = ($ranked_classes[0])
            if $show_hard_classifications && @ranked_classes;

        my $result_string = "$sample: ";
        foreach my $which_class (@ranked_classes) {
            # The displayed name is whatever follows the first '=' in the key.
            # Capturing in list context avoids reading a stale $1 when a key
            # has no '='; in that case the key itself is shown.
            my ($class_name) = $which_class =~ /=(.*)/;
            $class_name = $which_class unless defined $class_name;
            my $valuestring = sprintf("%-20s", $classification{$which_class});
            $result_string .= "$class_name => $valuestring ";
        }
        print OUTPUTHANDLE "$result_string\n";
    }
}
sub get_test_data_from_csv {
open FILEIN, $test_datafile or die "Unable to open $test_datafile: $!";
die("Aborted. get_test_data_csv() is only for CSV files")
unless $test_datafile =~ /\.csv$/;
my $class_name_in_column = $dt->{_csv_class_column_index} - 1;
my @all_data = <FILEIN>;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
fisher_yates_shuffle(\@training_data_records);
fisher_yates_shuffle(\@test_data_records);
if ($self->{_debug}) {
foreach my $record (@training_data_records) {
print "$record";
}
foreach my $record (@test_data_records) {
print "$record";
}
}
open OUTPUT, ">$self->{_output_training_csv_file}";
my @feature_names_training = @{$self->{_features_ordered}};
my @quoted_feature_names_training = map {"\"$_\""} @feature_names_training;
my $first_row_training = '"",' . "\"class_name\"," . join ",", @quoted_feature_names_training;
print OUTPUT "$first_row_training\n";
foreach my $i (0..@training_data_records-1) {
my $i1 = $i+1;
my $sample_record = "\"$i1\",$training_data_records[$i]";
print OUTPUT "$sample_record";
}
close OUTPUT;
open OUTPUT, ">$self->{_output_test_csv_file}";
my @feature_names_testing = keys %{$self->{_features_with_value_range}};
my @quoted_feature_names_testing = map {"\"$_\""} @feature_names_testing;
my $first_row_testing = '"",' . "\"class_name\"," . join ",", @quoted_feature_names_testing;
print OUTPUT "$first_row_testing\n";
foreach my $i (0..@test_data_records-1) {
my $i1 = $i+1;
my $sample_record = "\"$i1\",$test_data_records[$i]";
print OUTPUT "$sample_record";
}
close OUTPUT;
}
# Shuffles an array in place (adapted from the Perl documentation).
#
#   $arr - reference to the array to shuffle; its elements are swapped in
#          place, so the caller's array is modified.
#
# An empty array is left untouched.  Without the guard, $i would start at 0,
# the pre-decrement would make it -1 (a true value), and the swap would then
# try to assign to element -1 of an empty array, which is a fatal error.
sub fisher_yates_shuffle {
    my $arr = shift;
    return unless @$arr;
    my $i = @$arr;
    # Walk from the last index down to 1, swapping each position with a
    # randomly chosen position at or below it.
    while (--$i) {
        my $j = int rand( $i + 1 );
        @$arr[$i, $j] = @$arr[$j, $i];
    }
}
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
sub read_parameter_file_symbolic {
my $self = shift;
my $debug = $self->{_debug};
my $number_of_training_samples = $self->{_number_of_samples_for_training};
my $input_parameter_file = $self->{_parameter_file};
croak "Forgot to supply parameter file" if ! defined $input_parameter_file;
my $output_file_training = $self->{_output_training_datafile};
my $output_file_testing = $self->{_output_test_datafile};
my @all_params;
my $param_string;
open INPUT, $input_parameter_file || "unable to open parameter file: $!";
@all_params = <INPUT>;
@all_params = grep { $_ !~ /^[ ]*#/ } @all_params;
@all_params = grep { $_ =~ s/\r?\n?$//} @all_params;
$param_string = join ' ', @all_params;
my ($class_names, $class_priors, $rest_param) =
$param_string =~ /^\s*class names:(.*?)\s*class priors:(.*?)(feature: .*)/;
my @class_names = grep {defined($_) && length($_) > 0} split /\s+/, $1;
push @{$self->{_class_names}}, @class_names;
my @class_priors = grep {defined($_) && length($_) > 0} split /\s+/, $2;
push @{$self->{_class_priors}}, @class_priors;
my ($feature_string, $bias_string) = $rest_param =~ /(feature:.*?) (bias:.*)/;
lib/Algorithm/RegressionTree.pm view on Meta::CPAN
}
sub display_all_plots {
my $self = shift;
my $ncols = $self->{_XMatrix}->cols;
unlink "regression_plots.png" if -e "regression_plots.png";
my $master_datafile = $self->{_training_datafile};
my $filename = basename($master_datafile);
my $temp_file = "__temp_" . $filename;
unlink $temp_file if -e $temp_file;
open OUTPUT, ">$temp_file"
or die "Unable to open a temp file in this directory: $!\n";
if ($ncols == 2) {
my @predictor_entries = $self->{_XMatrix}->col(0)->as_list;
my @dependent_val_vals = $self->{_YVector}->col(0)->as_list;
map {print OUTPUT "$predictor_entries[$_] $dependent_val_vals[$_]\n"} 0 .. $self->{_XMatrix}->rows - 1;
print OUTPUT "\n\n";
foreach my $plot (sort {$a <=> $b} keys %{$self->{_output_for_plots}}) {
map {print OUTPUT "$self->{_output_for_plots}->{$plot}->[0]->[$_] $self->{_output_for_plots}->{$plot}->[1]->[$_]\n"} 0 .. @{$self->{_output_for_plots}->{$plot}->[0]} - 1;
print OUTPUT "\n\n"
}
close OUTPUT;
my $gplot = Graphics::GnuplotIF->new( persist => 1 );
my $hardcopy_plot = Graphics::GnuplotIF->new();
$hardcopy_plot->gnuplot_cmd('set terminal png', "set output \"regression_plots.png\"");
$gplot->gnuplot_cmd( "set noclip" );
$gplot->gnuplot_cmd( "set pointsize 2" );
my $arg_string = "";
foreach my $i (0 .. scalar(keys %{$self->{_output_for_plots}})) {
if ($i == 0) {
$arg_string .= "\"$temp_file\" index $i using 1:2 notitle with points lt -1 pt 1, ";
} elsif ($i == 1) {
lib/Algorithm/RegressionTree.pm view on Meta::CPAN
$arg_string = $arg_string =~ /^(.*),[ ]+$/;
$arg_string = $1;
$hardcopy_plot->gnuplot_cmd( "plot $arg_string" );
$gplot->gnuplot_cmd( "plot $arg_string" );
$gplot->gnuplot_pause(-1);
} elsif ($ncols == 3) {
my @dependent_val_vals = $self->{_YVector}->col(0)->as_list;
foreach my $i (0 .. $self->{_XMatrix}->rows - 1) {
my @onerow = $self->{_XMatrix}->row($i)->as_list;
pop @onerow;
print OUTPUT "@onerow $dependent_val_vals[$i]\n";
}
print OUTPUT "\n\n";
foreach my $plot (sort {$a <=> $b} keys %{$self->{_output_for_surface_plots}}) {
my @plot_data = @{$self->{_output_for_surface_plots}->{$plot}};
my @predictors = @{$plot_data[0]};
my @predictions = @{$plot_data[1]};
map {print OUTPUT "$predictors[$_] $predictions[$_]\n"} 0 .. @predictions - 1;
print OUTPUT "\n\n"
}
close OUTPUT;
my $gplot = Graphics::GnuplotIF->new( persist => 1 );
my $hardcopy_plot = Graphics::GnuplotIF->new();
$hardcopy_plot->gnuplot_cmd('set terminal png', "set output \"regression_plots.png\"");
$gplot->gnuplot_cmd( "set noclip" );
$gplot->gnuplot_cmd( "set pointsize 2" );
my $arg_string = "";
foreach my $i (0 .. scalar(keys %{$self->{_output_for_surface_plots}})) {
if ($i == 0) {
$arg_string .= "\"$temp_file\" index $i using 1:2:3 notitle with points lt -1 pt 1, ";
} elsif ($i == 1) {
( run in 0.287 second using v1.01-cache-2.11-cpan-4e96b696675 )