view release on metacpan or search on metacpan
ExamplesRandomizedTrees/classify_database_records.pl view on Meta::CPAN
$record_index++;
}
close FILEIN;
# Now classify all the records extracted from the database file:
my %original_classifications;
my %calculated_classifications;
# Classify every record, in ascending numeric order of its record index, and
# remember both the label read from the database file and the label produced by
# the majority vote of the trees.
foreach my $record_index (sort {$a <=> $b} keys %record_ids_with_features_and_vals) {
    my @test_sample = @{$record_ids_with_features_and_vals{$record_index}};

    # A record is skipped altogether (not merely trimmed) as soon as any one of
    # its feature=value strings ends in '=NA'.
    next if grep { /=NA$/ } @test_sample;

    $rt->classify_with_all_trees( \@test_sample );
    my $classification = $rt->get_majority_vote_classification();
    printf("\nclassification for %5d: %10s original classification: %s",
           $record_index, $classification, $record_ids_with_class_labels{$record_index});
    $original_classifications{$record_index} = $record_ids_with_class_labels{$record_index};

    # Keep only what follows the first '=' in the classification string.  The
    # capture is taken from the match's return list rather than from $1, because
    # $1 keeps the value of an earlier successful match when this one fails.
    # NOTE(review): when the string contains no '=' the whole string is stored
    # as the label -- confirm that this is the desired fallback.
    my ($calculated_label) = $classification =~ /=(.+)$/;
    $calculated_label = $classification unless defined $calculated_label;
    $calculated_classifications{$record_index} = $calculated_label;
}
my $total_errors = 0;
my @confusion_matrix_row1 = (0,0);
lib/Algorithm/BoostedDecisionTree.pm view on Meta::CPAN
max_depth_desired
csv_class_column_index
csv_columns_for_features
symbolic_to_numeric_cardinality_threshold
number_of_histogram_bins
csv_cleanup_needed
debug1
debug2
debug3
/;
my $found_match_flag;
foreach my $param (@params) {
foreach my $legal (@legal_params) {
$found_match_flag = 0;
if ($param eq $legal) {
$found_match_flag = 1;
last;
}
}
last if $found_match_flag == 0;
}
return $found_match_flag;
}
sub cleanup_csv {
my $line = shift;
$line =~ tr/\/:?()[]{}'/ /;
# my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
for (@double_quoted) {
my $item = $_;
$item = substr($item, 1, -1);
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
max_depth_desired
csv_class_column_index
csv_columns_for_features
symbolic_to_numeric_cardinality_threshold
number_of_histogram_bins
csv_cleanup_needed
debug1
debug2
debug3
/;
my $found_match_flag;
foreach my $param (@params) {
foreach my $legal (@legal_params) {
$found_match_flag = 0;
if ($param eq $legal) {
$found_match_flag = 1;
last;
}
}
last if $found_match_flag == 0;
}
return $found_match_flag;
}
## Prints a newline, the supplied message and a colon, and then hands the array
## reference to print_nested_array() for display.
sub print_array_with_msg {
    my ($message, $array_ref) = @_;
    print "\n$message: ";
    return print_nested_array($array_ref);
}
sub print_nested_array {
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
## Checks a list of keyword-argument names against the names recognized here.
## Returns 1 when every supplied name is legal and 0 as soon as one name is not.
## An empty list contains no illegal name and therefore also yields 1 (it used to
## yield undef, which callers comparing the result with 0 would read as illegal).
sub check_for_illegal_params3 {
    my @params = @_;
    my @legal_params = qw / output_training_csv_file
                            output_test_csv_file
                            parameter_file
                            number_of_samples_for_training
                            number_of_samples_for_testing
                            debug
                          /;
    # A lookup table replaces the rescan of the legal list for each parameter.
    my %is_legal = map { $_ => 1 } @legal_params;
    foreach my $param (@params) {
        return 0 unless $is_legal{$param};
    }
    return 1;
}
## The training data generated by an instance of the class
## TrainingDataGeneratorNumeric is based on the specs you place in a parameter file
## that you supply to the class constructor through a constructor variable called
## `parameter_file'.  This method is for parsing the parameter file in order to
## determine the names to be used for the different data classes, their means, and
## their variances.
sub read_parameter_file_numeric {
my $self = shift;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
}, $class;
}
## Checks a list of keyword-argument names against the names recognized here.
## Returns 1 when every supplied name is legal and 0 as soon as one name is not.
## An empty list contains no illegal name and therefore also yields 1 (it used to
## yield undef, which callers comparing the result with 0 would read as illegal).
sub check_for_illegal_params4 {
    my @params = @_;
    my @legal_params = qw / output_training_datafile
                            parameter_file
                            number_of_samples_for_training
                            debug
                          /;
    # A lookup table replaces the rescan of the legal list for each parameter.
    my %is_legal = map { $_ => 1 } @legal_params;
    foreach my $param (@params) {
        return 0 unless $is_legal{$param};
    }
    return 1;
}
## Read a parameter file for generating symbolic training data. See the script
## generate_symbolic_training_data_symbolic.pl in the Examples directory for how to
## pass the name of the parameter file to the constructor of the
## TrainingDataGeneratorSymbolic class.
sub read_parameter_file_symbolic {
my $self = shift;
my $debug = $self->{_debug};
my $number_of_training_samples = $self->{_number_of_samples_for_training};
lib/Algorithm/DecisionTreeWithBagging.pm view on Meta::CPAN
max_depth_desired
csv_class_column_index
csv_columns_for_features
symbolic_to_numeric_cardinality_threshold
number_of_histogram_bins
csv_cleanup_needed
debug1
debug2
debug3
/;
my $found_match_flag;
foreach my $param (@params) {
foreach my $legal (@legal_params) {
$found_match_flag = 0;
if ($param eq $legal) {
$found_match_flag = 1;
last;
}
}
last if $found_match_flag == 0;
}
return $found_match_flag;
}
# from perl docs:
sub fisher_yates_shuffle {
my $arr = shift;
my $i = @$arr;
while (--$i) {
my $j = int rand( $i + 1 );
@$arr[$i, $j] = @$arr[$j, $i];
}
lib/Algorithm/RandomizedTreesForBigData.pm view on Meta::CPAN
csv_class_column_index
csv_columns_for_features
symbolic_to_numeric_cardinality_threshold
number_of_histogram_bins
how_many_trees
how_many_training_samples_per_tree
looking_for_needles_in_haystack
csv_cleanup_needed
debug1
/;
my $found_match_flag;
foreach my $param (@params) {
foreach my $legal (@legal_params) {
$found_match_flag = 0;
if ($param eq $legal) {
$found_match_flag = 1;
last;
}
}
last if $found_match_flag == 0;
}
return $found_match_flag;
}
sub cleanup_csv {
my $line = shift;
$line =~ tr/\/:?()[]{}'/ /;
# my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
for (@double_quoted) {
my $item = $_;
$item = substr($item, 1, -1);
lib/Algorithm/RegressionTree.pm view on Meta::CPAN
}
}
return ($error, $beta0);
}
my $beta = $beta0;
if ($self->{_debug2_r}) {
print "\ndisplaying beta0 matrix\n";
display_matrix($beta);
}
my $gamma = 0.1;
my $iterate_again_flag = 1;
my $delta = 0.001;
my $master_interation_index = 0;
$|++;
while (1) {
print "*" unless $master_interation_index++ % 100;
last unless $iterate_again_flag;
$gamma *= 0.1;
$beta0 = 0.99 * $beta0;
print "\n\n======== starting iterations with gamma= $gamma ===========\n\n\n" if $self->{_debug2_r};
$beta = $beta0;
my $beta_old = Math::GSL::Matrix->new($betarows, 1)->zero;
my $error_old = sum( map abs, ($y - ($X * $beta_old) )->col(0)->as_list ) / $nrows;
my $error;
foreach my $iteration (0 .. 1499) {
print "." unless $iteration % 100;
$beta_old = $beta->copy;
lib/Algorithm/RegressionTree.pm view on Meta::CPAN
}
$jacobian = (1.0/$delta) * $jacobian;
} else {
die "wrong choice for the jacobian_choice";
}
# $beta = $beta_old + 2 * $gamma * transpose($X) * ( $y - ($X * $beta) );
$beta = $beta_old + 2 * $gamma * transpose($jacobian) * ( $y - ($X * $beta) );
$error = sum( map abs, ($y - ($X * $beta) )->col(0)->as_list ) / $nrows;
if ($error > $error_old) {
if (vector_norm($beta - $beta_old) < (0.00001 * vector_norm($beta_old))) {
$iterate_again_flag = 0;
last;
} else {
last;
}
}
if ($self->{_debug2_r}) {
print "\n\niteration: $iteration gamma: $gamma current error: $error\n";
print "\nnew beta:\n";
display_matrix $beta;
}
if ( vector_norm($beta - $beta_old) < (0.00001 * vector_norm($beta_old)) ) {
print "iterations used: $iteration with gamma: $gamma\n" if $self->{_debug2_r};
$iterate_again_flag = 0;
last;
}
$error_old = $error;
}
}
display_matrix($beta) if $self->{_debug2_r};
my $predictions = $X * $beta;
my @error_distribution = ($y - ($X * $beta))->as_list;
my $squared_error = sum map abs, @error_distribution;
my $error = $squared_error / $nrows;
lib/Algorithm/RegressionTree.pm view on Meta::CPAN
dependent_variable_column
predictor_columns
mse_threshold
need_data_normalization
jacobian_choice
csv_cleanup_needed
debug1_r
debug2_r
debug3_r
/;
my $found_match_flag;
foreach my $param (@params) {
foreach my $legal (@legal_params) {
$found_match_flag = 0;
if ($param eq $legal) {
$found_match_flag = 1;
last;
}
}
last if $found_match_flag == 0;
}
return $found_match_flag;
}
sub cleanup_csv {
my $line = shift;
$line =~ tr/\/:?()[]{}'/ /;
# my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
for (@double_quoted) {
my $item = $_;
$item = substr($item, 1, -1);