Algorithm-DecisionTree
view release on metacpan or search on metacpan
lib/Algorithm/RegressionTree.pm view on Meta::CPAN
}
sub minmax {
my $arr = shift;
my ($min, $max);
foreach my $i (0..@{$arr}-1) {
if ( (!defined $min) || ($arr->[$i] < $min) ) {
$min = $arr->[$i];
}
if ( (!defined $max) || ($arr->[$i] > $max) ) {
$max = $arr->[$i];
}
}
return ($min, $max);
}
sub sample_index {
my $arg = shift;
$arg =~ /_(.+)$/;
return $1;
}
sub check_for_illegal_params {
my @params = @_;
my @legal_params = qw / training_datafile
max_depth_desired
dependent_variable_column
predictor_columns
mse_threshold
need_data_normalization
jacobian_choice
csv_cleanup_needed
debug1_r
debug2_r
debug3_r
/;
my $found_match_flag;
foreach my $param (@params) {
foreach my $legal (@legal_params) {
$found_match_flag = 0;
if ($param eq $legal) {
$found_match_flag = 1;
last;
}
}
last if $found_match_flag == 0;
}
return $found_match_flag;
}
sub cleanup_csv {
my $line = shift;
$line =~ tr/\/:?()[]{}'/ /;
# my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
for (@double_quoted) {
my $item = $_;
$item = substr($item, 1, -1);
$item =~ s/^\s+|,|\s+$//g;
$item = join '_', split /\s+/, $item;
substr($line, index($line, $_), length($_)) = $item;
}
my @white_spaced = $line =~ /,(\s*[^,]+)(?=,|$)/g;
for (@white_spaced) {
my $item = $_;
$item =~ s/\s+/_/g;
$item =~ s/^\s*_|_\s*$//g;
substr($line, index($line, $_), length($_)) = $item;
}
$line =~ s/,\s*(?=,|$)/,NA/g;
return $line;
}
sub transpose {
my $matrix = shift;
my $num_rows = $matrix->rows();
my $num_cols = $matrix->cols();
my $transpose = Math::GSL::Matrix->new($num_cols, $num_rows);
foreach my $i (0..$num_rows-1) {
my @row = $matrix->row($i)->as_list;
$transpose->set_col($i, \@row );
}
return $transpose;
}
sub vector_subtract {
my $vec1 = shift;
my $vec2 = shift;
die "wrong data types for vector subtract calculation\n" if @$vec1 != @$vec2;
my @result;
foreach my $i (0..@$vec1-1){
push @result, $vec1->[$i] - $vec2->[$i];
}
return @result;
}
sub vector_norm {
my $vec = shift; # assume it to be a column vector
my ($rows, $cols) = $vec->dim;
die "vector_norm() can only be called for a single column matrix" if $cols > 1;
my @norm = (transpose($vec) * $vec)->as_list;
return sqrt($norm[0]);
}
sub display_matrix {
my $matrix = shift;
my $nrows = $matrix->rows();
my $ncols = $matrix->cols();
print "\nDisplaying a matrix of size $nrows rows and $ncols columns:\n";
foreach my $i (0..$nrows-1) {
my $row = $matrix->row($i);
my @row_as_list = $row->as_list;
map { printf("%.4f ", $_) } @row_as_list;
print "\n";
}
print "\n\n";
}
# Meant only for an array of strings (no nesting):
sub deep_copy_array {
my $ref_in = shift;
my $ref_out;
return [] if scalar @$ref_in == 0;
foreach my $i (0..@{$ref_in}-1) {
$ref_out->[$i] = $ref_in->[$i];
}
return $ref_out;
}
lib/Algorithm/RegressionTree.pm view on Meta::CPAN
$self->{_rt}->{nodes_created} += 1;
return $self->{_rt}->{nodes_created};
}
sub get_serial_num {
my $self = shift;
$self->{_serial_number};
}
# this returns the feature test at the current node
sub get_feature {
my $self = shift;
return $self->{ _feature };
}
sub set_feature {
my $self = shift;
my $feature = shift;
$self->{_feature} = $feature;
}
sub get_branch_features_and_values_or_thresholds {
my $self = shift;
return $self->{_branch_features_and_values_or_thresholds};
}
sub get_children {
my $self = shift;
return $self->{_linked_to};
}
sub add_child_link {
my ($self, $new_node, ) = @_;
push @{$self->{_linked_to}}, $new_node;
}
sub delete_all_links {
my $self = shift;
$self->{_linked_to} = undef;
}
sub display_node {
my $self = shift;
my $feature_at_node = $self->get_feature() || " ";
my $serial_num = $self->get_serial_num();
my @branch_features_and_values_or_thresholds = @{$self->get_branch_features_and_values_or_thresholds()};
print "\n\nNODE $serial_num" .
":\n Branch features and values to this node: @branch_features_and_values_or_thresholds" .
"\n Best feature test at current node: $feature_at_node\n\n";
$self->{_rt}->estimate_regression_coefficients($self->get_node_XMatrix(), $self->get_node_YVector(), 1);
}
sub display_regression_tree {
my $self = shift;
my $offset = shift;
my $serial_num = $self->get_serial_num();
if (@{$self->get_children()} > 0) {
my $feature_at_node = $self->get_feature() || " ";
my @branch_features_and_values_or_thresholds = @{$self->get_branch_features_and_values_or_thresholds()};
print "NODE $serial_num: $offset BRANCH TESTS TO NODE: @branch_features_and_values_or_thresholds\n";
my $second_line_offset = "$offset" . " " x (8 + length("$serial_num"));
print "$second_line_offset" . "Decision Feature: $feature_at_node\n\n";
$offset .= " ";
foreach my $child (@{$self->get_children()}) {
$child->display_regression_tree($offset);
}
} else {
my @branch_features_and_values_or_thresholds = @{$self->get_branch_features_and_values_or_thresholds()};
print "NODE $serial_num: $offset BRANCH TESTS TO LEAF NODE: @branch_features_and_values_or_thresholds\n";
my $second_line_offset = "$offset" . " " x (8 + length("$serial_num"));
}
}
1;
( run in 0.603 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )