Algorithm-DecisionTree

 view release on metacpan or  search on metacpan

lib/Algorithm/RegressionTree.pm  view on Meta::CPAN

package Algorithm::RegressionTree;

#--------------------------------------------------------------------------------------
# Copyright (c) 2017 Avinash Kak. All rights reserved.  This program is free
# software.  You may modify and/or distribute it under the same terms as Perl itself.
# This copyright notice must remain attached to the file.
#
# Algorithm::RegressionTree is a Perl module for constructing regression trees.  It calls
# on the main Algorithm::DecisionTree module for some of its functionality.
# -------------------------------------------------------------------------------------

#use lib 'blib/lib', 'blib/arch';

#use 5.10.0;
use strict;
use warnings;
use Carp;
use File::Basename;
use Algorithm::DecisionTree 3.43;
use List::Util qw(reduce min max pairmap sum);
use Math::GSL::Matrix;
use Graphics::GnuplotIF;

# Version of this module.
our $VERSION = '3.43';

# Inherit from Algorithm::DecisionTree: method calls not defined in this
# package are resolved through the parent class.
@Algorithm::RegressionTree::ISA = ('Algorithm::DecisionTree');

############################################   Constructor  ##############################################
# Constructor.
#
# Builds an Algorithm::DecisionTree object from every keyword argument that is
# not specific to regression, re-blesses that object into $class, and then
# attaches the regression-specific state to it.
#
# Regression-specific keyword arguments handled here:
#   dependent_variable_column   stored as given; any false value is stored as undef
#   predictor_columns           stored as given; defaults to 0
#   mse_threshold               defaults to 0.01 when not supplied (an explicit 0 is kept)
#   jacobian_choice             defaults to 0
#   need_data_normalization     defaults to 0
#   debug1_r, debug2_r, debug3_r  default to 0
#
# All remaining keyword arguments are forwarded unchanged to
# Algorithm::DecisionTree->new().
#
# Croaks when check_for_illegal_params() returns 0 for the supplied keys.
# Returns the blessed instance.
sub new {
    my ($class, %args) = @_;
    my @params = keys %args;
    croak "\nYou have used a wrong name for a keyword argument --- perhaps a misspelling\n"
                           if check_for_illegal_params(@params) == 0;

    # The parent constructor must not see the regression-only keys, so hand it
    # a copy of the arguments with those keys removed.
    my %dtargs = %args;
    delete @dtargs{qw(
        dependent_variable_column
        predictor_columns
        mse_threshold
        need_data_normalization
        jacobian_choice
        debug1_r
        debug2_r
        debug3_r
    )};

    my $instance = Algorithm::DecisionTree->new(%dtargs);
    bless $instance, $class;

    $instance->{_dependent_variable_column}       =  $args{dependent_variable_column} || undef;
    $instance->{_predictor_columns}               =  $args{predictor_columns} || 0;
    # Use a defined-check rather than `||` so that a caller who explicitly
    # asks for a threshold of 0 is not silently given 0.01 instead.
    $instance->{_mse_threshold}                   =  defined $args{mse_threshold}
                                                         ? $args{mse_threshold}
                                                         : 0.01;
    $instance->{_jacobian_choice}                 =  $args{jacobian_choice} || 0;
    $instance->{_need_data_normalization}         =  $args{need_data_normalization} || 0;

    # State that starts out empty and is filled in by other methods.
    $instance->{_dependent_var}                   =  undef;
    $instance->{_dependent_var_values}            =  undef;
    $instance->{_samples_dependent_var_val_hash}  =  undef;
    $instance->{_root_node}                       =  undef;

    $instance->{_debug1_r}                        =  $args{debug1_r} || 0;
    $instance->{_debug2_r}                        =  $args{debug2_r} || 0;
    $instance->{_debug3_r}                        =  $args{debug3_r} || 0;

    $instance->{_sample_points_for_dependent_var} =  [];
    $instance->{_output_for_plots}                =  {};
    $instance->{_output_for_surface_plots}        =  {};

    # The object was already blessed into $class above; just hand it back.
    return $instance;
}

##############################################  Methods  #################################################
sub get_training_data_for_regression {
    my $self = shift;
    die("Aborted. get_training_data_csv() is only for CSV files") unless $self->{_training_datafile} =~ /\.csv$/;
    my @dependent_var_values;
    my %all_record_ids_with_dependent_var_values;
    my $firstline;
    my %data_hash;
    $|++;
    open FILEIN, $self->{_training_datafile};



( run in 1.718 second using v1.01-cache-2.11-cpan-99c4e6809bf )