Algorithm-DecisionTree


lib/Algorithm/BoostedDecisionTree.pm


############################################   Constructor  ##############################################
sub new { 
    my ($class, %args) = @_;
    my @params = keys %args;
    croak "\nYou have used a wrong name for a keyword argument --- perhaps a misspelling\n" 
                           if check_for_illegal_params(@params) == 0;
    my %dtargs = %args;
    delete $dtargs{how_many_stages};
    my $instance = Algorithm::DecisionTree->new(%dtargs);
    bless $instance, $class;
    $instance->{_how_many_stages}              =  $args{how_many_stages} || undef;
    $instance->{_stagedebug}                   =  $args{stagedebug} || 0;
    $instance->{_training_samples}             =  {map {$_ => []} 0..$args{how_many_stages}};
    $instance->{_all_trees}                    =  {map {$_ => Algorithm::DecisionTree->new(%dtargs)} 0..$args{how_many_stages}};
    $instance->{_root_nodes}                   =  {map {$_ => undef} 0..$args{how_many_stages}};
    $instance->{_sample_selection_probs}       =  {map {$_ => {}} 0..$args{how_many_stages}};
    $instance->{_trust_factors}                =  {map {$_ => undef} 0..$args{how_many_stages}};
    $instance->{_misclassified_samples}        =  {map {$_ => []} 0..$args{how_many_stages}};
    $instance->{_classifications}              =  undef;
    $instance->{_trust_weighted_decision_classes}  =  undef;
    return $instance;
}
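##  Usage sketch (editorial addition, not part of the module): how a calling
##  script might construct this class.  The filename and column indices are
##  placeholders; only keyword names visible in this constructor and in the
##  base Algorithm::DecisionTree constructor are assumed.
use Algorithm::BoostedDecisionTree;
my $boosted = Algorithm::BoostedDecisionTree->new(
                  training_datafile        => 'training.csv',    # placeholder
                  csv_class_column_index   => 1,                 # placeholder
                  csv_columns_for_features => [2, 3, 4, 5],      # placeholder
                  entropy_threshold        => 0.01,
                  max_depth_desired        => 8,
                  how_many_stages          => 4,
              );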

##############################################  Methods  #################################################
sub get_training_data_for_base_tree {
    my $self = shift;
    die("Aborted. get_training_data_csv() is only for CSV files") unless $self->{_training_datafile} =~ /\.csv$/;
    my %class_names = ();
    my %all_record_ids_with_class_labels;
    my $firstline;
    my %data_hash;

lib/Algorithm/DecisionTree.pm

        $class = shift;
        $eval_or_boosting_mode = shift;
        die unless $eval_or_boosting_mode eq 'evalmode' || $eval_or_boosting_mode eq 'boostingmode';
        die "Only one string arg allowed in eval and boosting modes" if @_;
    }
    unless ($eval_or_boosting_mode) {
        my @params = keys %args;
        croak "\nYou have used a wrong name for a keyword argument --- perhaps a misspelling\n" 
                           if check_for_illegal_params2(@params) == 0;
    }
    bless {
        _training_datafile                   =>    $args{training_datafile}, 
        _entropy_threshold                   =>    $args{entropy_threshold} || 0.01,
        _max_depth_desired                   =>    exists $args{max_depth_desired} ? 
                                                                       $args{max_depth_desired} : undef,
        _debug1                              =>    $args{debug1} || 0,
        _debug2                              =>    $args{debug2} || 0,
        _debug3                              =>    $args{debug3} || 0,
        _csv_class_column_index              =>    $args{csv_class_column_index} || undef,
        _csv_columns_for_features            =>    $args{csv_columns_for_features} || undef,
        _symbolic_to_numeric_cardinality_threshold

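##  Usage sketch (editorial addition): the fragment above implies two calling
##  conventions for Algorithm::DecisionTree->new() -- the usual keyword form,
##  and a single-string form ('evalmode' or 'boostingmode') that appears to be
##  reserved for the evaluation and boosting machinery.  Filename and column
##  indices below are placeholders.
use Algorithm::DecisionTree;
my $dt = Algorithm::DecisionTree->new(
             training_datafile        => 'training.csv',    # placeholder
             csv_class_column_index   => 1,                 # placeholder
             csv_columns_for_features => [2, 3],            # placeholder
             entropy_threshold        => 0.01,
             max_depth_desired        => 5,
         );
my $eval_dt = Algorithm::DecisionTree->new('evalmode');     # single-string form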
lib/Algorithm/DecisionTree.pm

##  training are allowed to have numeric features --- which is the more interesting
##  case for evaluation analytics.

package EvalTrainingData;

@EvalTrainingData::ISA = ('Algorithm::DecisionTree');

sub new {
    my $class = shift;
    my $instance = Algorithm::DecisionTree->new(@_);
    bless $instance, $class;
}
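##  Usage sketch (editorial addition, assumptions flagged): EvalTrainingData
##  inherits the Algorithm::DecisionTree constructor, so the same keyword
##  arguments should apply; get_training_data() is assumed to be the inherited
##  data-loading method that populates the _training_data_hash used below.
my $eval_data = EvalTrainingData->new(
                    training_datafile        => 'training.csv',   # placeholder
                    csv_class_column_index   => 1,                # placeholder
                    csv_columns_for_features => [2, 3],           # placeholder
                );
$eval_data->get_training_data();        # assumed inherited method
$eval_data->evaluate_training_data();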

sub evaluate_training_data {
    my $self = shift;
    my $evaldebug = 0;
    die "The data evaluation function in the module can only be used when your " .
        "training data is in a CSV file" unless $self->{_training_datafile} =~ /\.csv$/;
    print "\nWill run a 10-fold cross-validation test on your training data to test its " .
          "class-discriminatory power:\n";
    my %all_training_data = %{$self->{_training_data_hash}};

lib/Algorithm/DecisionTree.pm

        $dt->{class_names} = undef;
    }
    my $self = {                                                         
            _dt                      => $dt,
            _feature                 => $feature,                                       
            _node_creation_entropy   => $entropy,
            _class_probabilities     => $class_probabilities,
            _branch_features_and_values_or_thresholds => $branch_features_and_values_or_thresholds,
            _linked_to => [],                                          
    };
    bless $self, $class;
    $self->{_serial_number} =  $self->get_next_serial_num();
    return $self;
}

sub how_many_nodes {
    my $self = shift;
    return $self->{_dt}->{nodes_created} + 1;
}
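##  Brief usage note (editorial addition): the total node count is read off
##  any node's back-reference to the tree; the "+ 1" presumably accounts for
##  serial numbers starting at 0.
##      my $total_nodes = $root_node->how_many_nodes();   # $root_node: any node of a grown tree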

sub set_class_names {

lib/Algorithm/DecisionTree.pm


use strict;                                                         
use Carp;

sub new {                                                           
    my ($class, %args) = @_;
    my @params = keys %args;
    croak "\nYou have used a wrong name for a keyword argument " .
          "--- perhaps a misspelling\n" 
          if check_for_illegal_params3(@params) == 0;   
    bless {
        _output_training_csv_file          =>   $args{'output_training_csv_file'} 
                                                   || croak("name for output_training_csv_file required"),
        _output_test_csv_file              =>   $args{'output_test_csv_file'} 
                                                   || croak("name for output_test_csv_file required"),
        _parameter_file                    =>   $args{'parameter_file'}
                                                         || croak("parameter_file required"),
        _number_of_samples_for_training    =>   $args{'number_of_samples_for_training'} 
                                                         || croak("number_of_samples_for_training required"),
        _number_of_samples_for_testing     =>   $args{'number_of_samples_for_testing'} 
                                                         || croak("number_of_samples_for_testing required"),

lib/Algorithm/DecisionTree.pm


use strict;                                                         
use Carp;

sub new {                                                           
    my ($class, %args) = @_;
    my @params = keys %args;
    croak "\nYou have used a wrong name for a keyword argument " .
          "--- perhaps a misspelling\n" 
          if check_for_illegal_params4(@params) == 0;   
    bless {
        _output_training_datafile          =>   $args{'output_training_datafile'} 
                                                   || die("name for output_training_datafile required"),
        _parameter_file                    =>   $args{'parameter_file'}
                                                   || die("parameter_file required"),
        _number_of_samples_for_training    =>   $args{'number_of_samples_for_training'} 
                                                   || die("number_of_samples_for_training required"),
        _debug                             =>    $args{debug} || 0,
        _class_names                       =>    [],
        _class_priors                      =>    [],
        _features_and_values_hash          =>    {},

lib/Algorithm/DecisionTree.pm

##  node indirectly if the node is a descendant of another node that is affected
##  directly by the training sample.

use strict; 
use Carp;

sub new {                                                           
    my ($class, $dt) = @_; 
    croak "The argument supplied to the DTIntrospection constructor must be of type DecisionTree"
        unless ref($dt) eq "Algorithm::DecisionTree";
    bless {                                                         
        _dt                                 => $dt,
        _root_dtnode                        => $dt->{_root_node},
        _samples_at_nodes_hash              => {},
        _branch_features_to_nodes_hash      => {},
        _sample_to_node_mapping_direct_hash => {},
        _node_serial_num_to_node_hash       => {}, 
        _awareness_raising_msg_shown        => 0,
        _debug                              => 0,
    }, $class;                                                     
}
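##  Usage sketch (editorial addition): the croak message above indicates the
##  package name is DTIntrospection and that its constructor must be handed a
##  trained Algorithm::DecisionTree instance whose tree has already been grown,
##  since the root node is cached at construction time.
##      my $introspector = DTIntrospection->new($dt);   # $dt: a trained Algorithm::DecisionTree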

lib/Algorithm/DecisionTreeWithBagging.pm


############################################   Constructor  ##############################################
sub new { 
    my ($class, %args) = @_;
    my @params = keys %args;
    my %dtargs = %args;
    delete $dtargs{how_many_bags};
    delete $dtargs{bag_overlap_fraction};    
    croak "\nYou have used a wrong name for a keyword argument --- perhaps a misspelling\n" 
                           if check_for_illegal_params(@params) == 0;
    bless {
        _training_datafile            =>  $args{training_datafile}, 
        _csv_class_column_index       =>  $args{csv_class_column_index} || undef,
        _csv_columns_for_features     =>  $args{csv_columns_for_features} || undef,
        _how_many_bags                =>  $args{how_many_bags} || croak("you must specify how_many_bags"),
        _bag_overlap_fraction         =>  $args{bag_overlap_fraction} || 0.20, 
        _csv_cleanup_needed           =>  $args{csv_cleanup_needed} || 0,
        _debug1                       =>  $args{debug1} || 0,
        _number_of_training_samples   =>  undef,
        _segmented_training_data      =>  {},
        _all_trees                    =>  {map {$_ => Algorithm::DecisionTree->new(%dtargs)} 0..$args{how_many_bags} - 1},

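##  Usage sketch (editorial addition): a minimal construction call using only
##  keyword names visible in the constructor above; the filename and column
##  indices are placeholders.
use Algorithm::DecisionTreeWithBagging;
my $bagged = Algorithm::DecisionTreeWithBagging->new(
                 training_datafile        => 'training.csv',    # placeholder
                 csv_class_column_index   => 1,                 # placeholder
                 csv_columns_for_features => [2, 3, 4, 5],      # placeholder
                 how_many_bags            => 4,
                 bag_overlap_fraction     => 0.20,
                 csv_cleanup_needed       => 1,
             );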
lib/Algorithm/RandomizedTreesForBigData.pm

############################################   Constructor  ##############################################
sub new { 
    my ($class, %args) = @_;
    my @params = keys %args;
    my %dtargs = %args;
    delete $dtargs{how_many_trees};
    delete $dtargs{how_many_training_samples_per_tree} if exists $dtargs{how_many_training_samples_per_tree};
    delete $dtargs{looking_for_needles_in_haystack} if exists $dtargs{looking_for_needles_in_haystack};
    croak "\nYou have used a wrong name for a keyword argument --- perhaps a misspelling\n" 
                           if check_for_illegal_params(@params) == 0;
    bless {
        _all_trees              =>  {map {$_ => Algorithm::DecisionTree->new(%dtargs)} 0..$args{how_many_trees}-1},
        _csv_cleanup_needed                    =>  $args{csv_cleanup_needed} || 0,
        _looking_for_needles_in_haystack       =>  $args{looking_for_needles_in_haystack}, 
        _how_many_training_samples_per_tree    =>  $args{how_many_training_samples_per_tree},
        _training_datafile                     =>  $args{training_datafile}, 
        _csv_class_column_index                =>  $args{csv_class_column_index} || undef,
        _csv_columns_for_features              =>  $args{csv_columns_for_features} || undef,
        _how_many_trees                        =>  $args{how_many_trees} || die "must specify number of trees",
        _root_nodes                            =>  [],
        _training_data_for_trees               =>  {map {$_ => []} 0..$args{how_many_trees} - 1},

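##  Usage sketch (editorial addition): a minimal construction call using only
##  keyword names visible in the constructor above; all values are placeholders.
use Algorithm::RandomizedTreesForBigData;
my $randomized = Algorithm::RandomizedTreesForBigData->new(
                     training_datafile                  => 'training.csv',  # placeholder
                     csv_class_column_index             => 1,               # placeholder
                     csv_columns_for_features           => [2, 3, 4, 5],    # placeholder
                     how_many_trees                     => 5,
                     how_many_training_samples_per_tree => 50,
                 );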
lib/Algorithm/RegressionTree.pm

    my %dtargs = %args;
    delete $dtargs{dependent_variable_column};
    delete $dtargs{predictor_columns};
    delete $dtargs{mse_threshold};
    delete $dtargs{need_data_normalization};
    delete $dtargs{jacobian_choice};
    delete $dtargs{debug1_r};
    delete $dtargs{debug2_r};
    delete $dtargs{debug3_r};
    my $instance = Algorithm::DecisionTree->new(%dtargs);
    bless $instance, $class;
    $instance->{_dependent_variable_column}       =  $args{dependent_variable_column} || undef;
    $instance->{_predictor_columns}               =  $args{predictor_columns} || 0;
    $instance->{_mse_threshold}                   =  $args{mse_threshold} || 0.01;
    $instance->{_jacobian_choice}                 =  $args{jacobian_choice} || 0;
    $instance->{_need_data_normalization}         =  $args{need_data_normalization} || 0;
    $instance->{_dependent_var}                   =  undef;
    $instance->{_dependent_var_values}            =  undef;
    $instance->{_samples_dependent_var_val_hash}  =  undef;
    $instance->{_root_node}                       =  undef;
    $instance->{_debug1_r}                        =  $args{debug1_r} || 0;
    $instance->{_debug2_r}                        =  $args{debug2_r} || 0;
    $instance->{_debug3_r}                        =  $args{debug3_r} || 0;
    $instance->{_sample_points_for_dependent_var} =  [];
    $instance->{_output_for_plots}                =  {};
    $instance->{_output_for_surface_plots}        =  {};
    return $instance;
}
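##  Usage sketch (editorial addition): a minimal construction call using only
##  keyword names visible in the constructor above; the filename and column
##  indices are placeholders.  The data-loading call refers to
##  get_training_data_for_regression() defined just below.
use Algorithm::RegressionTree;
my $reg_tree = Algorithm::RegressionTree->new(
                   training_datafile         => 'training.csv',   # placeholder
                   dependent_variable_column => 2,                # placeholder
                   predictor_columns         => [1],              # placeholder
                   mse_threshold             => 0.01,
                   jacobian_choice           => 0,
               );
$reg_tree->get_training_data_for_regression();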

##############################################  Methods  #################################################
sub get_training_data_for_regression {
    my $self = shift;
    die("Aborted. get_training_data_csv() is only for CSV files") unless $self->{_training_datafile} =~ /\.csv$/;
    my @dependent_var_values;
    my %all_record_ids_with_dependent_var_values;
    my $firstline;
    my %data_hash;

lib/Algorithm/RegressionTree.pm

            _rt                      => $rt,
            _feature                 => $feature,                                       
            _error                   => $error,                                       
            _beta                    => $beta,                                       
            _branch_features_and_values_or_thresholds => $branch_features_and_values_or_thresholds,
            _num_data_points         => undef,                                       
            _XMatrix                 => undef,
            _YVector                 => undef,
            _linked_to               => [],                                          
    };
    bless $self, $class;
    $self->{_serial_number} =  $self->get_next_serial_num();
    return $self;
}

sub node_prediction_from_features_and_values {
    my $self = shift;
    my $feature_and_values = shift;
    my $ncols = $self->{_XMatrix}->cols;
    my $pattern = '(\S+)\s*=\s*(\S+)';
    my ($feature,$value);


