view release on metacpan or search on metacpan
lib/Algorithm/BoostedDecisionTree.pm view on Meta::CPAN
############################################ Constructor ##############################################
## Constructor for the boosted classifier.  Accepts all keyword arguments legal
## for Algorithm::DecisionTree plus the boosting-specific 'how_many_stages' and
## 'stagedebug' options.  Returns an instance blessed into the calling class.
sub new {
    my ($class, %args) = @_;
    my @params = keys %args;
    croak "\nYou have used a wrong name for a keyword argument --- perhaps a misspelling\n"
        if check_for_illegal_params(@params) == 0;
    ## Strip the boosting-only argument before delegating, since the base-class
    ## constructor would reject 'how_many_stages' as an illegal parameter.
    my %dtargs = %args;
    delete $dtargs{how_many_stages};
    my $instance = Algorithm::DecisionTree->new(%dtargs);
    $instance->{_how_many_stages} = $args{how_many_stages} || undef;
    $instance->{_stagedebug}      = $args{stagedebug} || 0;
    ## Per-stage bookkeeping, keyed 0 .. how_many_stages (stage 0 is the base tree;
    ## each stage gets its own DecisionTree built from the same base arguments).
    $instance->{_training_samples}       = {map {$_ => []} 0..$args{how_many_stages}};
    $instance->{_all_trees}              = {map {$_ => Algorithm::DecisionTree->new(%dtargs)} 0..$args{how_many_stages}};
    $instance->{_root_nodes}             = {map {$_ => undef} 0..$args{how_many_stages}};
    $instance->{_sample_selection_probs} = {map {$_ => {}} 0..$args{how_many_stages}};
    $instance->{_trust_factors}          = {map {$_ => undef} 0..$args{how_many_stages}};
    $instance->{_misclassified_samples}  = {map {$_ => []} 0..$args{how_many_stages}};
    $instance->{_classifications}                 = undef;
    $instance->{_trust_weighted_decision_classes} = undef;
    ## Bless exactly once into the boosted subclass (the original code blessed the
    ## same instance twice, which was redundant) and return it explicitly.
    return bless $instance, $class;
}
############################################## Methods #################################################
sub get_training_data_for_base_tree {
my $self = shift;
die("Aborted. get_training_data_csv() is only for CSV files") unless $self->{_training_datafile} =~ /\.csv$/;
my %class_names = ();
my %all_record_ids_with_class_labels;
my $firstline;
my %data_hash;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
$class = shift;
$eval_or_boosting_mode = shift;
die unless $eval_or_boosting_mode eq 'evalmode' || $eval_or_boosting_mode eq 'boostingmode';
die "Only one string arg allowed in eval and boosting modes" if @_;
}
unless ($eval_or_boosting_mode) {
my @params = keys %args;
croak "\nYou have used a wrong name for a keyword argument --- perhaps a misspelling\n"
if check_for_illegal_params2(@params) == 0;
}
bless {
_training_datafile => $args{training_datafile},
_entropy_threshold => $args{entropy_threshold} || 0.01,
_max_depth_desired => exists $args{max_depth_desired} ?
$args{max_depth_desired} : undef,
_debug1 => $args{debug1} || 0,
_debug2 => $args{debug2} || 0,
_debug3 => $args{debug3} || 0,
_csv_class_column_index => $args{csv_class_column_index} || undef,
_csv_columns_for_features => $args{csv_columns_for_features} || undef,
_symbolic_to_numeric_cardinality_threshold
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
## training are allowed to have numeric features --- which is the more interesting
## case for evaluation analytics.
## EvalTrainingData is a thin subclass of Algorithm::DecisionTree used when
## evaluating the class-discriminatory power of the training data.  Note: this
## constraint applies only when the training data comes with numeric features,
## which is the more interesting case for evaluation analytics.
package EvalTrainingData;
@EvalTrainingData::ISA = ('Algorithm::DecisionTree');

## Constructor: delegate all arguments to the base-class constructor, then
## re-bless the resulting object into this subclass.
sub new {
    my $subclass = shift;
    return bless Algorithm::DecisionTree->new(@_), $subclass;
}
sub evaluate_training_data {
my $self = shift;
my $evaldebug = 0;
die "The data evaluation function in the module can only be used when your " .
"training data is in a CSV file" unless $self->{_training_datafile} =~ /\.csv$/;
print "\nWill run a 10-fold cross-validation test on your training data to test its " .
"class-discriminatory power:\n";
my %all_training_data = %{$self->{_training_data_hash}};
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
$dt->{class_names} = undef;
}
my $self = {
_dt => $dt,
_feature => $feature,
_node_creation_entropy => $entropy,
_class_probabilities => $class_probabilities,
_branch_features_and_values_or_thresholds => $branch_features_and_values_or_thresholds,
_linked_to => [],
};
bless $self, $class;
$self->{_serial_number} = $self->get_next_serial_num();
return $self;
}
## Returns the total number of nodes in the decision tree this node belongs to.
## Node serial numbers are assigned starting at zero, hence the +1.
sub how_many_nodes {
    my ($self) = @_;
    my $created_so_far = $self->{_dt}{nodes_created};
    return $created_so_far + 1;
}
sub set_class_names {
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
use strict;
use Carp;
sub new {
my ($class, %args) = @_;
my @params = keys %args;
croak "\nYou have used a wrong name for a keyword argument " .
"--- perhaps a misspelling\n"
if check_for_illegal_params3(@params) == 0;
bless {
_output_training_csv_file => $args{'output_training_csv_file'}
|| croak("name for output_training_csv_file required"),
_output_test_csv_file => $args{'output_test_csv_file'}
|| croak("name for output_test_csv_file required"),
_parameter_file => $args{'parameter_file'}
|| croak("parameter_file required"),
_number_of_samples_for_training => $args{'number_of_samples_for_training'}
|| croak("number_of_samples_for_training"),
_number_of_samples_for_testing => $args{'number_of_samples_for_testing'}
|| croak("number_of_samples_for_testing"),
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
use strict;
use Carp;
sub new {
my ($class, %args) = @_;
my @params = keys %args;
croak "\nYou have used a wrong name for a keyword argument " .
"--- perhaps a misspelling\n"
if check_for_illegal_params4(@params) == 0;
bless {
_output_training_datafile => $args{'output_training_datafile'}
|| die("name for output_training_datafile required"),
_parameter_file => $args{'parameter_file'}
|| die("parameter_file required"),
_number_of_samples_for_training => $args{'number_of_samples_for_training'}
|| die("number_of_samples_for_training required"),
_debug => $args{debug} || 0,
_class_names => [],
_class_priors => [],
_features_and_values_hash => {},
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
## node indirectly if the node is a descendant of another node that is affected
## directly by the training sample.
use strict;
use Carp;
use Scalar::Util qw(blessed);

## Constructor for the introspection helper.  Takes the Algorithm::DecisionTree
## instance whose node/sample relationships are to be introspected and returns a
## blessed hash holding the (initially empty) introspection bookkeeping tables.
sub new {
    my ($class, $dt) = @_;
    ## Use blessed()+isa() rather than a ref() string comparison so that
    ## instances re-blessed into subclasses of Algorithm::DecisionTree (as the
    ## boosting/bagging wrappers in this distribution do) are also accepted.
    croak "The argument supplied to the DTIntrospection constructor must be of type DecisionTree"
        unless blessed($dt) && $dt->isa("Algorithm::DecisionTree");
    bless {
        _dt                                  => $dt,
        _root_dtnode                         => $dt->{_root_node},
        _samples_at_nodes_hash               => {},
        _branch_features_to_nodes_hash       => {},
        _sample_to_node_mapping_direct_hash  => {},
        _node_serial_num_to_node_hash        => {},
        _awareness_raising_msg_shown         => 0,
        _debug                               => 0,
    }, $class;
}
lib/Algorithm/DecisionTreeWithBagging.pm view on Meta::CPAN
############################################ Constructor ##############################################
sub new {
my ($class, %args) = @_;
my @params = keys %args;
my %dtargs = %args;
delete $dtargs{how_many_bags};
delete $dtargs{bag_overlap_fraction};
croak "\nYou have used a wrong name for a keyword argument --- perhaps a misspelling\n"
if check_for_illegal_params(@params) == 0;
bless {
_training_datafile => $args{training_datafile},
_csv_class_column_index => $args{csv_class_column_index} || undef,
_csv_columns_for_features => $args{csv_columns_for_features} || undef,
_how_many_bags => $args{how_many_bags} || croak("you must specify how_many_bags"),
_bag_overlap_fraction => $args{bag_overlap_fraction} || 0.20,
_csv_cleanup_needed => $args{csv_cleanup_needed} || 0,
_debug1 => $args{debug1} || 0,
_number_of_training_samples => undef,
_segmented_training_data => {},
_all_trees => {map {$_ => Algorithm::DecisionTree->new(%dtargs)} 0..$args{how_many_bags} - 1},
lib/Algorithm/RandomizedTreesForBigData.pm view on Meta::CPAN
############################################ Constructor ##############################################
sub new {
my ($class, %args) = @_;
my @params = keys %args;
my %dtargs = %args;
delete $dtargs{how_many_trees};
delete $dtargs{how_many_training_samples_per_tree} if exists $dtargs{how_many_training_samples_per_tree};
delete $dtargs{looking_for_needles_in_haystack} if exists $dtargs{looking_for_needles_in_haystack};
croak "\nYou have used a wrong name for a keyword argument --- perhaps a misspelling\n"
if check_for_illegal_params(@params) == 0;
bless {
_all_trees => {map {$_ => Algorithm::DecisionTree->new(%dtargs)} 0..$args{how_many_trees}-1},
_csv_cleanup_needed => $args{csv_cleanup_needed} || 0,
_looking_for_needles_in_haystack => $args{looking_for_needles_in_haystack},
_how_many_training_samples_per_tree => $args{how_many_training_samples_per_tree},
_training_datafile => $args{training_datafile},
_csv_class_column_index => $args{csv_class_column_index} || undef,
_csv_columns_for_features => $args{csv_columns_for_features} || undef,
_how_many_trees => $args{how_many_trees} || die "must specify number of trees",
_root_nodes => [],
_training_data_for_trees => {map {$_ => []} 0..$args{how_many_trees} - 1},
lib/Algorithm/RegressionTree.pm view on Meta::CPAN
my %dtargs = %args;
delete $dtargs{dependent_variable_column};
delete $dtargs{predictor_columns};
delete $dtargs{mse_threshold};
delete $dtargs{need_data_normalization};
delete $dtargs{jacobian_choice};
delete $dtargs{debug1_r};
delete $dtargs{debug2_r};
delete $dtargs{debug3_r};
my $instance = Algorithm::DecisionTree->new(%dtargs);
bless $instance, $class;
$instance->{_dependent_variable_column} = $args{dependent_variable_column} || undef;
$instance->{_predictor_columns} = $args{predictor_columns} || 0;
$instance->{_mse_threshold} = $args{mse_threshold} || 0.01;
$instance->{_jacobian_choice} = $args{jacobian_choice} || 0;
$instance->{_need_data_normalization} = $args{need_data_normalization} || 0;
$instance->{_dependent_var} = undef;
$instance->{_dependent_var_values} = undef;
$instance->{_samples_dependent_var_val_hash} = undef;
$instance->{_root_node} = undef;
$instance->{_debug1_r} = $args{debug1_r} || 0;
$instance->{_debug2_r} = $args{debug2_r} || 0;
$instance->{_debug3_r} = $args{debug3_r} || 0;
$instance->{_sample_points_for_dependent_var} = [];
$instance->{_output_for_plots} = {};
$instance->{_output_for_surface_plots} = {};
bless $instance, $class;
}
############################################## Methods #################################################
sub get_training_data_for_regression {
my $self = shift;
die("Aborted. get_training_data_csv() is only for CSV files") unless $self->{_training_datafile} =~ /\.csv$/;
my @dependent_var_values;
my %all_record_ids_with_dependent_var_values;
my $firstline;
my %data_hash;
lib/Algorithm/RegressionTree.pm view on Meta::CPAN
_rt => $rt,
_feature => $feature,
_error => $error,
_beta => $beta,
_branch_features_and_values_or_thresholds => $branch_features_and_values_or_thresholds,
_num_data_points => undef,
_XMatrix => undef,
_YVector => undef,
_linked_to => [],
};
bless $self, $class;
$self->{_serial_number} = $self->get_next_serial_num();
return $self;
}
sub node_prediction_from_features_and_values {
my $self = shift;
my $feature_and_values = shift;
my $ncols = $self->{_XMatrix}->cols;
my $pattern = '(\S+)\s*=\s*(\S+)';
my ($feature,$value);