Algorithm-DecisionTree
view release on metacpan or search on metacpan
lib/Algorithm/DecisionTreeWithBagging.pm view on Meta::CPAN
package Algorithm::DecisionTreeWithBagging;
#--------------------------------------------------------------------------------------
# Copyright (c) 2017 Avinash Kak. All rights reserved. This program is free
# software. You may modify and/or distribute it under the same terms as Perl itself.
# This copyright notice must remain attached to the file.
#
# Algorithm::DecisionTreeWithBagging is a Perl module for incorporating bagging in
# decision tree construction and in classification using decision trees.
# -------------------------------------------------------------------------------------
#use lib 'blib/lib', 'blib/arch';
#use 5.10.0;
use strict;
use warnings;
use Carp;
use Algorithm::DecisionTree 3.43;
our $VERSION = '3.43';
############################################ Constructor ##############################################
sub new {
my ($class, %args) = @_;
my @params = keys %args;
my %dtargs = %args;
delete $dtargs{how_many_bags};
delete $dtargs{bag_overlap_fraction};
croak "\nYou have used a wrong name for a keyword argument --- perhaps a misspelling\n"
if check_for_illegal_params(@params) == 0;
bless {
_training_datafile => $args{training_datafile},
_csv_class_column_index => $args{csv_class_column_index} || undef,
_csv_columns_for_features => $args{csv_columns_for_features} || undef,
_how_many_bags => $args{how_many_bags} || croak("you must specify how_many_bags"),
_bag_overlap_fraction => $args{bag_overlap_fraction} || 0.20,
_csv_cleanup_needed => $args{csv_cleanup_needed} || 0,
_debug1 => $args{debug1} || 0,
_number_of_training_samples => undef,
_segmented_training_data => {},
_all_trees => {map {$_ => Algorithm::DecisionTree->new(%dtargs)} 0..$args{how_many_bags} - 1},
_root_nodes => [],
_bag_sizes => [],
_classifications => undef,
}, $class;
}
############################################## Methods #################################################
sub get_training_data_for_bagging {
my $self = shift;
die("Aborted. get_training_data_csv() is only for CSV files") unless $self->{_training_datafile} =~ /\.csv$/;
my %class_names = ();
my %all_record_ids_with_class_labels;
my $firstline;
my %data_hash;
$|++;
open FILEIN, $self->{_training_datafile} or die "Unable to open $self->{_training_datafile} $!";
my $record_index = 0;
my $firsetline;
while (<FILEIN>) {
next if /^[ ]*\r?\n?$/;
$_ =~ s/\r?\n?$//;
my $record = $self->{_csv_cleanup_needed} ? cleanup_csv($_) : $_;
if ($record_index == 0) {
$firstline = $record;
$record_index++;
next;
}
my @parts = split /,/, $record;
my $classname = $parts[$self->{_csv_class_column_index}];
$class_names{$classname} = 1;
my $record_label = shift @parts;
( run in 1.049 second using v1.01-cache-2.11-cpan-39bf76dae61 )