Algorithm-TrunkClassifier
view release on metacpan or search on metacpan
lib/Algorithm/TrunkClassifier/Classification.pm view on Meta::CPAN
package Algorithm::TrunkClassifier::Classification;
use warnings;
use strict;
use Algorithm::TrunkClassifier::DataWrapper;
use Algorithm::TrunkClassifier::FeatureSelection;
use Algorithm::TrunkClassifier::DecisionTrunk;
use Algorithm::TrunkClassifier::Util;
use POSIX;
our $VERSION = "v1.0.1";
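#Description: Function responsible for building decision trunks and classifying test samples. Trunks are built
# with leave-one-out folds over the training set and with the complete training set; the classification
# procedure determines how the training and test sets are established
#Parameters: (1) Package, (2) input dataset, (3) test dataset (copied as the test set by the "dual" procedure),
# (4) classification procedure ("loocv", "split" or "dual"), (5) split percent (used by the "split" procedure),
# (6) testset data file name, (7) classification variable name, (8) output folder name,
# (9) number of levels, (10) verbose flag, (11) input data file name, (12) useall flag
#Return value: None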
sub trainAndClassify($ $ $ $ $ $ $ $ $ $ $ $ $){
shift(@_);
my ($dataWrapper, $testset, $CLASSIFY, $SPLITPERCENT, $TESTFILE, $CLASSNAME, $OUTPUT, $LEVELS, $VERBOSE, $DATAFILE, $USEALL) = @_;
#Create output files
if(!-e $OUTPUT && $OUTPUT ne "."){
system("mkdir $OUTPUT");
}
open(PERFORMANCE, ">$OUTPUT/performance.txt") or die "Error: Unable to create output file\n";
open(LOO_TRUNKS, ">$OUTPUT/loo_trunks.txt") or die "Error: Unable to create output file\n";
open(CTS_TRUNKS, ">$OUTPUT/cts_trunks.txt") or die "Error: Unable to create output file\n";
open(REPORT, ">$OUTPUT/class_report.txt") or die "Error: Unable to create output file\n";
open(LOG, ">$OUTPUT/log.txt") or die "Error: Unable to create output file\n";
#Establish training and test set
my $trainingSet;
my $testSet;
if($CLASSIFY eq "loocv"){
$trainingSet = $dataWrapper->copy();
}
elsif($CLASSIFY eq "split"){
my $containsBoth = 0;
while(!$containsBoth){
$trainingSet = $dataWrapper->copy();
$testSet = $trainingSet->splitSamples($SPLITPERCENT);
my $class1 = $trainingSet->getClassOneName();
my $class2 = $trainingSet->getClassTwoName();
if($trainingSet->getClassSize($class1) && $trainingSet->getClassSize($class2)){
$containsBoth = 1;
}
}
}
elsif($CLASSIFY eq "dual"){
$trainingSet = $dataWrapper->copy();
$testSet = $testset->copy();
}
#Build trunks using leave-one-out
my %featureOccurrence;
my %selectedFeatures;
my %looTrunks = ("1" => [], "2" => [], "3" => [], "4" => [], "5" => []);
my $levelBreak = 0;
for(my $levelLimit = 1; $levelLimit <= 5; $levelLimit++){
if($VERBOSE){
print("Trunk classifier: Building decision trunks with $levelLimit level(s) using leave-one-out\n");
}
#Build one trunk for each left out sample
for(my $sampleIndex = 0; $sampleIndex < $trainingSet->getNumSamples(); $sampleIndex++){
if($VERBOSE){
print("Trunk classifier: Fold ", $sampleIndex + 1, " of ", $dataWrapper->getNumSamples(), "\n");
}
my $buildSet = $trainingSet->copy();
$buildSet->leaveOneOut($sampleIndex);
my $decisionTrunk = buildTrunk($buildSet, $levelLimit, $sampleIndex, \%featureOccurrence, \%selectedFeatures, \$levelBreak, $VERBOSE);
#Add trunk to hash
push(@{$looTrunks{$levelLimit}}, $decisionTrunk);
}
if($levelBreak){
undef $featureOccurrence{$levelLimit};
$looTrunks{$levelLimit} = [];
last;
}
}
#Build trunks using complete training set
my %ctsTrunks = ("1" => 0, "2" => 0, "3" => 0, "4" => 0, "5" => 0);
my %selFeats;
my %dummyHash;
$levelBreak = 0;
( run in 0.764 second using v1.01-cache-2.11-cpan-39bf76dae61 )