Algorithm-TrunkClassifier

 view release on metacpan or  search on metacpan

lib/Algorithm/TrunkClassifier/Classification.pm  view on Meta::CPAN

package Algorithm::TrunkClassifier::Classification;

use warnings;
use strict;

use Algorithm::TrunkClassifier::DataWrapper;
use Algorithm::TrunkClassifier::FeatureSelection;
use Algorithm::TrunkClassifier::DecisionTrunk;
use Algorithm::TrunkClassifier::Util;
use POSIX;

our $VERSION = "v1.0.1";

#Description: Function responsible for building decision trunks and classifying test samples using LOOCV
#Parameters: (1) Package, (2) input dataset, (3) test dataset, (4) classification procedure, (5) split percent,
#            (6) testset data file name, (7) classification variable name, (8) output folder name,
#            (9) number of levels, (10) verbose flag, (11) input data file name, (12) useall flag
#Return value: None
sub trainAndClassify($ $ $ $ $ $ $ $ $ $ $ $ $){
	shift(@_);
	my ($dataWrapper, $testset, $CLASSIFY, $SPLITPERCENT, $TESTFILE, $CLASSNAME, $OUTPUT, $LEVELS, $VERBOSE, $DATAFILE, $USEALL) = @_;
	
	#Create output files
	if(!-e $OUTPUT && $OUTPUT ne "."){
		system("mkdir $OUTPUT");
	}
	open(PERFORMANCE, ">$OUTPUT/performance.txt") or die "Error: Unable to create output file\n";
	open(LOO_TRUNKS, ">$OUTPUT/loo_trunks.txt") or die "Error: Unable to create output file\n";
	open(CTS_TRUNKS, ">$OUTPUT/cts_trunks.txt") or die "Error: Unable to create output file\n";
	open(REPORT, ">$OUTPUT/class_report.txt") or die "Error: Unable to create output file\n";
	open(LOG, ">$OUTPUT/log.txt") or die "Error: Unable to create output file\n";
	
	#Establish training and test set
	my $trainingSet;
	my $testSet;
	if($CLASSIFY eq "loocv"){
		$trainingSet = $dataWrapper->copy();
	}
	elsif($CLASSIFY eq "split"){
		my $containsBoth = 0;
		while(!$containsBoth){
			$trainingSet = $dataWrapper->copy();
			$testSet = $trainingSet->splitSamples($SPLITPERCENT);
			my $class1 = $trainingSet->getClassOneName();
			my $class2 = $trainingSet->getClassTwoName();
			if($trainingSet->getClassSize($class1) && $trainingSet->getClassSize($class2)){
				$containsBoth = 1;
			}
		}
	}
	elsif($CLASSIFY eq "dual"){
		$trainingSet = $dataWrapper->copy();
		$testSet = $testset->copy();
	}
	
	#Build trunks using leave-one-out
	my %featureOccurrence;
	my %selectedFeatures;
	my %looTrunks = ("1" => [], "2" => [], "3" => [], "4" => [], "5" => []);
	my $levelBreak = 0;
	for(my $levelLimit = 1; $levelLimit <= 5; $levelLimit++){
		if($VERBOSE){
			print("Trunk classifier: Building decision trunks with $levelLimit level(s) using leave-one-out\n");
		}
		
		#Build one trunk for each left out sample
		for(my $sampleIndex = 0; $sampleIndex < $trainingSet->getNumSamples(); $sampleIndex++){
			if($VERBOSE){
				print("Trunk classifier: Fold ", $sampleIndex + 1, " of ", $dataWrapper->getNumSamples(), "\n");
			}
			my $buildSet = $trainingSet->copy();
			$buildSet->leaveOneOut($sampleIndex);
			my $decisionTrunk = buildTrunk($buildSet, $levelLimit, $sampleIndex, \%featureOccurrence, \%selectedFeatures, \$levelBreak, $VERBOSE);
			
			#Add trunk to hash
			push(@{$looTrunks{$levelLimit}}, $decisionTrunk);
			
		}
		
		if($levelBreak){
			undef $featureOccurrence{$levelLimit};
			$looTrunks{$levelLimit} = [];
			last;
		}
	}
	
	#Build trunks using complete training set
	my %ctsTrunks = ("1" => 0, "2" => 0, "3" => 0, "4" => 0, "5" => 0);
	my %selFeats;
	my %dummyHash;
	$levelBreak = 0;



( run in 0.764 second using v1.01-cache-2.11-cpan-39bf76dae61 )