Algorithm-TrunkClassifier

 view release on metacpan or  search on metacpan

lib/Algorithm/TrunkClassifier/Classification.pm  view on Meta::CPAN

use Algorithm::TrunkClassifier::Util;
use POSIX;

our $VERSION = "v1.0.1";

#Description: Function responsible for building decision trunks and classifying test samples using LOOCV
#Parameters: (1) Package, (2) input dataset, (3) test dataset, (4) classification procedure, (5) split percent,
#            (6) testset data file name, (7) classification variable name, (8) output folder name,
#            (9) number of levels, (10) verbose flag, (11) input data file name (12) useall flag
#Return value: None
sub trainAndClassify($ $ $ $ $ $ $ $ $ $ $ $ $){
	shift(@_);
	my ($dataWrapper, $testset, $CLASSIFY, $SPLITPERCENT, $TESTFILE, $CLASSNAME, $OUTPUT, $LEVELS, $VERBOSE, $DATAFILE, $USEALL) = @_;
	
	#Create output files
	if(!-e $OUTPUT && $OUTPUT ne "."){
		system("mkdir $OUTPUT");
	}
	open(PERFORMANCE, ">$OUTPUT/performance.txt") or die "Error: Unable to create output file\n";
	open(LOO_TRUNKS, ">$OUTPUT/loo_trunks.txt") or die "Error: Unable to create output file\n";
	open(CTS_TRUNKS, ">$OUTPUT/cts_trunks.txt") or die "Error: Unable to create output file\n";

lib/Algorithm/TrunkClassifier/Classification.pm  view on Meta::CPAN

	close(LOG);
	if($VERBOSE){
		print("Trunk classifier: Job finished\n");
	}
}

#Description: Wrapper for the trunk build loop
#Parameters: (1) Training dataset, (2) level limit, (3) sample index, (4) feature occurrence hash ref,
#            (5) selected features hash ref, (6) level break flag ref, (7) verbose flag
#Return value: Decision trunk object
sub buildTrunk($ $ $ $ $ $ $){
	my ($buildSet, $levelLimit, $sampleIndex, $featOccurRef, $selFeatRef, $levelBreakRef, $VERBOSE) = @_;
	
	#Trunk build loop
	my $decisionTrunk = Algorithm::TrunkClassifier::DecisionTrunk->new();
	my $noSampleBreak = 0;
	for(my $levelIndex = 1; $levelIndex <= $levelLimit; $levelIndex++){
	
		#Perform feature selection
		my $featureName;
		my $featureIndex;

lib/Algorithm/TrunkClassifier/Classification.pm  view on Meta::CPAN

		if($noSampleBreak){
			last;
		}
	}
	return $decisionTrunk;
}

#Description: Determine the decision trunk level with highest feature selection stability
#Parameters: (1) Hash reference containing selected features, (2) number of samples in the dataset
#Return value: Number of decision trunk levels to use for classification
sub stabilityCheck($ $){
	my ($hashRef, $numSamples) = @_;
	my %featOccurrence = %{$hashRef};
	my $numThresh = 6;
	my $chosenLevel = 0;
	foreach my $levelIndex (1 .. 5){
		if(!$featOccurrence{$levelIndex}){
			next;
		}
		my %features = %{$featOccurrence{$levelIndex}};
		my $numFeats = scalar(keys(%features));

lib/Algorithm/TrunkClassifier/CommandProcessor.pm  view on Meta::CPAN


our $VERSION = 'v1.0.1';

my %commands;

#Description: Command processor constructor
#Parameters: (1) TrunkClassifier::CommandProcessor, (2) classification procedure ref, (3), split ref, (4) testset ref,
#            (5) class name variable ref, (6) output folder variable ref, (7) level variable ref, (8) prospect variable ref,
#            (9) supplementary file variable ref, (10) verbose variable ref, (11) useall variable ref, (12) input data file variable ref
#Return value: TrunkClassifier::CommandProcessor object
sub new($ $ $ $ $ $ $ $ $ $ $ $ $){
	my ($class, $classifyRef, $splitPercentRef, $testsetRef, $classnameRef, $outputRef, $levelRef, $prospectRef, $suppfileRef, $verboseRef, $useallRef, $datafileRef) = @_;
	%commands = (
		"-p"			=> {"numArgs" => 1, "validArgs" => 'loocv|split|dual', "var" => $classifyRef, "sub" => \&checkTestsetArg},
		"--procedure"	=> {"numArgs" => 1, "validArgs" => 'loocv|split|dual', "var" => $classifyRef},
		"-e"            => {"numArgs" => 1, "validArgs" => '^[1-9][0-9]?$', "var" => $splitPercentRef},
		"--split"       => {"numArgs" => 1, "validArgs" => '^[1-9][0-9]?$', "var" => $splitPercentRef},
		"-t"			=> {"numArgs" => 1, "validArgs" => '.+', "var" => $testsetRef},
		"--testset"		=> {"numArgs" => 1, "validArgs" => '.+', "var" => $testsetRef},
		"-c"			=> {"numArgs" => 1, "validArgs" => '.+', "var" => $classnameRef},
		"--classvar"	=> {"numArgs" => 1, "validArgs" => '.+', "var" => $classnameRef},

lib/Algorithm/TrunkClassifier/CommandProcessor.pm  view on Meta::CPAN

		}
	}
	if(!${$self->{"input"}}){
		die "Error: Input data file not supplied\n";
	}
}

#Description: Checks that the -t option is supplied if -c dual is used
#Parameters: (1) The -c argument, (2) command line arguments
#Return value: None
sub checkTestsetArg($ $){
	my ($argument, $comLineRef) = @_;
	if($argument eq "dual"){
		my $foundT = 0;
		foreach my $arg (@{$comLineRef}){
			if($arg eq "-t"){
				$foundT = 1;
				last;
			}
		}
		if(!$foundT){
			die "Error: Command line option -t must be given when -c dual is used\n";
		}
	}
}

#Description: Command line help
#Parameters: None
#Return value: None
sub commandHelp(){
	my $doc = <<END;
Usage
    perl trunk_classifier.pl [Options] [File]

Options
	-p, --procedure     Classification procedure to use [loocv|split|dual]
	-e, --split         Percentage of samples to use as test set when using -p split
	-t, --testset       Dataset to classify when using -c dual
    -c, --classvar      Name of the classification variable to use
    -o, --output        Name of the output folder

lib/Algorithm/TrunkClassifier/DataWrapper.pm  view on Meta::CPAN

	}
	
	#Read input data file
	readExpData($self, $className, $prospect, $dataFileName, $datasetType);
	return $self;
}

#Description: Reads the supplementary file and writes new input data file with meta data
#Parameters: (1) supplementary file name, (2) input data file name, (3) dataset type
#Return value: New input data file name
sub readSuppFile($ $ $ $){
	my ($suppFileName, $dataFileName, $VERBOSE, $datasetType) = @_;

	#Read supplementary file
	open(SUPP_FILE, $suppFileName) or die "Error: Unable to open supplementary file '$suppFileName'\n";
	my @suppFile = <SUPP_FILE>;
	my $content = join("", @suppFile);
	$content =~ s/\r|\n\r|\r\n/\n/g;
	@suppFile = split(/\n+/, $content);
	close(SUPP_FILE);
	

lib/Algorithm/TrunkClassifier/DataWrapper.pm  view on Meta::CPAN

	}
	print(DATA_FILE $meta . join("", @dataFile));
	close(DATA_FILE);
	return $dataFileName;
}

#Description: Reads input data file with expression values and meta data
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) classification variable name
#            (3) prospect flag, (4) input data file name, (5) dataset type
#Return value: None
sub readExpData($ $ $ $ $){
	my ($self, $className, $prospect, $dataFileName, $datasetType) = @_;
	$className = uc($className);
	
	#Read input data file
	if(!open(DATA_FILE, $dataFileName)){
		die "Error: Unable to open $datasetType '$dataFileName'\n";
	}
	my @dataFile = <DATA_FILE>;
	close(DATA_FILE);
	my $content = join("", @dataFile);

lib/Algorithm/TrunkClassifier/DataWrapper.pm  view on Meta::CPAN

	$self->{"rownames"} = \@probeNames;
	$self->{"data_matrix"} = \@dataMatrix;
	$self->{"class_vector"} = \@incClassVector;
	$self->{"class_one"} = $classOne;
	$self->{"class_two"} = $classTwo;
}

#Description: Returns the number of samples in the dataset
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: Number of elements in "colnames" attribute
sub getNumSamples($){
	my $self = shift(@_);
	return scalar(@{$self->{"colnames"}});
}

#Description: Returns the number of probes in the dataset
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: Number of rows in "rownames" array
sub getNumProbes($){
	my $self = shift(@_);
	return scalar(@{$self->{"rownames"}});
}

#Description: Returns the row names of the DataWrapper object
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: Array of row names
sub getProbeList($){
	my $self = shift(@_);
	return @{$self->{"rownames"}};
}

#Description: Returns a reference to the data matrix
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: Array reference
sub getDataMatrix($){
	my $self = shift(@_);
	return $self->{"data_matrix"};
}

#Description: Returns a reference to the class vector
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: Array reference
sub getClassVector($){
	my $self = shift(@_);
	return $self->{"class_vector"};
}

#Description: Returns the name of class one
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: Class name
sub getClassOneName($){
	my $self = shift(@_);
	return $self->{"class_one"};
}

#Description: Returns the name of class two
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: Class name
sub getClassTwoName($){
	my $self = shift(@_);
	return $self->{"class_two"};
}

#Description: Returns a copy of a TrunkClassifier::DataWrapper object
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: New TrunkClassifier::DataWrapper object
sub copy($){
	my $self = shift(@_);
	my $newWrapper = Algorithm::TrunkClassifier::DataWrapper->new();
	my @colnames = @{$self->{"colnames"}};
	my @rownames = @{$self->{"rownames"}};
	my @classVector = @{$self->{"class_vector"}};
	my @dataMatrix;
	foreach my $arrayRef (@{$self->{"data_matrix"}}){
		my @arrayCopy = @{$arrayRef};
		push(@dataMatrix, \@arrayCopy);
	}

lib/Algorithm/TrunkClassifier/DataWrapper.pm  view on Meta::CPAN

	$newWrapper->{"data_matrix"} = \@dataMatrix;
	$newWrapper->{"class_vector"} = \@classVector;
	$newWrapper->{"class_one"} = $self->{"class_one"};
	$newWrapper->{"class_two"} = $self->{"class_two"};
	return $newWrapper;
}

#Description: Removes one sample from a TrunkClassifier::DataWrapper object
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) index of sample to remove
#Return value: TrunkClassifier::DataWrapper object containing the removed sample
sub leaveOneOut($ $){
	my ($self, $index) = @_;
	my @colnames = ($self->{"colnames"}[$index]);
	my @rownames = @{$self->{"rownames"}};
	my @classVector = ($self->{"class_vector"}[$index]);
	my @matrixCol;
	for(my $row = 0; $row < scalar(@rownames); $row++){
		my @colArray = splice(@{$self->{"data_matrix"}[$row]}, $index, 1);
		push(@matrixCol, \@colArray);
	}
	splice(@{$self->{"colnames"}}, $index, 1);

lib/Algorithm/TrunkClassifier/DataWrapper.pm  view on Meta::CPAN

	$newWrapper->{"data_matrix"} = \@matrixCol;
	$newWrapper->{"class_vector"} = \@classVector;
	$newWrapper->{"class_one"} = $self->{"class_one"};
	$newWrapper->{"class_two"} = $self->{"class_two"};
	return $newWrapper;
}

#Description: Removes a percentage of samples from a TrunkClassifier::DataWrapper object
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) split percent
#Return value: TrunkClassifier::DataWrapper object containing the removed samples
sub splitSamples($ $){
	my ($self, $split) = @_;
	my $totNumSamples = $self->getNumSamples();
	my $testSetSize = floor(($split / 100) * $totNumSamples);
	my @colnames;
	my @rownames = $self->getProbeList();
	my @classVector;
	my @matrix;
	for(my $row = 0; $row < $self->getNumProbes(); $row++){
		my @array;
		push(@matrix, \@array);

lib/Algorithm/TrunkClassifier/DataWrapper.pm  view on Meta::CPAN

	$testSet->{"data_matrix"} = \@matrix;
	$testSet->{"class_vector"} = \@classVector;
	$testSet->{"class_one"} = $self->{"class_one"};
	$testSet->{"class_two"} = $self->{"class_two"};
	return $testSet;
}

#Description: Returns the number of samples in the specified class
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) class
#Return value: Array with column indexes
sub getClassSize($ $){
	my ($self, $class) = @_;
	my $classSize = 0;
	foreach my $sampleClass (@{$self->{"class_vector"}}){
		if($sampleClass eq $class){
			$classSize++;
		}
	}
	return $classSize;
}

#Description: Returns the probe name of the probe row index given as argument
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) probe row index
#Return value: Probe name
sub getProbeName($ $){
	my ($self, $probeIndex) = @_;
	return ${$self->{"rownames"}}[$probeIndex];
}

#Description: Returns the probe row index of the probe name given as argument
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) probe name
#Return value: Probe row index
sub getProbeIndex($ $){
	my ($self, $probeName) = @_;
	for(my $probeIndex = 0; $probeIndex < $self->getNumProbes(); $probeIndex++){
		if($self->{"rownames"}[$probeIndex] eq $probeName){
			return $probeIndex;
		}
	}
	return undef;
}

#Description: Returns the data matrix row corresponding to the argument index
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) row index
#Return value: Array
sub getMatrixRow($ $){
	my ($self, $rowIndex) = @_;
	return @{$self->{"data_matrix"}[$rowIndex]};
}

#Description: Returns the sample name corresponding to the sample index given
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) sample index
#Return value: Array reference
sub getSampleName($ $){
	my ($self, $sampleIndex) = @_;
	return $self->{"colnames"}[$sampleIndex];
}

#Description: Removes a probe name from row names and its row from the data matrix
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) probe index
#Return value: None
sub removeProbe($ $){
	my ($self, $probeIndex) = @_;
	splice(@{$self->{"rownames"}}, $probeIndex, 1);
	splice(@{$self->{"data_matrix"}}, $probeIndex, 1);
}

#Description: Removes a sample name from col names, its class from class vector, and its column from the data matrix
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) sample index
#Return value: None
sub removeSample($ $){
	my ($self, $sampleIndex) = @_;
	splice(@{$self->{"colnames"}}, $sampleIndex, 1);
	splice(@{$self->{"class_vector"}}, $sampleIndex, 1);
	foreach my $rowref (@{$self->{"data_matrix"}}){
		splice(@{$rowref}, $sampleIndex, 1);
	}
}

return 1;

lib/Algorithm/TrunkClassifier/DecisionTrunk.pm  view on Meta::CPAN

package Algorithm::TrunkClassifier::DecisionTrunk;

use warnings;
use strict;

our $VERSION = 'v1.0.1';

#Description: DecisionTrunk constructor
#Parameters: (1) TrunkClassifier::DecisionTrunk class
#Return value: TrunkClassifier::DecisionTrunk object
sub new($){
	my $class = shift();
	my @names;
	my @lower;
	my @higher;
	my @lowerClass;
	my @higherClass;
	my $self = {
		"level_name" => \@names,
		"lower_threshold" => \@lower,
		"higher_threshold" => \@higher,
		"lower_class" => \@lowerClass,
		"higher_class" => \@higherClass
	};
	bless($self, $class);
	return $self;
}

#Description: Adds a decision level to the trunk
#Parameters: (1) TrunkClassifier::DecisionTrunk object, (2) level name, (3) lower threshold, (4) higher threshold, (5) lower class, (6) higher class
#Return value: None
sub addLevel($ $ $ $ $ $){
	my ($self, $levelName, $lowerT, $higherT, $lowerC, $higherC) = @_;
	push(@{$self->{"level_name"}}, $levelName);
	push(@{$self->{"lower_threshold"}}, $lowerT);
	push(@{$self->{"higher_threshold"}}, $higherT);
	push(@{$self->{"lower_class"}}, $lowerC);
	push(@{$self->{"higher_class"}}, $higherC);
}

#Description: Classifies the test set based on the thresholds in the trunk
#Parameters: (1) TrunkClassifier::DecisionTrunk object, (2) TrunkClassifier::DataWrapper object, (3) class one name, (4) class two name
#			 (5) class report array reference, (6) verbose flag
#Return value: Ratio of correct to total classification performance
sub classify($ $ $ $ $ $){
	my ($self, $testSet, $ClassOne, $classTwo, $classReport, $VERBOSE) = @_;
	my $class;
	my @classification;
	my $ratioCorrect = 0;
	for(my $sampleIndex = 0; $sampleIndex < $testSet->getNumSamples(); $sampleIndex++){
		$class = "";
		for(my $levelIndex = 0; $levelIndex < scalar(@{$self->{"level_name"}}); $levelIndex++){
			my $probeIndex = $testSet->getProbeIndex($self->{"level_name"}[$levelIndex]);
			my @probeRow = $testSet->getMatrixRow($probeIndex);
			if($probeRow[$sampleIndex] <= $self->{"lower_threshold"}[$levelIndex]){

lib/Algorithm/TrunkClassifier/DecisionTrunk.pm  view on Meta::CPAN

			$ratioCorrect++;
		}
	}
	$ratioCorrect /= $testSet->getNumSamples();
	return $ratioCorrect;
}

#Description: Returns a text report of the trunk structure
#Parameters: (1) TrunkClassifier::DecisionTrunk object
#Return value: String containing the trunk structure
sub report($){
	my $self = shift();
	my $report = "";
	for(my $level = 0; $level < scalar(@{$self->{"level_name"}}); $level++){
		my $name = $self->{"level_name"}[$level];
		my $lowerT = $self->{"lower_threshold"}[$level];
		my $lowerC = $self->{"lower_class"}[$level];
		my $higherT = $self->{"higher_threshold"}[$level];
		my $higherC = $self->{"higher_class"}[$level];
		$report .= "\t$name\n<= $lowerT ($lowerC)\t\t> $higherT ($higherC)\n\n";
	}

lib/Algorithm/TrunkClassifier/Util.pm  view on Meta::CPAN

package Algorithm::TrunkClassifier::Util;

use warnings;
use strict;

our $VERSION = 'v1.0.1';

#Description: Sorts two arrays in accending order based on values in the first
#Parameters: (1) Numerical array reference, (2) second array reference
#Return value: None
sub dataSort($ $){
	my ($numArrayRef, $secondArrayRef) = @_;
	my $limiter = 1;
	for(my $outer = 0; $outer < scalar(@{$numArrayRef}); $outer++){
		for(my $inner = 0; $inner < scalar(@{$numArrayRef}) - $limiter; $inner++){
			if(${$numArrayRef}[$inner] > ${$numArrayRef}[$inner+1]){
				my $buffer = ${$numArrayRef}[$inner];
				${$numArrayRef}[$inner] = ${$numArrayRef}[$inner+1];
				${$numArrayRef}[$inner+1] = $buffer;
				$buffer = ${$secondArrayRef}[$inner];
				${$secondArrayRef}[$inner] = ${$secondArrayRef}[$inner+1];



( run in 0.290 second using v1.01-cache-2.11-cpan-87723dcf8b7 )