Algorithm-TrunkClassifier


Algorithm/TrunkClassifier/FeatureSelection.xs


int
indTTest(expData, numFeatures, numSamples, sampleNames, normal, malign)
	double ** 	expData
	int 		numFeatures
	int 		numSamples
	char ** 	sampleNames
	char * 		normal
	char * 		malign
	
	OUTPUT:
		RETVAL
	
	CLEANUP:
		/* Free the rows of the NULL-terminated expression matrix,
		   then the row pointer array and the sample name array */
		int i = 0;
		while(expData[i] != NULL){
			free(expData[i]);
			i++;
		}
		free(expData);
		free(sampleNames);
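
The XSUB name suggests that feature selection ranks probes with an independent two-sample t-test comparing the normal and malign sample classes. The C implementation is not shown in this excerpt, so the Perl sketch below only illustrates the statistic for a single probe; the Welch (unequal-variance) form and the helper names are assumptions, not the module's actual code.

#Illustration only: independent two-sample t statistic (Welch form assumed)
#for one probe, given the expression values of the two sample groups.
#Each group is assumed to contain at least two values.
sub welchTStatistic{
	my ($groupA, $groupB) = @_;	#Array refs of expression values
	my ($meanA, $varA) = meanAndVariance($groupA);
	my ($meanB, $varB) = meanAndVariance($groupB);
	my $standardError = sqrt($varA / scalar(@{$groupA}) + $varB / scalar(@{$groupB}));
	return $standardError == 0 ? 0 : ($meanA - $meanB) / $standardError;
}

sub meanAndVariance{
	my ($values) = @_;
	my $n = scalar(@{$values});
	my $mean = 0;
	$mean += $_ for @{$values};
	$mean /= $n;
	my $variance = 0;
	$variance += ($_ - $mean) ** 2 for @{$values};
	$variance /= ($n - 1);	#Sample variance
	return ($mean, $variance);
}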

Algorithm/TrunkClassifier/ppport.h

PTR2ul|5.007001||p
PTRV|5.006000||p
PUSHMARK|||
PUSH_MULTICALL||5.009005|
PUSHi|||
PUSHmortal|5.009002||p
PUSHn|||
PUSHp|||
PUSHs|||
PUSHu|5.004000||p
PUTBACK|||
PerlIO_clearerr||5.007003|
PerlIO_close||5.007003|
PerlIO_context_layers||5.009004|
PerlIO_eof||5.007003|
PerlIO_error||5.007003|
PerlIO_fileno||5.007003|
PerlIO_fill||5.007003|
PerlIO_flush||5.007003|
PerlIO_get_base||5.007003|
PerlIO_get_bufsiz||5.007003|

Algorithm/TrunkClassifier/ppport.h

{
    dSP;
    SV* sv = newSVpv(p, 0);

    PUSHMARK(sp);
    eval_sv(sv, G_SCALAR);
    SvREFCNT_dec(sv);

    SPAGAIN;
    sv = POPs;
    PUTBACK;

    if (croak_on_error && SvTRUE(GvSV(errgv)))
	croak(SvPVx(GvSV(errgv), na));

    return sv;
}

#endif
#endif

Algorithm/TrunkClassifier/typemap

double **	doubleMatrix
char **		stringArray

INPUT

doubleMatrix
	AV* array;
	AV* temprow;
	I32 numRows;
	I32 numCols;
	I32 rowIndex;
	I32 colIndex;
	SV** tempSV;
	double** matrix;
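
The typemap declares two custom C types: doubleMatrix converts a Perl array of array references into a C double**, and stringArray converts an array of strings into a char**. The conversion code itself is truncated in this excerpt, so the Perl sketch below only shows the kind of data shapes those declarations imply; names and values are made up, and whether a reference or a flattened list is passed depends on the conversion code not shown here.

#Hypothetical Perl-side structures corresponding to the typemap entries above
my @expData = (
	[5.32, 7.10, 4.88],	#Expression values for one probe (one doubleMatrix row)
	[2.45, 1.98, 3.02],	#Expression values for another probe
);
my @sampleNames = ("Sample1", "Sample2", "Sample3");	#stringArray contents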

lib/Algorithm/TrunkClassifier.pm

our %EXPORT_TAGS = ( 'all' => [ qw(
	runClassifier
) ] );
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

#Classifier arguments
my $CLASSIFY = "loocv";		#Classification procedure (loocv|split|dual)
my $SPLITPERCENT = 20;		#Percentage of samples to use as test set when using -c split
my $TESTSET = "";			#Name of test dataset when using -c dual
my $CLASSNAME = "TISSUE";	#Name of classification variable
my $OUTPUT = ".";			#Name of output folder
my $LEVELS = 0;				#Number of levels in decision trunks (forced)
my $PROSPECT = "";			#Check input data without running classifier
my $SUPPFILE = "";			#File containing class information
my $VERBOSE = 0;			#Report progress during classifier run
my $USEALL = 0;				#Circumvent level selection and use all trunks for classification
my $DATAFILE = "";			#File containing input data

#Description: Wrapper function for running the decision trunk classifier
#Parameters: Command line arguments
#Return value: None
sub runClassifier{
	#Handle command line arguments
	my $processor = Algorithm::TrunkClassifier::CommandProcessor->new(\$CLASSIFY, \$SPLITPERCENT, \$TESTSET, \$CLASSNAME, \$OUTPUT, \$LEVELS, \$PROSPECT, \$SUPPFILE, \$VERBOSE, \$USEALL, \$DATAFILE);
	$processor->processCmd(@_);
	
	#Read input data
	if($VERBOSE){
		print("Trunk classifier: Reading input data\n");
	}
	my $dataWrapper = Algorithm::TrunkClassifier::DataWrapper->new($CLASSNAME, $PROSPECT, $SUPPFILE, $DATAFILE, $VERBOSE, "input data file");
	my $testset;
	if($CLASSIFY eq "dual"){
		$testset = Algorithm::TrunkClassifier::DataWrapper->new($CLASSNAME, $PROSPECT, $SUPPFILE, $TESTSET, $VERBOSE, "testset data file");

lib/Algorithm/TrunkClassifier.pm

					last;
				}
			}
			if(!$found){
				die "Error: Probe '$query' in input data file not found in testset data file\n";
			}
		}
	}
	
	#Run cross validation loop
	Algorithm::TrunkClassifier::Classification->trainAndClassify($dataWrapper, $testset, $CLASSIFY, $SPLITPERCENT, $TESTSET, $CLASSNAME, $OUTPUT, $LEVELS, $VERBOSE, $DATAFILE, $USEALL);
}

return 1;
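
A minimal sketch of driving the module directly from Perl rather than through the run_classifier.pl script. The arguments are forwarded to CommandProcessor exactly as command line arguments would be; of the options shown, only -c (loocv|split|dual) is confirmed by the comments above, and the data file name is a placeholder.

#Sketch only: invoking the classifier from another Perl program.
#"-c loocv" selects the default leave-one-out procedure and
#"expression_data.txt" is a placeholder input file name.
use strict;
use warnings;
use Algorithm::TrunkClassifier qw(runClassifier);

runClassifier("-c", "loocv", "expression_data.txt");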

lib/Algorithm/TrunkClassifier/Classification.pm


our $VERSION = "v1.0.1";

#Description: Function responsible for building decision trunks and classifying test samples using LOOCV
#Parameters: (1) Package, (2) input dataset, (3) test dataset, (4) classification procedure, (5) split percent,
#            (6) testset data file name, (7) classification variable name, (8) output folder name,
#            (9) number of levels, (10) verbose flag, (11) input data file name, (12) useall flag
#Return value: None
sub trainAndClassify($ $ $ $ $ $ $ $ $ $ $ $ $){
	shift(@_);
	my ($dataWrapper, $testset, $CLASSIFY, $SPLITPERCENT, $TESTFILE, $CLASSNAME, $OUTPUT, $LEVELS, $VERBOSE, $DATAFILE, $USEALL) = @_;
	
	#Create output files
	if(!-e $OUTPUT && $OUTPUT ne "."){
		system("mkdir $OUTPUT");
	}
	open(PERFORMANCE, ">$OUTPUT/performance.txt") or die "Error: Unable to create output file\n";
	open(LOO_TRUNKS, ">$OUTPUT/loo_trunks.txt") or die "Error: Unable to create output file\n";
	open(CTS_TRUNKS, ">$OUTPUT/cts_trunks.txt") or die "Error: Unable to create output file\n";
	open(REPORT, ">$OUTPUT/class_report.txt") or die "Error: Unable to create output file\n";
	open(LOG, ">$OUTPUT/log.txt") or die "Error: Unable to create output file\n";
	
	#Establish training and test set
	my $trainingSet;
	my $testSet;
	if($CLASSIFY eq "loocv"){
		$trainingSet = $dataWrapper->copy();
	}
	elsif($CLASSIFY eq "split"){
		my $containsBoth = 0;
		while(!$containsBoth){

pod/TrunkClassifier.pod

the classifier. This is done for every sample in the input dataset. See the algorithm
publication for more details. A PubMed link can be found in L</"SEE ALSO">.

=head2 ARGUMENTS

Following installation, the algorithm can be run from the terminal using the
run_classifier.pl script supplied in the t/ folder. The command takes the following form:

C<perl run_classifier.pl [Options] [Input data file]>
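
For example, to run leave-one-out cross validation (the default procedure) on a data
file, something like the following could be used; the file name is a placeholder and
the full option set is documented under L</"OPTIONS">:

    perl run_classifier.pl -c loocv expression_data.txt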

=head3 INPUT DATA FILE

The last argument must be the name of the input data file containing the expression
data in table format, where columns are tab-separated. The first row must contain
column names and the first column must contain row names. Samples need to be given in
columns and probes/attributes in rows. Before the name of the input data file, a number of
optional arguments may be given; see L</"OPTIONS"> below. A data file containing random data
is provided in the t/ folder.
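
As an illustration (the probe names, sample names, values and first column header below
are all made up), a small input file could look like this, with tab characters
separating the columns:

    GENE	Sample1	Sample2	Sample3
    probe_A	5.32	7.10	4.88
    probe_B	2.45	1.98	3.02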

=head3 META DATA

pod/TrunkClassifier.pod


This option circumvents selection of decision levels and makes the algorithm use trunks
with 1, 2, 3, 4 and 5 decision levels during classification.

=item C<-h>

This option causes argument documentation to be printed to the terminal.

=back

=head2 OUTPUT

The algorithm produces five files as output: F<performance.txt>, F<loo_trunks.txt>,
F<cts_trunks.txt>, F<class_report.txt> and F<log.txt>. The classification accuracy
can be found in F<performance.txt>. In case of leave-one-out cross validation, the
accuracy for each fold is reported along with the average accuracy across all folds.
Since the test set of each fold consists of a single sample, the accuracy of one LOOCV
fold is either 0 % (wrong) or 100 % (correct). For split-sample and dual-dataset
classification, only the average accuracy is reported since there is only one test set.
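
Since each LOOCV fold contributes either 0 % or 100 %, the reported average is simply
the fraction of correctly classified samples. A small illustration in Perl (the fold
outcomes are made up):

    my @foldCorrect = (1, 0, 1, 1);   #1 = correctly classified, 0 = misclassified
    my $sum = 0;
    $sum += $_ for @foldCorrect;
    my $averageAccuracy = 100 * $sum / scalar(@foldCorrect);   #75 %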

The F<loo_trunks.txt> file contains the decision trunks resulting from leave-one-out


