Algorithm-TrunkClassifier
view release on metacpan or search on metacpan
Algorithm/TrunkClassifier/FeatureSelection.xs view on Meta::CPAN
int
indTTest(expData, numFeatures, numSamples, sampleNames, normal, malign)
double ** expData
int numFeatures
int numSamples
char ** sampleNames
char * normal
char * malign
OUTPUT:
RETVAL
CLEANUP:
/* Release the heap buffers behind the expData and sampleNames arguments. */
/* Free the matrix row by row. The walk stops at the first NULL row pointer,
   so expData is assumed to be NULL-terminated by its typemap -- TODO confirm. */
int i = 0;
while(expData[i] != NULL){
free(expData[i]);
i++;
}
/* Free the array of row pointers itself. */
free(expData);
/* Only the pointer array is freed, not the individual strings; presumably the
   strings are not owned by this function -- verify against the stringArray typemap. */
free(sampleNames);
Algorithm/TrunkClassifier/ppport.h view on Meta::CPAN
PTR2ul|5.007001||p
PTRV|5.006000||p
PUSHMARK|||
PUSH_MULTICALL||5.009005|
PUSHi|||
PUSHmortal|5.009002||p
PUSHn|||
PUSHp|||
PUSHs|||
PUSHu|5.004000||p
PUTBACK|||
PerlIO_clearerr||5.007003|
PerlIO_close||5.007003|
PerlIO_context_layers||5.009004|
PerlIO_eof||5.007003|
PerlIO_error||5.007003|
PerlIO_fileno||5.007003|
PerlIO_fill||5.007003|
PerlIO_flush||5.007003|
PerlIO_get_base||5.007003|
PerlIO_get_bufsiz||5.007003|
Algorithm/TrunkClassifier/ppport.h view on Meta::CPAN
{
dSP;
SV* sv = newSVpv(p, 0);
PUSHMARK(sp);
eval_sv(sv, G_SCALAR);
SvREFCNT_dec(sv);
SPAGAIN;
sv = POPs;
PUTBACK;
if (croak_on_error && SvTRUE(GvSV(errgv)))
croak(SvPVx(GvSV(errgv), na));
return sv;
}
#endif
#endif
Algorithm/TrunkClassifier/typemap view on Meta::CPAN
double ** doubleMatrix
char ** stringArray
INPUT
doubleMatrix
AV* array;
AV* temprow;
I32 numRows;
I32 numCols;
I32 rowIndex;
I32 colIndex;
SV** tempSV;
double** matrix;
lib/Algorithm/TrunkClassifier.pm view on Meta::CPAN
our %EXPORT_TAGS = ( 'all' => [ qw(
runClassifier
) ] );
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
#Classifier arguments
#File-scoped settings, initialised here to their default values. runClassifier
#passes a reference to each of them to the CommandProcessor constructor,
#presumably so that command line processing can overwrite the defaults
#(verify against CommandProcessor).
my $CLASSIFY = "loocv"; #Classification procedure (loocv|split|dual)
my $SPLITPERCENT = 20; #Percentage of samples to use as test set when using -c split
my $TESTSET = ""; #Name of test dataset when using -c dual
my $CLASSNAME = "TISSUE"; #Name of classification variable
my $OUTPUT = "."; #Name of output folder
my $LEVELS = 0; #Number of levels in decision trunks (forced)
my $PROSPECT = ""; #Check input data without running classifier
my $SUPPFILE = ""; #File containing class information
my $VERBOSE = 0; #Report progress during classifier run
my $USEALL = 0; #Circumvent level selection and use all trunks for classification
my $DATAFILE = ""; #File containing input data
#Description: Wrapper function for running the decision trunk classifier
#Parameters: Command line arguments
#Return value: None
sub runClassifier{
#Handle command line arguments
my $processor = Algorithm::TrunkClassifier::CommandProcessor->new(\$CLASSIFY, \$SPLITPERCENT, \$TESTSET, \$CLASSNAME, \$OUTPUT, \$LEVELS, \$PROSPECT, \$SUPPFILE, \$VERBOSE, \$USEALL, \$DATAFILE);
$processor->processCmd(@_);
#Read input data
if($VERBOSE){
print("Trunk classifier: Reading input data\n");
}
my $dataWrapper = Algorithm::TrunkClassifier::DataWrapper->new($CLASSNAME, $PROSPECT, $SUPPFILE, $DATAFILE, $VERBOSE, "input data file");
my $testset;
if($CLASSIFY eq "dual"){
$testset = Algorithm::TrunkClassifier::DataWrapper->new($CLASSNAME, $PROSPECT, $SUPPFILE, $TESTSET, $VERBOSE, "testset data file");
lib/Algorithm/TrunkClassifier.pm view on Meta::CPAN
last;
}
}
if(!$found){
die "Error: Probe '$query' in input data file not found in testset data file\n";
}
}
}
#Run cross validation loop
Algorithm::TrunkClassifier::Classification->trainAndClassify($dataWrapper, $testset, $CLASSIFY, $SPLITPERCENT, $TESTSET, $CLASSNAME, $OUTPUT, $LEVELS, $VERBOSE, $DATAFILE, $USEALL);
}
return 1;
lib/Algorithm/TrunkClassifier/Classification.pm view on Meta::CPAN
our $VERSION = "v1.0.1";
#Description: Function responsible for building decision trunks and classifying test samples using LOOCV
#Parameters: (1) Package, (2) input dataset, (3) test dataset, (4) classification procedure, (5) split percent,
# (6) testset data file name, (7) classification variable name, (8) output folder name,
# (9) number of levels, (10) verbose flag, (11) input data file name, (12) useall flag
#Return value: None
sub trainAndClassify($ $ $ $ $ $ $ $ $ $ $ $ $){
shift(@_);
my ($dataWrapper, $testset, $CLASSIFY, $SPLITPERCENT, $TESTFILE, $CLASSNAME, $OUTPUT, $LEVELS, $VERBOSE, $DATAFILE, $USEALL) = @_;
#Create output files
if(!-e $OUTPUT && $OUTPUT ne "."){
system("mkdir $OUTPUT");
}
open(PERFORMANCE, ">$OUTPUT/performance.txt") or die "Error: Unable to create output file\n";
open(LOO_TRUNKS, ">$OUTPUT/loo_trunks.txt") or die "Error: Unable to create output file\n";
open(CTS_TRUNKS, ">$OUTPUT/cts_trunks.txt") or die "Error: Unable to create output file\n";
open(REPORT, ">$OUTPUT/class_report.txt") or die "Error: Unable to create output file\n";
open(LOG, ">$OUTPUT/log.txt") or die "Error: Unable to create output file\n";
#Establish training and test set
my $trainingSet;
my $testSet;
if($CLASSIFY eq "loocv"){
$trainingSet = $dataWrapper->copy();
}
elsif($CLASSIFY eq "split"){
my $containsBoth = 0;
while(!$containsBoth){
pod/TrunkClassifier.pod view on Meta::CPAN
the classifier. This is done for every sample in the input dataset. See the algorithm
publication for more details. A PubMed link can be found in L</"SEE ALSO">.
=head2 ARGUMENTS
Following installation, the algorithm can be run from the terminal using the
run_classifier.pl script supplied in the t/ folder. The command should be in this form
C<perl run_classifier.pl [Options] [Input data file]>
=head3 INPUT DATA FILE
The last argument must be the name of the input data file containing the expression
data in table format, where columns are tab-separated. The first row must contain
column names and the first column must contain row names. Samples need to be given in
columns and probes/attributes in rows. Before the name of the input data file, a number of
optional arguments may be given, see L</"OPTIONS"> below. A data file containing random data
is provided in the t/ folder.
=head3 META DATA
pod/TrunkClassifier.pod view on Meta::CPAN
This option circumvents selection of decision levels and makes the algorithm use trunks
with 1, 2, 3, 4 and 5 decision levels during classification.
=item C<-h>
This option causes argument documentation to be printed to the terminal.
=back
=head2 OUTPUT
The algorithm produces five files as output: F<performance.txt>, F<loo_trunks.txt>,
F<cts_trunks.txt>, F<class_report.txt> and F<log.txt>. The classification accuracy
can be found in F<performance.txt>. In case of leave-one-out cross validation, the
accuracy for each fold is reported along with the average accuracy across all folds.
Since the test set consists of one sample, the accuracy of one LOOCV fold is either
0 % (wrong) or 100 % (correct). For split-sample and dual datasets classification, only
the average accuracy is reported since there is only one test set.
The F<loo_trunks.txt> file contains the decision trunks resulting from leave-one-out
( run in 0.292 second using v1.01-cache-2.11-cpan-4e96b696675 )