view release on metacpan or search on metacpan
lib/Algorithm/TrunkClassifier/Classification.pm view on Meta::CPAN
91011121314151617181920212223242526272829use
POSIX;
our
$VERSION
=
"v1.0.1"
;
#Description: Function responsible for building decision trunks and classifying test samples using LOOCV
#Parameters: (1) Package, (2) input dataset, (3) test dataset, (4) classification procedure, (5) split percent,
# (6) testset data file name, (7) classification variable name, (8) output folder name,
# (9) number of levels, (10) verbose flag, (11) input data file name (12) useall flag
#Return value: None
sub
trainAndClassify($ $ $ $ $ $ $ $ $ $ $ $ $){
shift
(
@_
);
my
(
$dataWrapper
,
$testset
,
$CLASSIFY
,
$SPLITPERCENT
,
$TESTFILE
,
$CLASSNAME
,
$OUTPUT
,
$LEVELS
,
$VERBOSE
,
$DATAFILE
,
$USEALL
) =
@_
;
#Create output files
if
(!-e
$OUTPUT
&&
$OUTPUT
ne
"."
){
system
(
"mkdir $OUTPUT"
);
}
open
(PERFORMANCE,
">$OUTPUT/performance.txt"
) or
die
"Error: Unable to create output file\n"
;
open
(LOO_TRUNKS,
">$OUTPUT/loo_trunks.txt"
) or
die
"Error: Unable to create output file\n"
;
open
(CTS_TRUNKS,
">$OUTPUT/cts_trunks.txt"
) or
die
"Error: Unable to create output file\n"
;
lib/Algorithm/TrunkClassifier/Classification.pm view on Meta::CPAN
270271272273274275276277278279280281282283284285286287288289290
close
(LOG);
if
(
$VERBOSE
){
(
"Trunk classifier: Job finished\n"
);
}
}
#Description: Wrapper for the trunk build loop
#Parameters: (1) Training dataset, (2) level limit, (3) sample index, (4) feature occurrence hash ref,
# (5) selected features hash ref, (6) level break flag ref, (7) verbose flag
#Return value: Decision trunk object
sub
buildTrunk($ $ $ $ $ $ $){
my
(
$buildSet
,
$levelLimit
,
$sampleIndex
,
$featOccurRef
,
$selFeatRef
,
$levelBreakRef
,
$VERBOSE
) =
@_
;
#Trunk build loop
my
$decisionTrunk
= Algorithm::TrunkClassifier::DecisionTrunk->new();
my
$noSampleBreak
= 0;
for
(
my
$levelIndex
= 1;
$levelIndex
<=
$levelLimit
;
$levelIndex
++){
#Perform feature selection
my
$featureName
;
my
$featureIndex
;
lib/Algorithm/TrunkClassifier/Classification.pm view on Meta::CPAN
452453454455456457458459460461462463464465466467468469470471472
if
(
$noSampleBreak
){
last
;
}
}
return
$decisionTrunk
;
}
#Description: Determine the decision trunk level with highest feature selection stability
#Parameters: (1) Hash reference containing selected features, (2) number of samples in the dataset
#Return value: Number of decision trunk levels to use for classification
sub
stabilityCheck($ $){
my
(
$hashRef
,
$numSamples
) =
@_
;
my
%featOccurrence
= %{
$hashRef
};
my
$numThresh
= 6;
my
$chosenLevel
= 0;
foreach
my
$levelIndex
(1 .. 5){
if
(!
$featOccurrence
{
$levelIndex
}){
next
;
}
my
%features
= %{
$featOccurrence
{
$levelIndex
}};
my
$numFeats
=
scalar
(
keys
(
%features
));
lib/Algorithm/TrunkClassifier/CommandProcessor.pm view on Meta::CPAN
5678910111213141516171819202122232425our
$VERSION
=
'v1.0.1'
;
my
%commands
;
#Description: Command processor constructor
#Parameters: (1) TrunkClassifier::CommandProcessor, (2) classification procedure ref, (3), split ref, (4) testset ref,
# (5) class name variable ref, (6) output folder variable ref, (7) level variable ref, (8) prospect variable ref,
# (9) supplementary file variable ref, (10) verbose variable ref, (11) useall variable ref, (12) input data file variable ref
#Return value: TrunkClassifier::CommandProcessor object
sub
new($ $ $ $ $ $ $ $ $ $ $ $ $){
my
(
$class
,
$classifyRef
,
$splitPercentRef
,
$testsetRef
,
$classnameRef
,
$outputRef
,
$levelRef
,
$prospectRef
,
$suppfileRef
,
$verboseRef
,
$useallRef
,
$datafileRef
) =
@_
;
%commands
= (
"-p"
=> {
"numArgs"
=> 1,
"validArgs"
=>
'loocv|split|dual'
,
"var"
=>
$classifyRef
,
"sub"
=> \
&checkTestsetArg
},
"--procedure"
=> {
"numArgs"
=> 1,
"validArgs"
=>
'loocv|split|dual'
,
"var"
=>
$classifyRef
},
"-e"
=> {
"numArgs"
=> 1,
"validArgs"
=>
'^[1-9][0-9]?$'
,
"var"
=>
$splitPercentRef
},
"--split"
=> {
"numArgs"
=> 1,
"validArgs"
=>
'^[1-9][0-9]?$'
,
"var"
=>
$splitPercentRef
},
"-t"
=> {
"numArgs"
=> 1,
"validArgs"
=>
'.+'
,
"var"
=>
$testsetRef
},
"--testset"
=> {
"numArgs"
=> 1,
"validArgs"
=>
'.+'
,
"var"
=>
$testsetRef
},
"-c"
=> {
"numArgs"
=> 1,
"validArgs"
=>
'.+'
,
"var"
=>
$classnameRef
},
"--classvar"
=> {
"numArgs"
=> 1,
"validArgs"
=>
'.+'
,
"var"
=>
$classnameRef
},
lib/Algorithm/TrunkClassifier/CommandProcessor.pm view on Meta::CPAN
8687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
}
}
if
(!${
$self
->{
"input"
}}){
die
"Error: Input data file not supplied\n"
;
}
}
#Description: Checks that the -t option is supplied if -c dual is used
#Parameters: (1) The -c argument, (2) command line arguments
#Return value: None
sub
checkTestsetArg($ $){
my
(
$argument
,
$comLineRef
) =
@_
;
if
(
$argument
eq
"dual"
){
my
$foundT
= 0;
foreach
my
$arg
(@{
$comLineRef
}){
if
(
$arg
eq
"-t"
){
$foundT
= 1;
last
;
}
}
if
(!
$foundT
){
die
"Error: Command line option -t must be given when -c dual is used\n"
;
}
}
}
#Description: Command line help
#Parameters: None
#Return value: None
sub
commandHelp(){
my
$doc
= <<END;
Usage
perl trunk_classifier.pl [Options] [File]
Options
-p, --procedure Classification procedure to
use
[loocv|
split
|dual]
-t, --testset Dataset to classify
when
using -c dual
-c, --classvar Name of the classification variable to
use
-o, --output Name of the output folder
lib/Algorithm/TrunkClassifier/DataWrapper.pm view on Meta::CPAN
373839404142434445464748495051525354555657
}
#Read input data file
readExpData(
$self
,
$className
,
$prospect
,
$dataFileName
,
$datasetType
);
return
$self
;
}
#Description: Reads the supplementary file and writes new input data file with meta data
#Parameters: (1) supplementary file name, (2) input data file name, (3) dataset type
#Return value: New input data file name
sub
readSuppFile($ $ $ $){
my
(
$suppFileName
,
$dataFileName
,
$VERBOSE
,
$datasetType
) =
@_
;
#Read supplementary file
open
(SUPP_FILE,
$suppFileName
) or
die
"Error: Unable to open supplementary file '$suppFileName'\n"
;
my
@suppFile
= <SUPP_FILE>;
my
$content
=
join
(
""
,
@suppFile
);
$content
=~ s/\r|\n\r|\r\n/\n/g;
@suppFile
=
split
(/\n+/,
$content
);
close
(SUPP_FILE);
lib/Algorithm/TrunkClassifier/DataWrapper.pm view on Meta::CPAN
147148149150151152153154155156157158159160161162163164165166167
}
(DATA_FILE
$meta
.
join
(
""
,
@dataFile
));
close
(DATA_FILE);
return
$dataFileName
;
}
#Description: Reads input data file with expression values and meta data
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) classification variable name
# (3) prospect flag, (4) input data file name, (5) dataset type
#Return value: None
sub
readExpData($ $ $ $ $){
my
(
$self
,
$className
,
$prospect
,
$dataFileName
,
$datasetType
) =
@_
;
$className
=
uc
(
$className
);
#Read input data file
if
(!
open
(DATA_FILE,
$dataFileName
)){
die
"Error: Unable to open $datasetType '$dataFileName'\n"
;
}
my
@dataFile
= <DATA_FILE>;
close
(DATA_FILE);
my
$content
=
join
(
""
,
@dataFile
);
lib/Algorithm/TrunkClassifier/DataWrapper.pm view on Meta::CPAN
330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
$self
->{
"rownames"
} = \
@probeNames
;
$self
->{
"data_matrix"
} = \
@dataMatrix
;
$self
->{
"class_vector"
} = \
@incClassVector
;
$self
->{
"class_one"
} =
$classOne
;
$self
->{
"class_two"
} =
$classTwo
;
}
#Description: Returns the number of samples in the dataset
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: Number of elements in "colnames" attribute
sub
getNumSamples($){
my
$self
=
shift
(
@_
);
return
scalar
(@{
$self
->{
"colnames"
}});
}
#Description: Returns the number of probes in the dataset
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: Number of rows in "rownames" array
sub
getNumProbes($){
my
$self
=
shift
(
@_
);
return
scalar
(@{
$self
->{
"rownames"
}});
}
#Description: Returns the row names of the DataWrapper object
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: Array of row names
sub
getProbeList($){
my
$self
=
shift
(
@_
);
return
@{
$self
->{
"rownames"
}};
}
#Description: Returns a reference to the data matrix
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: Array reference
sub
getDataMatrix($){
my
$self
=
shift
(
@_
);
return
$self
->{
"data_matrix"
};
}
#Description: Returns a reference to the class vector
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: Array reference
sub
getClassVector($){
my
$self
=
shift
(
@_
);
return
$self
->{
"class_vector"
};
}
#Description: Returns the name of class one
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: Class name
sub
getClassOneName($){
my
$self
=
shift
(
@_
);
return
$self
->{
"class_one"
};
}
#Description: Returns the name of class two
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: Class name
sub
getClassTwoName($){
my
$self
=
shift
(
@_
);
return
$self
->{
"class_two"
};
}
#Description: Returns a copy of a TrunkClassifier::DataWrapper object
#Parameters: (1) TrunkClassifier::DataWrapper object
#Return value: New TrunkClassifier::DataWrapper object
sub
copy($){
my
$self
=
shift
(
@_
);
my
$newWrapper
= Algorithm::TrunkClassifier::DataWrapper->new();
my
@colnames
= @{
$self
->{
"colnames"
}};
my
@rownames
= @{
$self
->{
"rownames"
}};
my
@classVector
= @{
$self
->{
"class_vector"
}};
my
@dataMatrix
;
foreach
my
$arrayRef
(@{
$self
->{
"data_matrix"
}}){
my
@arrayCopy
= @{
$arrayRef
};
push
(
@dataMatrix
, \
@arrayCopy
);
}
lib/Algorithm/TrunkClassifier/DataWrapper.pm view on Meta::CPAN
409410411412413414415416417418419420421422423424425426427428429
$newWrapper
->{
"data_matrix"
} = \
@dataMatrix
;
$newWrapper
->{
"class_vector"
} = \
@classVector
;
$newWrapper
->{
"class_one"
} =
$self
->{
"class_one"
};
$newWrapper
->{
"class_two"
} =
$self
->{
"class_two"
};
return
$newWrapper
;
}
#Description: Removes one sample from a TrunkClassifier::DataWrapper object
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) index of sample to remove
#Return value: TrunkClassifier::DataWrapper object containing the removed sample
sub
leaveOneOut($ $){
my
(
$self
,
$index
) =
@_
;
my
@colnames
= (
$self
->{
"colnames"
}[
$index
]);
my
@rownames
= @{
$self
->{
"rownames"
}};
my
@classVector
= (
$self
->{
"class_vector"
}[
$index
]);
my
@matrixCol
;
for
(
my
$row
= 0;
$row
<
scalar
(
@rownames
);
$row
++){
my
@colArray
=
splice
(@{
$self
->{
"data_matrix"
}[
$row
]},
$index
, 1);
push
(
@matrixCol
, \
@colArray
);
}
splice
(@{
$self
->{
"colnames"
}},
$index
, 1);
lib/Algorithm/TrunkClassifier/DataWrapper.pm view on Meta::CPAN
434435436437438439440441442443444445446447448449450451452453454
$newWrapper
->{
"data_matrix"
} = \
@matrixCol
;
$newWrapper
->{
"class_vector"
} = \
@classVector
;
$newWrapper
->{
"class_one"
} =
$self
->{
"class_one"
};
$newWrapper
->{
"class_two"
} =
$self
->{
"class_two"
};
return
$newWrapper
;
}
#Description: Removes a percentage of samples from a TrunkClassifier::DataWrapper object
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) split percent
#Return value: TrunkClassifier::DataWrapper object containing the removed samples
sub
splitSamples($ $){
my
(
$self
,
$split
) =
@_
;
my
$totNumSamples
=
$self
->getNumSamples();
my
$testSetSize
= floor((
$split
/ 100) *
$totNumSamples
);
my
@colnames
;
my
@rownames
=
$self
->getProbeList();
my
@classVector
;
my
@matrix
;
for
(
my
$row
= 0;
$row
<
$self
->getNumProbes();
$row
++){
my
@array
;
push
(
@matrix
, \
@array
);
lib/Algorithm/TrunkClassifier/DataWrapper.pm view on Meta::CPAN
470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549
$testSet
->{
"data_matrix"
} = \
@matrix
;
$testSet
->{
"class_vector"
} = \
@classVector
;
$testSet
->{
"class_one"
} =
$self
->{
"class_one"
};
$testSet
->{
"class_two"
} =
$self
->{
"class_two"
};
return
$testSet
;
}
#Description: Returns the number of samples in the specified class
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) class
#Return value: Array with column indexes
sub
getClassSize($ $){
my
(
$self
,
$class
) =
@_
;
my
$classSize
= 0;
foreach
my
$sampleClass
(@{
$self
->{
"class_vector"
}}){
if
(
$sampleClass
eq
$class
){
$classSize
++;
}
}
return
$classSize
;
}
#Description: Returns the probe name of the probe row index given as argument
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) probe row index
#Return value: Probe name
sub
getProbeName($ $){
my
(
$self
,
$probeIndex
) =
@_
;
return
${
$self
->{
"rownames"
}}[
$probeIndex
];
}
#Description: Returns the probe row index of the probe name given as argument
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) probe name
#Return value: Probe row index
sub
getProbeIndex($ $){
my
(
$self
,
$probeName
) =
@_
;
for
(
my
$probeIndex
= 0;
$probeIndex
<
$self
->getNumProbes();
$probeIndex
++){
if
(
$self
->{
"rownames"
}[
$probeIndex
] eq
$probeName
){
return
$probeIndex
;
}
}
return
undef
;
}
#Description: Returns the data matrix row corresponding to the argument index
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) row index
#Return value: Array
sub
getMatrixRow($ $){
my
(
$self
,
$rowIndex
) =
@_
;
return
@{
$self
->{
"data_matrix"
}[
$rowIndex
]};
}
#Description: Returns the sample name corresponding to the sample index given
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) sample index
#Return value: Array reference
sub
getSampleName($ $){
my
(
$self
,
$sampleIndex
) =
@_
;
return
$self
->{
"colnames"
}[
$sampleIndex
];
}
#Description: Removes a probe name from row names and its row from the data matrix
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) probe index
#Return value: None
sub
removeProbe($ $){
my
(
$self
,
$probeIndex
) =
@_
;
splice
(@{
$self
->{
"rownames"
}},
$probeIndex
, 1);
splice
(@{
$self
->{
"data_matrix"
}},
$probeIndex
, 1);
}
#Description: Removes a sample name from col names, its class from class vector, and its column from the data matrix
#Parameters: (1) TrunkClassifier::DataWrapper object, (2) sample index
#Return value: None
sub
removeSample($ $){
my
(
$self
,
$sampleIndex
) =
@_
;
splice
(@{
$self
->{
"colnames"
}},
$sampleIndex
, 1);
splice
(@{
$self
->{
"class_vector"
}},
$sampleIndex
, 1);
foreach
my
$rowref
(@{
$self
->{
"data_matrix"
}}){
splice
(@{
$rowref
},
$sampleIndex
, 1);
}
}
return
1;
lib/Algorithm/TrunkClassifier/DecisionTrunk.pm view on Meta::CPAN
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455use
warnings;
use
strict;
our
$VERSION
=
'v1.0.1'
;
#Description: DecisionTrunk constructor
#Parameters: (1) TrunkClassifier::DecisionTrunk class
#Return value: TrunkClassifier::DecisionTrunk object
sub
new($){
my
$class
=
shift
();
my
@names
;
my
@lower
;
my
@higher
;
my
@lowerClass
;
my
@higherClass
;
my
$self
= {
"level_name"
=> \
@names
,
"lower_threshold"
=> \
@lower
,
"higher_threshold"
=> \
@higher
,
"lower_class"
=> \
@lowerClass
,
"higher_class"
=> \
@higherClass
};
bless
(
$self
,
$class
);
return
$self
;
}
#Description: Adds a decision level to the trunk
#Parameters: (1) TrunkClassifier::DecisionTrunk object, (2) level name, (3) lower threshold, (4) higher threshold, (5) lower class, (6) higher class
#Return value: None
sub
addLevel($ $ $ $ $ $){
my
(
$self
,
$levelName
,
$lowerT
,
$higherT
,
$lowerC
,
$higherC
) =
@_
;
push
(@{
$self
->{
"level_name"
}},
$levelName
);
push
(@{
$self
->{
"lower_threshold"
}},
$lowerT
);
push
(@{
$self
->{
"higher_threshold"
}},
$higherT
);
push
(@{
$self
->{
"lower_class"
}},
$lowerC
);
push
(@{
$self
->{
"higher_class"
}},
$higherC
);
}
#Description: Classifies the test set based on the thresholds in the trunk
#Parameters: (1) TrunkClassifier::DecisionTrunk object, (2) TrunkClassifier::DataWrapper object, (3) class one name, (4) class two name
# (5) class report array reference, (6) verbose flag
#Return value: Ratio of correct to total classification performance
sub
classify($ $ $ $ $ $){
my
(
$self
,
$testSet
,
$ClassOne
,
$classTwo
,
$classReport
,
$VERBOSE
) =
@_
;
my
$class
;
my
@classification
;
my
$ratioCorrect
= 0;
for
(
my
$sampleIndex
= 0;
$sampleIndex
<
$testSet
->getNumSamples();
$sampleIndex
++){
$class
=
""
;
for
(
my
$levelIndex
= 0;
$levelIndex
<
scalar
(@{
$self
->{
"level_name"
}});
$levelIndex
++){
my
$probeIndex
=
$testSet
->getProbeIndex(
$self
->{
"level_name"
}[
$levelIndex
]);
my
@probeRow
=
$testSet
->getMatrixRow(
$probeIndex
);
if
(
$probeRow
[
$sampleIndex
] <=
$self
->{
"lower_threshold"
}[
$levelIndex
]){
lib/Algorithm/TrunkClassifier/DecisionTrunk.pm view on Meta::CPAN
757677787980818283848586878889909192939495
$ratioCorrect
++;
}
}
$ratioCorrect
/=
$testSet
->getNumSamples();
return
$ratioCorrect
;
}
#Description: Returns a text report of the trunk structure
#Parameters: (1) TrunkClassifier::DecisionTrunk object
#Return value: String containing the trunk structure
sub
report($){
my
$self
=
shift
();
my
$report
=
""
;
for
(
my
$level
= 0;
$level
<
scalar
(@{
$self
->{
"level_name"
}});
$level
++){
my
$name
=
$self
->{
"level_name"
}[
$level
];
my
$lowerT
=
$self
->{
"lower_threshold"
}[
$level
];
my
$lowerC
=
$self
->{
"lower_class"
}[
$level
];
my
$higherT
=
$self
->{
"higher_threshold"
}[
$level
];
my
$higherC
=
$self
->{
"higher_class"
}[
$level
];
$report
.=
"\t$name\n<= $lowerT ($lowerC)\t\t> $higherT ($higherC)\n\n"
;
}
lib/Algorithm/TrunkClassifier/Util.pm view on Meta::CPAN
123456789101112131415161718192021package
Algorithm::TrunkClassifier::Util;
use
warnings;
use
strict;
our
$VERSION
=
'v1.0.1'
;
#Description: Sorts two arrays in accending order based on values in the first
#Parameters: (1) Numerical array reference, (2) second array reference
#Return value: None
sub
dataSort($ $){
my
(
$numArrayRef
,
$secondArrayRef
) =
@_
;
my
$limiter
= 1;
for
(
my
$outer
= 0;
$outer
<
scalar
(@{
$numArrayRef
});
$outer
++){
for
(
my
$inner
= 0;
$inner
<
scalar
(@{
$numArrayRef
}) -
$limiter
;
$inner
++){
if
(${
$numArrayRef
}[
$inner
] > ${
$numArrayRef
}[
$inner
+1]){
my
$buffer
= ${
$numArrayRef
}[
$inner
];
${
$numArrayRef
}[
$inner
] = ${
$numArrayRef
}[
$inner
+1];
${
$numArrayRef
}[
$inner
+1] =
$buffer
;
$buffer
= ${
$secondArrayRef
}[
$inner
];
${
$secondArrayRef
}[
$inner
] = ${
$secondArrayRef
}[
$inner
+1];