Algorithm-TrunkClassifier
view release on metacpan or search on metacpan
lib/Algorithm/TrunkClassifier/DataWrapper.pm view on Meta::CPAN
if($dataFile[$rowcounter] =~ /^\s*$/){
shift(@dataFile);
}
elsif($dataFile[$rowcounter] =~ /^#/){
push(@metarows, shift(@dataFile));
}
else{
$rowcounter++;
}
}
#Extract samples
#The next row of @dataFile is the tab-separated header row. Its first column
#is not a sample name, so only the columns after it are kept.
my @samples = do {
    my @headerCols = split(/\t/, shift(@dataFile));
    @headerCols[1 .. $#headerCols];
};
my $totNumSamples = scalar(@samples);
#A dataset without any sample column cannot be processed
die "Error: No samples in $datasetType\n" if($totNumSamples == 0);
#Check that class variable exists and that all samples have valid class membership
#Parse the meta data rows collected in @metarows. Two row types are recognised:
#  #CLASSVAR <name> <class1> <class2>    a classification variable and its two class labels
#  #CLASSMEM <name> <label> <label> ...  the class labels assigned to the samples for that variable
#Variable names and class labels are upper-cased before they are stored.
my %classes;    #variable name => {class label => 1}
my %membership; #variable name => array ref with the class labels from the CLASSMEM row
foreach my $row (@metarows){
    if($row =~ /^#CLASSVAR/){
        my @cols = split(/\s+/, $row);
        shift(@cols);
        #Presence is tested with defined/empty-string checks instead of truth,
        #so that a name or class label of "0" is not reported as missing
        if(!defined($cols[0]) || $cols[0] eq ""){
            warn "Warning: CLASSVAR name missing in meta data of $datasetType\n";
            next;
        }
        if(!defined($cols[1]) || $cols[1] eq "" || !defined($cols[2]) || $cols[2] eq ""){
            warn "Warning: CLASSVAR class labels for '$cols[0]' missing in meta data of $datasetType\n";
            next;
        }
        my $classVarName = uc($cols[0]);
        my $class1 = uc($cols[1]);
        my $class2 = uc($cols[2]);
        #The labels are stored upper-cased, and CLASSMEM labels are upper-cased
        #before they are compared with $NULL_CLASS further down, so the
        #upper-cased form is checked here in addition to the raw label
        if(grep { $_ eq $NULL_CLASS } ($cols[1], $cols[2], $class1, $class2)){
            die "Error: CLASSVAR class label equals NULL CLASS in $datasetType\n";
        }
        $classes{$classVarName} = {$class1 => 1, $class2 => 1};
    }
    elsif($row =~ /^#CLASSMEM/){
        my @cols = split(/\s+/, $row);
        shift(@cols);
        if(!defined($cols[0]) || $cols[0] eq ""){
            warn "Warning: CLASSMEM name missing in meta data of $datasetType\n";
            next;
        }
        #First column is the variable name, the remaining columns are the class labels
        my $classVarName = uc(shift(@cols));
        $membership{$classVarName} = [map { uc($_) } @cols];
    }
}
#Validate the meta data of the requested classification variable $className:
#both a CLASSVAR and a CLASSMEM entry must exist, the CLASSMEM vector must have
#one label per sample, and every label must be a declared class or $NULL_CLASS
if(!$classes{$className} || !$membership{$className}){
    die "Error: Missing meta data for classification variable '$className' in $datasetType\n";
}
if(scalar(@{$membership{$className}}) != $totNumSamples){
    die "Error: CLASSMEM vector for '$className' and sample vector have different lengths in $datasetType\n";
}
foreach my $class (@{$membership{$className}}){
    if($class ne $NULL_CLASS && !$classes{$className}{$class}){
        die "Error: Invalid class label in '$className' CLASSMEM vector in $datasetType\n";
    }
}
my @classVector = @{$membership{$className}};
#The two class labels in sorted order; the first becomes class one
my @classBuffer = sort(keys(%{$classes{$className}}));
#Identical CLASSVAR labels collapse into a single hash key, which would leave
#the second class undefined, so stop here with an explicit message
if(scalar(@classBuffer) != 2){
    die "Error: CLASSVAR for '$className' must define two distinct class labels in $datasetType\n";
}
my $classOne = $classBuffer[0];
my $classTwo = $classBuffer[1];
#Determine what sample indexes to include
#A sample is included when its class label equals one of the two classes;
#samples with any other label are left out. The members of each class are counted.
my @includedInd;
my $classOneCount = 0;
my $classTwoCount = 0;
foreach my $idx (0 .. $totNumSamples - 1){
    my $label = $classVector[$idx];
    if($label eq $classOne){
        $classOneCount++;
    }
    elsif($label eq $classTwo){
        $classTwoCount++;
    }
    else{
        next;
    }
    push(@includedInd, $idx);
}
#Both classes need at least one member; class one is checked first
foreach my $check ([$classOne, $classOneCount], [$classTwo, $classTwoCount]){
    my ($label, $count) = @{$check};
    next if($count);
    die "Error: Class '$label' for classification variable '$className' has zero members in $datasetType\n";
}
my $numIncInd = scalar(@includedInd);
#Check for sample duplicates
#Every pair of sample names is compared, and each pair of equal names is
#reported as a warning with 1-based positions
foreach my $first (0 .. $totNumSamples - 2){
    foreach my $second ($first + 1 .. $totNumSamples - 1){
        next if($samples[$first] ne $samples[$second]);
        warn "Warning: Duplicate sample name '$samples[$first]' at positions ", $first + 1, " and ", $second + 1, " in $datasetType\n";
    }
}
#Initialise Algorithm::TrunkClassifier::DataWrapper object
#Sample names and class labels of the included samples, in the order of @includedInd
my @incSampleNames = @samples[@includedInd];
my @incClassVector = @classVector[@includedInd];
my @probeNames;
my @dataMatrix;
for(my $rowIndex = 0; $rowIndex < scalar(@dataFile); $rowIndex++){
$dataFile[$rowIndex] =~ s/,/./g;
my @cols = split(/\t/, $dataFile[$rowIndex]);
if(scalar(@cols) != $totNumSamples + 1){
die "Error: Wrong number of columns in $datasetType at probe ", $rowIndex + 1, "\n";
}
my $probe = "$rowIndex:" . shift(@cols);
push(@probeNames, $probe);
my @includedCols;
( run in 1.431 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )