ALBD
view release on metacpan or search on metacpan
utils/datasetCreator/removeExplicit.pl view on Meta::CPAN
#removes the explicit co-occurrence matrix from the squared explicit
# co-occurrence matrix. This generates a gold standard true discovery file
my $matrixFileName = '../../samples/sampleExplicitMatrix';
my $squaredMatrixFileName = '../../samples/postCutoffMatrix';
my $outputFileName = '../../samples/sampleGoldMatrix';
&removeExplicit($matrixFileName, $squaredMatrixFileName, $outputFileName);
###############################
###############################
#removes explicit knowledge ($matrixFileName) from the implicit
# knowledge ($squaredMatrixFileName)
sub removeExplicit {
my $matrixFileName = shift; #the explicit knowledge matrix (usually not filtered)
my $squaredMatrixFileName = shift; #the implicit with explicit knowledge matrix (filtered squared)
my $outputFileName = shift; #the implicit knowledge matrix output file
print STDERR "Removing Explicit from $matrixFileName\n";
#read in the matrix
open IN, $matrixFileName
or die("ERROR: unable to open matrix input file: $matrixFileName\n");
my %matrix = ();
my $numCooccurrences = 0;
while (my $line = <IN>) {
#$line =~ /([^\t]+)\t([^\t]+)\t([\d]+)/;
$line =~ /([^\s]+)\s([^\s]+)\s([\d]+)/;
if (!exists $matrix{$1}) {
my %hash = ();
$matrix{$1} = \%hash;
}
$matrix{$1}{$2} = $3;
}
close IN;
#copy the implicit values of the squared matrix over to a new file
open IN, $squaredMatrixFileName
or die("ERROR: unable to open squared matrix input file: $squaredMatrixFileName\n");
open OUT, ">$outputFileName"
or die("ERROR: unable to open output file: $outputFileName\n");
while (my $line = <IN>) {
$line =~ /([^\s]+)\s([^\s]+)\s([\d]+)/;
if (!exists ${$matrix{$1}}{$2}) {
print OUT $line;
}
}
close IN;
close OUT;
print STDERR "DONE!\n";
}
( run in 2.467 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )