ALBD
view release on metacpan or search on metacpan
utils/datasetCreator/removeCUIPair.pl view on Meta::CPAN
# removes the CUI pair from the dataset
# used to remove Somatomedin C and Arginine from the 1960-1989 datasets
use strict;
use warnings;
# Script driver.
# The defaults below reproduce the original run (Somatomedin C / Arginine on
# the 1960-1989 window-8 dataset). Each value may optionally be overridden
# positionally on the command line:
#   removeCUIPair.pl [cuiA [cuiB [matrixFile [matrixOutFile]]]]
# With no arguments the behaviour is the same as before.
my $cuiA = defined($ARGV[0]) ? $ARGV[0] : 'C0021665'; #somatomedin c
my $cuiB = defined($ARGV[1]) ? $ARGV[1] : 'C0003765'; #arginine
my $matrixFileName = defined($ARGV[2])
    ? $ARGV[2]
    : '/home/henryst/lbdData/groupedData/1960_1989_window8_ordered';
# the output defaults to the input file name with '_removed' appended
my $matrixOutFileName = defined($ARGV[3])
    ? $ARGV[3]
    : $matrixFileName.'_removed';

# plain call syntax; the legacy '&' sigil form is not needed
removeCuiPair($cuiA, $cuiB, $matrixFileName, $matrixOutFileName);
print STDERR "DONE\n";
###########################################
# remove the CUI pair from the dataset
#
# Copies $matrixFileName to $matrixOutFileName line by line, skipping every
# line that contains "$cuiA<TAB>$cuiB" or "$cuiB<TAB>$cuiA".
# input:  $cuiA              <- first CUI of the pair to remove
#         $cuiB              <- second CUI of the pair to remove
#         $matrixFileName    <- path of the matrix file to read
#         $matrixOutFileName <- path of the filtered file to write
#                               (created, or truncated if it exists)
# output: nothing is returned; each removed line is echoed to STDOUT and a
#         progress message is written to STDERR
# dies if either file cannot be opened, or if the output file cannot be
# completely written
sub removeCuiPair {
    my $cuiA = shift;
    my $cuiB = shift;
    my $matrixFileName = shift;
    my $matrixOutFileName = shift;
    print STDERR "removing $cuiA,$cuiB from $matrixFileName\n";

    #open the in and out files
    # (three-argument open keeps characters in the file name from being
    #  read as an open mode; lexical handles do not leak into the package)
    open my $in, '<', $matrixFileName
        or die ("ERROR: cannot open matrix in file: $matrixFileName ($!)\n");
    open my $out, '>', $matrixOutFileName
        or die ("ERROR: cannot open matrix out file: $matrixOutFileName ($!)\n");

    # compile the pair pattern once, outside the loop; \Q...\E makes the
    # CUIs match literally even if they contain regex metacharacters
    my $pairRegex = qr/\Q$cuiA\E\t\Q$cuiB\E|\Q$cuiB\E\t\Q$cuiA\E/;

    # read in each line of the matrix and copy to the new file
    # but omit any $cuiA,$cuiB or $cuiB,$cuiA lines
    while (my $line = <$in>) {
        if ($line =~ $pairRegex) {
            print " removing $line";
            next;
        }
        print {$out} $line;
    }

    close $in;
    # buffered write errors only surface at close, so check it; closing here
    # also guarantees the output is on disk when the sub returns
    close $out
        or die ("ERROR: cannot finish writing matrix out file: $matrixOutFileName ($!)\n");
    return;
}
( run in 1.186 second using v1.01-cache-2.11-cpan-39bf76dae61 )