ALBD
view release on metacpan or search on metacpan
lib/LiteratureBasedDiscovery/Rank.pm view on Meta::CPAN
#automatically initializes to 0 (with +=)
$scores{$key2} += ${$bcPairsRef}{$pairKey};
}
return \%scores;
}
# scores each implicit CUI using an association measure. Score is the maximum
# association between a column in the implicit matrix, and one of the start
# matrix terms (so max between any A and that C term).
# Score is calculated using the implicit matrix
# input: $startCuisRef <- ref to an array of start cuis (A terms)
# $implicitMatrixFileName <- fileName of the implicit matrix
# $measure <- the string of the umls association measure to use
# $association <- an instance of umls association
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
sub scoreImplicit_fromImplicitMatrix {
    # Scores every implicit (C) term against the set of start (A) terms, using
    # observed co-occurrence counts gathered from the implicit matrix file.
    # The file is read as tab-separated lines of the form "cuiA\tcuiB\tvalue".
    #
    # input:  $startCuisRef           <- ref to an array of start cuis (A terms)
    #         $implicitMatrixFileName <- file name of the implicit matrix
    #         $measure                <- name of the association measure, passed
    #                                    through unchanged to $association
    #         $association            <- object providing the method
    #                                    _calculateAssociation_fromObservedCounts
    # output: a hash ref of scores for each implicit cui (hash{cui} = score)
    # dies:   if the implicit matrix file cannot be opened or rewound
    my $startCuisRef = shift;
    my $implicitMatrixFileName = shift;
    my $measure = shift;
    my $association = shift;

    ######################################
    # Get hashes for A and C terms
    ######################################
    # create a hash of starting terms for constant-time lookups
    my %aTerms = map { $_ => 1 } @{$startCuisRef};

    # 3-arg open with a lexical handle; fail loudly rather than silently
    # returning an empty score table when the file is missing
    open(my $in, '<', $implicitMatrixFileName)
        or die "Cannot open implicit matrix file '$implicitMatrixFileName': $!";

    # get all the target terms (terms that co-occur with an A term in the
    # implicit matrix file = the implicit terms)
    my %cTerms = ();
    while (my $line = <$in>) {
        # only trust $1/$2 when the match actually succeeded
        if ($line =~ /(C\d{7})\s(C\d{7})/ && exists $aTerms{$1}) {
            $cTerms{$2} = 1;
        }
    }

    ######################################
    # Get co-occurrence values: N11, N1P, NP1, NPP
    ######################################
    # npp        = sum of the values on every line that involves an A term
    #              (first column) or a C term (second column)
    #              NOTE(review): the original comment called this the total
    #              number of co-occurrences, but the code has always restricted
    #              it to lines touching an A or C term -- confirm the intent
    # np1{cTerm} = sum of XXX\tcTerm\tval over all lines for that C term
    # n1p        = sum of anyATerm\tXXX\tval
    # n11{cTerm} = sum of anyATerm\tcTerm\tval
    seek($in, 0, 0)
        or die "Cannot rewind implicit matrix file '$implicitMatrixFileName': $!";

    my %np1 = ();
    my %n11 = ();
    my $n1p = 0;
    my $npp = 0;
    while (my $line = <$in>) {
        chomp $line;    # keep the trailing newline out of the value column
        my ($cuiA, $cuiB, $val) = split(/\t/, $line);

        # skip blank or malformed lines instead of operating on undef
        next unless defined $cuiA && defined $cuiB && defined $val;

        # decide membership per line; no state is carried between lines
        my $isATerm = exists $aTerms{$cuiA};
        my $isCTerm = exists $cTerms{$cuiB};
        next unless $isATerm || $isCTerm;

        # the original added $3 here, which is never set by this loop
        # (the line is parsed with split), so npp was always 0
        $npp += $val;

        if ($isCTerm) {
            $np1{$cuiB} += $val;
        }
        if ($isATerm) {
            $n1p += $val;
            # n11 counts only A-term/C-term co-occurrences
            if ($isCTerm) {
                $n11{$cuiB} += $val;
            }
        }
    }
    close($in);

    ######################################
    # Calculate association for each C term
    ######################################
    my %associationScores = ();
    foreach my $cTerm (keys %cTerms) {
        $associationScores{$cTerm} =
            $association->_calculateAssociation_fromObservedCounts(
                $n11{$cTerm}, $n1p, $np1{$cTerm}, $npp, $measure);
    }
    return \%associationScores;
}
# scores each implicit CUI using an association measure. Score is the maximum
# association between any of the linking terms.
# input: $startingMatrixRef <- ref to the starting matrix
# $explicitMatrixRef <- ref to the explicit matrix
# $implicitMatrixRef <- ref to the implicit matrix
# $measure <- the string of the umls association measure to use
# $association <- an instance of umls association
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
sub scoreImplicit_fromAllPairs {
#LBD Info
my $startingMatrixRef = shift;
my $explicitMatrixRef = shift;
my $implicitMatrixRef = shift;
my $measure = shift;
my $association = shift;
#optionally pass in stats so they don't get recalculated for
# multiple terms (such as with time slicing)
my $n1pRef = shift;
my $np1Ref = shift;
( run in 2.882 seconds using v1.01-cache-2.11-cpan-5837b0d9d2c )