ALBD
view release on metacpan or search on metacpan
lib/LiteratureBasedDiscovery/Rank.pm view on Meta::CPAN
#LBD Info
my $startingMatrixRef = shift;
my $explicitMatrixRef = shift;
my $implicitMatrixRef = shift;
#get all the A->C pairs
my $acPairsRef = &_getACPairs($startingMatrixRef, $implicitMatrixRef);
my %scores = ();
foreach my $pairKey (keys %{$acPairsRef}) {
#get the A and C keys
my ($aKey, $cKey) = split(/,/,$pairKey);
#grab the A and C explicit vectors
my $aVectorRef = ${$explicitMatrixRef}{$aKey};
my $cVectorRef = ${$explicitMatrixRef}{$cKey};
#find the numerator which is the sum of A[i]*C[i] values
my $numerator = 0;
foreach my $key (keys ${$aVectorRef}) {
if (exists ${$cVectorRef}{$key}) {
$numerator += ${$aVectorRef}{$key} * ${$cVectorRef}{$key};
}
}
#find the sum of A squared
my $aSum = 0;
foreach my $key (keys ${$aVectorRef}) {
$aSum += ($key*$key);
}
#find the sum of C squared
my $cSum = 0;
foreach my $key (keys ${$aVectorRef}) {
$cSum += ($key*$key);
}
#find the denominator, which is the product of A and C lengths
my $denom = sqrt($aSum)*sqrt($cSum);
#set the score (maximum score seen for that C term)
my $score = -1;
if ($denom != 0) {
$score = $numerator/$denom;
}
if (exists $scores{$cKey}) {
if ($score > $scores{$cKey}) {
$scores{$cKey} = $score;
}
}
else {
$scores{$cKey} = $score;
}
}
return \%scores;
}
# gets a list of A->C pairs, and sets the value as the implicit matrix value
# input: $startingMatrixRef <- ref to the starting matrix
# $implicitMatrixRef <- ref to the implicit matrix
# output: a hash ref where keys are comma seperated cui pairs hash{'C000,C111'}
# and values are set to the value at that index in the implicit matrix
sub _getACPairs {
my $startingMatrixRef = shift;
my $implicitMatrixRef = shift;
#generate a list of ac pairs
my %acPairs = ();
foreach my $keyA (keys %{$implicitMatrixRef}) {
foreach my $keyC (%{${$implicitMatrixRef}{$keyA}}) {
$acPairs{$keyA,$keyC} = ${${$implicitMatrixRef}{$keyA}}{$keyC};
}
}
return \%acPairs;
}
# scores each implicit CUI based on the number of linking terms between
# it and all starting terms.
# input: $startingMatrixRef <- ref to the starting matrix
# $explicitMatrixRef <- ref to the explicit matrix
# $implicitMatrixRef <- ref to the implicit matrix
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
sub scoreImplicit_linkingTermCount {
#LBD Info
my $startingMatrixRef = shift;
my $explicitMatrixRef = shift;
my $implicitMatrixRef = shift;
#get all bc pairs
my $bcPairsRef = &_getBCPairs($startingMatrixRef, $explicitMatrixRef, $implicitMatrixRef);
# Find the linking term count for each cTerm
my %scores = ();
my ($key1, $key2);
foreach my $pairKey (keys %{$bcPairsRef}) {
#cTerm is the second value ($key2)
($key1, $key2) = split(/,/,$pairKey);
#automatically initializes to 0
$scores{$key2}++;
}
return \%scores;
}
# scores each implicit CUI based on the summed frequency of co-occurrence
# between it and all B terms (A->B frequencies are NOT considered)
# input: $startingMatrixRef <- ref to the starting matrix
# $explicitMatrixRef <- ref to the explicit matrix
# $implicitMatrixRef <- ref to the implicit matrix
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
sub scoreImplicit_frequency {
#LBD Info
my $startingMatrixRef = shift;
my $explicitMatrixRef = shift;
my $implicitMatrixRef = shift;
#get all bc pairs
( run in 2.132 seconds using v1.01-cache-2.11-cpan-0bb4e1dffa6 )