ALBD
view release on metacpan or search on metacpan
lib/LiteratureBasedDiscovery/Rank.pm view on Meta::CPAN
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
sub scoreImplicit_LTC_AMW {
    # Ranks every key scored by linking term count (LTC), using the average
    # minimum weight (AMW) score to break LTC ties.
    #
    # input:  $startingMatrixRef, $explicitMatrixRef, $implicitMatrixRef
    #             <- passed unchanged to both scoring helpers
    #         $measure, $association, $abScoresRef
    #             <- passed unchanged to the AMW helper
    #         $n1pRef, $np1Ref, $nppRef
    #             <- optional precomputed stats, passed unchanged to the
    #                AMW helper so they don't get recalculated for multiple
    #                terms (such as with time slicing)
    # output: a hash ref mapping each key of the LTC scores to its rank.
    #         With N keys, the best key gets rank N and the worst gets 1.
    my $startingMatrixRef = shift;
    my $explicitMatrixRef = shift;
    my $implicitMatrixRef = shift;
    my $measure = shift;
    my $association = shift;
    my $abScoresRef = shift;
    my $n1pRef = shift;
    my $np1Ref = shift;
    my $nppRef = shift;

    #get linking term count scores
    my $ltcScoresRef = &scoreImplicit_linkingTermCount(
        $startingMatrixRef, $explicitMatrixRef, $implicitMatrixRef);

    #get average minimum weight scores
    my $amwScoresRef = &scoreImplicit_averageMinimumWeight(
        $startingMatrixRef, $explicitMatrixRef, $implicitMatrixRef,
        $measure, $association, $abScoresRef, $n1pRef, $np1Ref, $nppRef);

    #look up the tie-breaking AMW score once per key. A key with no AMW
    # score is compared as 0, which is the numeric value an undefined score
    # would take anyway, but without triggering "uninitialized" warnings.
    # NOTE(review): presumably both helpers score the same key set - confirm
    my %amwOf = ();
    foreach my $key (keys %{$ltcScoresRef}) {
        my $amw = ${$amwScoresRef}{$key};
        $amwOf{$key} = defined $amw ? $amw : 0;
    }

    #order the keys first by descending LTC, then by descending AMW. The
    # final comparison on the key itself makes the order of complete ties
    # reproducible instead of depending on randomized hash key order.
    my @rankedKeys = sort {
        ${$ltcScoresRef}{$b} <=> ${$ltcScoresRef}{$a}
            || $amwOf{$b} <=> $amwOf{$a}
            || $a cmp $b
    } keys %{$ltcScoresRef};

    #generate the LTC-AMW scores by assigning a rank value, counting down
    # from the number of keys so that the best key gets the highest score
    my %ltcAMWScores = ();
    my $currentRank = scalar @rankedKeys;
    foreach my $key (@rankedKeys) {
        $ltcAMWScores{$key} = $currentRank;
        $currentRank--;
    }

    #return the scores
    return \%ltcAMWScores;
}
# TODO: this method is untested
# gets the maximum cosine distance score between any A term and each C term
# input: $startingMatrixRef <- ref to the starting matrix
# $explicitMatrixRef <- ref to the explicit matrix
# $implicitMatrixRef <- ref to the implicit matrix
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
sub score_cosineDistance {
#LBD Info
my $startingMatrixRef = shift;
my $explicitMatrixRef = shift;
my $implicitMatrixRef = shift;
#get all the A->C pairs
my $acPairsRef = &_getACPairs($startingMatrixRef, $implicitMatrixRef);
my %scores = ();
foreach my $pairKey (keys %{$acPairsRef}) {
#get the A and C keys
my ($aKey, $cKey) = split(/,/,$pairKey);
#grab the A and C explicit vectors
my $aVectorRef = ${$explicitMatrixRef}{$aKey};
my $cVectorRef = ${$explicitMatrixRef}{$cKey};
#find the numerator which is the sum of A[i]*C[i] values
my $numerator = 0;
foreach my $key (keys ${$aVectorRef}) {
if (exists ${$cVectorRef}{$key}) {
$numerator += ${$aVectorRef}{$key} * ${$cVectorRef}{$key};
}
}
#find the sum of A squared
my $aSum = 0;
foreach my $key (keys ${$aVectorRef}) {
$aSum += ($key*$key);
}
#find the sum of C squared
my $cSum = 0;
foreach my $key (keys ${$aVectorRef}) {
$cSum += ($key*$key);
}
#find the denominator, which is the product of A and C lengths
my $denom = sqrt($aSum)*sqrt($cSum);
#set the score (maximum score seen for that C term)
my $score = -1;
if ($denom != 0) {
$score = $numerator/$denom;
}
if (exists $scores{$cKey}) {
if ($score > $scores{$cKey}) {
$scores{$cKey} = $score;
}
}
else {
$scores{$cKey} = $score;
}
}
( run in 3.236 seconds using v1.01-cache-2.11-cpan-75ffa21a3d4 )