ALBD
view release on metacpan or search on metacpan
lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN
$map /= (scalar keys %{$trueMatrixRef});
#return the mean average precision
return $map;
}
# calculates the mean precision at k at intervals of 1,
# from k = 1-10 and intervals of 10 for 10-100
# input: $trueMatrixRef <- a ref to a hash of true discoveries
# $rowRanksRef <- a ref to a hash of arrays of ranked predictions.
# Each hash key is a cui, each hash element is an
# array of ranked predictions for that cui. The ranked
# predictions are cuis are ordered in descending order
# based on association. (from Rank::RankDescending)
# output: \%meanPrecision <- a hash of mean preicsions at K, each key is the
# value of k, the the value is the precision at that
# k
sub calculatePrecisionAtK {
#grab the input
my $trueMatrixRef = shift; # a matrix of true discoveries
my $rowRanksRef = shift; # a hash of ranked predicted discoveries
#generate precision at k at intervals of 10 for k = 10-100
my %meanPrecision = ();
my $interval = 1;
for (my $k = 1; $k <= 100; $k+=$interval) {
$meanPrecision{$k} = 0;
#average the mean precision over all terms
foreach my $rowKey (keys %{$trueMatrixRef}) {
my $rankedPredictionsRef = ${$rowRanksRef}{$rowKey}; #an array ref of ranked predictions
#skip for rows that have no predictions
if (!defined $rankedPredictionsRef) {
next;
}
my $trueRef = ${$trueMatrixRef}{$rowKey}; #a list of true discoveries
#threshold the interval, so that it does not exceed
# the number of predictions
my $interval = $k;
if ($k > scalar @{$rankedPredictionsRef}) {
$interval = scalar @{$rankedPredictionsRef};
}
#find the number of true positives in the top $interval ranked terms
my $truePositiveCount = 0;
for (my $rank = 0; $rank < $interval; $rank++) {
my $cui = ${$rankedPredictionsRef}[$rank];
if (exists ${$trueRef}{$cui}) {
$truePositiveCount++;
}
}
#add this precision to the mean precisions at k
$meanPrecision{$k} += ($truePositiveCount/$interval);
}
#take the mean of the precisions
$meanPrecision{$k} /= (scalar keys %{$trueMatrixRef});
#after computing precision at 1-10, change interval to 10
if ($k == 10) {
$interval = 10;
}
}
#return the mean precisions at k
return \%meanPrecision;
}
# calculates the number of co-occurrences in the gold set of the top ranked
# k predictions at k at intervals of 1, from k = 1-10 and intervals of 10
# for 10-100. Co-occurrence counts are averaged over each of the starting terms
# input: $trueMatrixRef <- a ref to a hash of true discoveries
# $rowRanksRef <- a ref to a hash of arrays of ranked predictions.
# Each hash key is a cui, each hash element is an
# array of ranked predictions for that cui. The ranked
# predictions are cuis are ordered in descending order
# based on association. (from Rank::RankDescending)
# output: \%meanCooccurrenceCounts <- a hash of mean preicsions at K, each key
# is the value of k, the the value is the
# precision at that k
sub calculateMeanCooccurrencesAtK {
#grab the input
my $trueMatrixRef = shift; # a matrix of true discoveries
my $rowRanksRef = shift; # a hash of ranked predicted discoveries
#generate mean cooccurrences at k at intervals of 10 for k = 10-100
my %meanCooccurrenceCount = (); #count of the number of co-occurrences for each k
my $interval = 1;
for (my $k = 1; $k <= 100; $k+=$interval) {
$meanCooccurrenceCount{$k} = 0;
#average the mean co-occurrenes over all terms
# the true matrix contains only rows for the cuis being tested
# or in time slicing
foreach my $rowKey (keys %{$trueMatrixRef}) {
my $rankedPredictionsRef = ${$rowRanksRef}{$rowKey}; #an array ref of ranked predictions
#skip for rows that have no predictions
if (!defined $rankedPredictionsRef) {
next;
}
my $trueRef = ${$trueMatrixRef}{$rowKey}; #a list of true discoveries
#threshold the interval, so that it does not exceed
# the number of predictions
my $interval = $k;
if ($k > scalar @{$rankedPredictionsRef}) {
$interval = scalar @{$rankedPredictionsRef};
}
#find the number of true co-occurrence for the top $interval
# ranked terms
my $cooccurrenceCount = 0;
for (my $rank = 0; $rank < $interval; $rank++) {
my $cui = ${$rankedPredictionsRef}[$rank];
if (exists ${$trueRef}{$cui}) {
$cooccurrenceCount += ${$trueRef}{$cui};
}
}
#add this precision to the mean precisions at k
$meanCooccurrenceCount{$k} += $cooccurrenceCount;
}
#take the mean of the cooccurrence counts
$meanCooccurrenceCount{$k} /= (scalar keys %{$trueMatrixRef});
#after computing cooccurrence counts at 1-10, change interval to 10
if ($k == 10) {
$interval = 10;
}
}
#return the mean precisions at k
return \%meanCooccurrenceCount;
}
1;
( run in 1.705 second using v1.01-cache-2.11-cpan-0d23b851a93 )