ALBD
view release on metacpan or search on metacpan
lib/ALBD.pm view on Meta::CPAN
#output stats
print "predicted - total, min, max, average = $predictedTotal, $predictedMin, $predictedMax, $predictedAverage\n";
print "true - total, min, max, average = $trueTotal, $trueMin, $trueMax, $trueAverage\n";
}
# generates precision and recall values by varying the threshold
# of the A->C ranking measure. Also generates precision at k, and
# mean average precision
# input: none
# output: none, but precision, recall, precision at k, and map values
# output to STDOUT
sub timeSlicing_generatePrecisionAndRecall_implicit {
my $NUM_SAMPLES = 200; #TODO, read fomr file number of samples to average over for timeslicing
my $self = shift;
my $start; #used to record run times
print "In timeSlicing_generatePrecisionAndRecall_implicit\n";
#Get inputs
my $startAcceptTypesRef = $self->_getAcceptTypes('start');
my $linkingAcceptTypesRef = $self->_getAcceptTypes('linking');
lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN
#output precision and recall
print "----- average precision at 10% recall intervals (i recall precision) ----> \n";
foreach my $i (sort {$a <=> $b} keys %{$precisionRef}) {
print " $i ${$recallRef}{$i} ${$precisionRef}{$i}\n";
}
print "\n";
#-------------------------------------------
#calculate mean average precision
my $map = &calculateMeanAveragePrecision(
$goldMatrixRef, $rowRanksRef);
#output mean average precision
print "---------- mean average precision ---------------> \n";
print " MAP = $map\n";
print "\n";
#-------------------------------------------
#calculate precision at k
print "calculating precision at k\n";
my $precisionAtKRef = &calculatePrecisionAtK($goldMatrixRef, $rowRanksRef);
#output precision at k
print "---------- mean precision at k intervals ---------------> \n";
foreach my $k (sort {$a <=> $b} keys %{$precisionAtKRef}) {
lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN
# calculates the mean average precision (MAP)
# input: $trueMatrixRef <- a ref to a hash of true discoveries
# $rowRanksRef <- a ref to a hash of arrays of ranked predictions.
# Each hash key is a cui, each hash element is an
# array of ranked predictions for that cui. The ranked
# predictions are cuis ordered in descending order
# based on association. (from Rank::RankDescending)
# output: $map <- a scalar value of mean average precision (MAP)
sub calculateMeanAveragePrecision {
#grab the input
my $trueMatrixRef = shift; # a matrix of true discoveries
my $rowRanksRef = shift; # a hash of ranked predicted discoveries
print "calculating mean average precision\n";
#calculate MAP for each true discovery being predicted
my $map = 0;
foreach my $rowKey (keys %{$trueMatrixRef}) {
my $rankedPredictionsRef = ${$rowRanksRef}{$rowKey}; #an array ref of ranked predictions
#skip for rows that have no predictions
if (!defined $rankedPredictionsRef) {
next;
}
my $trueRef = ${$trueMatrixRef}{$rowKey}; #a list of true discoveries
my $numPredictions = scalar @{$rankedPredictionsRef};
lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN
my $truePositiveCount = 0;
#start at 1, since divide by rank...subtract one when indexing
for (my $rank = 1; $rank <= $numPredictions; $rank++) {
my $cui = ${$rankedPredictionsRef}[$rank-1];
if (exists ${$trueRef}{$cui}) {
$truePositiveCount++;
$ap += ($truePositiveCount/($rank));
}
}
#calculate the average precision, and add to map
if ($truePositiveCount > 0) {
$ap /= $truePositiveCount;
} #else, $ap is already 0 so do nothing
$map += $ap;
}
#take the mean of the average precisions
# divide by the number of true discoveries that you summed over
$map /= (scalar keys %{$trueMatrixRef});
#return the mean average precision
return $map;
}
# calculates the mean precision at k at intervals of 1,
# from k = 1-10 and intervals of 10 for 10-100
# input: $trueMatrixRef <- a ref to a hash of true discoveries
# $rowRanksRef <- a ref to a hash of arrays of ranked predictions.
# Each hash key is a cui, each hash element is an
# array of ranked predictions for that cui. The ranked
# predictions are cuis ordered in descending order
samples/lbdConfig view on Meta::CPAN
# ps - Poisson Stirling Measure
# tscore - T-score
<rankingMeasure>ll
# The output path of the results of lbd
<implicitOutputFile>sampleOutput
# a comma separated list of linking (B) term accept semantic groups, which
# restricts the linking terms to the semantic groups specified. Group names
# come directly from the UMLS.
# See https://metamap.nlm.nih.gov/Docs/SemGroups_2013.txt for a list
#<linkingAcceptGroups>CHEM,DISO,GENE,PHYS,ANAT
# similar to linking accept groups, this restricts the acceptable linking (B)
# terms to terms within the semantic types listed
# See https://metamap.nlm.nih.gov/Docs/SemanticTypes_2013AA.txt for a list
#<linkingAcceptTypes>clnd,chem
# a comma separated list of target (C) term accept semantic groups, which
# restricts the target terms to the semantic groups specified. Group names
# come directly from the UMLS.
# See https://metamap.nlm.nih.gov/Docs/SemGroups_2013.txt for a list
#<targetAcceptGroups>CHEM,GENE
# similar to target accept groups, this restricts the acceptable target (C)
# terms to terms within the semantic types listed
# See https://metamap.nlm.nih.gov/Docs/SemanticTypes_2013AA.txt for a list
#<targetAcceptTypes>clnd,chem
# Input file path for the explicit co-occurrence matrix used in LBD
<explicitInputFile>sampleExplicitMatrix
samples/timeSlicingConfig view on Meta::CPAN
# pmi - Pointwise Mutual Information
# phi - Phi Coefficient
# chi - Pearson's Chi Squared Test
# ps - Poisson Stirling Measure
# tscore - T-score
<rankingMeasure>ll
# a comma separated list of linking (B) term accept semantic groups, which
# restricts the linking terms to the semantic groups specified. Group names
# come directly from the UMLS.
# See https://metamap.nlm.nih.gov/Docs/SemGroups_2013.txt for a list
#<linkingAcceptGroups>CHEM,DISO,GENE,PHYS,ANAT
# similar to linking accept groups, this restricts the acceptable linking (B)
# terms to terms within the semantic types listed
# See https://metamap.nlm.nih.gov/Docs/SemanticTypes_2013AA.txt for a list
#<linkingAcceptTypes>clnd,chem
# a comma separated list of target (C) term accept semantic groups, which
# restricts the target terms to the semantic groups specified. Group names
# come directly from the UMLS.
# See https://metamap.nlm.nih.gov/Docs/SemGroups_2013.txt for a list
#<targetAcceptGroups>CHEM,GENE
# similar to target accept groups, this restricts the acceptable target (C)
# terms to terms within the semantic types listed
# See https://metamap.nlm.nih.gov/Docs/SemanticTypes_2013AA.txt for a list
#<targetAcceptTypes>clnd,chem
# Input file path for the explicit co-occurrence matrix used in LBD
<explicitInputFile>sampleExplicitMatrix
( run in 0.723 second using v1.01-cache-2.11-cpan-49f99fa48dc )