ALBD

 view release on metacpan or  search on metacpan

lib/ALBD.pm  view on Meta::CPAN

    #output stats
    print "predicted - total, min, max, average = $predictedTotal, $predictedMin, $predictedMax, $predictedAverage\n";
    print "true - total, min, max, average = $trueTotal, $trueMin, $trueMax, $trueAverage\n";
}


# generates precision and recall values by varying the threshold
# of the A->C ranking measure. Also generates precision at k, and
# mean average precision
# input:  none
# output: none, but precision, recall, precision at k, and map values
#         output to STDOUT
sub timeSlicing_generatePrecisionAndRecall_implicit {
    my $NUM_SAMPLES = 200; #TODO, read fomr file number of samples to average over for timeslicing
    my $self = shift;
    my $start; #used to record run times
    print "In timeSlicing_generatePrecisionAndRecall_implicit\n";

    #Get inputs
    my $startAcceptTypesRef = $self->_getAcceptTypes('start');
    my $linkingAcceptTypesRef = $self->_getAcceptTypes('linking');

lib/LiteratureBasedDiscovery/TimeSlicing.pm  view on Meta::CPAN

    #output precision and recall
    print "----- average precision at 10% recall intervals (i recall precision) ----> \n";
    foreach my $i (sort {$a <=> $b} keys %{$precisionRef}) {
	print "      $i ${$recallRef}{$i} ${$precisionRef}{$i}\n";
    }
    print "\n";
    
#-------------------------------------------
    
    #calculate mean average precision
    my $map = &calculateMeanAveragePrecision(
	$goldMatrixRef, $rowRanksRef);
    #output mean average precision
    print "---------- mean average precision ---------------> \n";
    print "      MAP = $map\n";
    print "\n";

#-------------------------------------------
    
    #calculate precision at k
    print "calculating precision at k\n";
    my $precisionAtKRef = &calculatePrecisionAtK($goldMatrixRef, $rowRanksRef);
    #output precision at k
    print "---------- mean precision at k intervals ---------------> \n";
    foreach my $k (sort {$a <=> $b} keys %{$precisionAtKRef}) {

lib/LiteratureBasedDiscovery/TimeSlicing.pm  view on Meta::CPAN




# calculates the mean average precision (MAP)
# input:  $trueMatrixRef <- a ref to a hash of true discoveries
#         $rowRanksRef <- a ref to a hash of arrays of ranked predictions. 
#                         Each hash key is a cui,  each hash element is an 
#                         array of ranked predictions for that cui. The ranked 
#                         predictions are cuis ordered in descending order 
#                         based on association. (from Rank::RankDescending)
# output: $map <- a scalar value of mean average precision (MAP)
sub calculateMeanAveragePrecision {
    #grab the input
    my $trueMatrixRef = shift; # a matrix of true discoveries
    my $rowRanksRef = shift; # a hash of ranked predicted discoveries
    print "calculating mean average precision\n";

    #calculate MAP for each true discovery being predicted
    my $map = 0;
    foreach my $rowKey (keys %{$trueMatrixRef}) {
	my $rankedPredictionsRef = ${$rowRanksRef}{$rowKey}; #an array ref of ranked predictions

	#skip for rows that have no predictions
	if (!defined $rankedPredictionsRef) {
	    next;
	} 
	my $trueRef = ${$trueMatrixRef}{$rowKey}; #a list of true discoveries
	my $numPredictions = scalar @{$rankedPredictionsRef};

lib/LiteratureBasedDiscovery/TimeSlicing.pm  view on Meta::CPAN

	my $truePositiveCount = 0;
	#start at 1, since divide by rank...subtract one when indexing
	for (my $rank = 1; $rank <= $numPredictions; $rank++) {
	    my $cui = ${$rankedPredictionsRef}[$rank-1];
	    if (exists ${$trueRef}{$cui}) {
		$truePositiveCount++;
		$ap += ($truePositiveCount/($rank));
	    }
	}

	#calculate the average precision, and add to map
	if ($truePositiveCount > 0) {
	    $ap /= $truePositiveCount;
	} #else, $ap is already 0 so do nothing
	$map += $ap;
    }

    #take the mean of the average precisions
    # divide by the number of true discoveries that you summed over
    $map /= (scalar keys %{$trueMatrixRef});

    #return the mean average precision
    return $map;
}


# calculates the mean precision at k at intervals of 1, 
# from k = 1-10 and intervals of 10 for 10-100
# input:  $trueMatrixRef <- a ref to a hash of true discoveries
#         $rowRanksRef <- a ref to a hash of arrays of ranked predictions. 
#                         Each hash key is a cui,  each hash element is an 
#                         array of ranked predictions for that cui. The ranked 
#                         predictions are cuis ordered in descending order 

samples/lbdConfig  view on Meta::CPAN

# ps - Poisson Stirling Measure 
# tscore - T-score
<rankingMeasure>ll

# The output path of the results of lbd
<implicitOutputFile>sampleOutput

# a comma separated list of linking (B) term accept semantic groups, which 
# restricts the linking terms to the semantic groups specified. Group names
# come directly from the UMLS. 
# See https://metamap.nlm.nih.gov/Docs/SemGroups_2013.txt for a list
#<linkingAcceptGroups>CHEM,DISO,GENE,PHYS,ANAT

# similar to linking accept groups, this restricts the acceptable linking (B) 
# terms to terms within the semantic types listed
# See https://metamap.nlm.nih.gov/Docs/SemanticTypes_2013AA.txt for a list
#<linkingAcceptGroups>clnd,chem

# a comma separated list of target (C) term accept semantic groups, which 
# restricts the target terms to the semantic groups specified. Group names
# come directly from the UMLS. 
# See https://metamap.nlm.nih.gov/Docs/SemGroups_2013.txt for a list
#<targetAcceptGroups>CHEM,GENE

# similar to target accept groups, this restricts the acceptable target (C) 
# terms to terms within the semantic types listed
# See https://metamap.nlm.nih.gov/Docs/SemanticTypes_2013AA.txt for a list
#<linkingAcceptGroups>clnd,chem

# Input file path for the explicit co-occurrence matrix used in LBD
<explicitInputFile>sampleExplicitMatrix

samples/timeSlicingConfig  view on Meta::CPAN

# pmi - Pointwise Mutual Information
# phi - Phi Coefficient
# chi - Pearson's Chi Squared Test
# ps - Poisson Stirling Measure 
# tscore - T-score
<rankingMeasure>ll

# a comma separated list of linking (B) term accept semantic groups, which 
# restricts the linking terms to the semantic groups specified. Group names
# come directly from the UMLS. 
# See https://metamap.nlm.nih.gov/Docs/SemGroups_2013.txt for a list
#<linkingAcceptGroups>CHEM,DISO,GENE,PHYS,ANAT

# similar to linking accept groups, this restricts the acceptable linking (B) 
# terms to terms within the semantic types listed
# See https://metamap.nlm.nih.gov/Docs/SemanticTypes_2013AA.txt for a list
#<linkingAcceptGroups>clnd,chem

# a comma separated list of target (C) term accept semantic groups, which 
# restricts the target terms to the semantic groups specified. Group names
# come directly from the UMLS. 
# See https://metamap.nlm.nih.gov/Docs/SemGroups_2013.txt for a list
#<targetAcceptGroups>CHEM,GENE

# similar to target accept groups, this restricts the acceptable target (C) 
# terms to terms within the semantic types listed
# See https://metamap.nlm.nih.gov/Docs/SemanticTypes_2013AA.txt for a list
#<linkingAcceptGroups>clnd,chem

# Input file path for the explicit co-occurrence matrix used in LBD
<explicitInputFile>sampleExplicitMatrix



( run in 0.723 second using v1.01-cache-2.11-cpan-49f99fa48dc )