ALBD

 view release on metacpan or  search on metacpan

lib/LiteratureBasedDiscovery/TimeSlicing.pm  view on Meta::CPAN

    $map /= (scalar keys %{$trueMatrixRef});

    #return the mean average precision
    return $map;
}


# calculates the mean precision at k at intervals of 1, 
# from k = 1-10 and intervals of 10 for 10-100
# input:  $trueMatrixRef <- a ref to a hash of true discoveries
#         $rowRanksRef <- a ref to a hash of arrays of ranked predictions. 
#                         Each hash key is a cui,  each hash element is an 
#                         array of ranked predictions for that cui. The ranked 
#                         predictions are cuis are ordered in descending order 
#                         based on association. (from Rank::RankDescending)
# output: \%meanPrecision <- a hash of mean preicsions at K, each key is the 
#                            value of k, the the value is the precision at that
#                            k
sub calculatePrecisionAtK {
    #grab the input
    my $trueMatrixRef = shift; # a matrix of true discoveries
    my $rowRanksRef = shift; # a hash of ranked predicted discoveries
  
    #generate precision at k at intervals of 10 for k = 10-100
    my %meanPrecision = ();
    my $interval = 1;
    for (my $k = 1; $k <= 100; $k+=$interval) {
	$meanPrecision{$k} = 0;

	#average the mean precision over all terms
	foreach my $rowKey (keys %{$trueMatrixRef}) {
	    my $rankedPredictionsRef = ${$rowRanksRef}{$rowKey}; #an array ref of ranked predictions
	    
	    #skip for rows that have no predictions
	    if (!defined $rankedPredictionsRef) {
		next;
	    } 
	    my $trueRef = ${$trueMatrixRef}{$rowKey}; #a list of true discoveries
	    #threshold the interval, so that it does not exceed 
	    # the number of predictions
	    my $interval = $k;
	    if ($k > scalar @{$rankedPredictionsRef}) {
		$interval = scalar @{$rankedPredictionsRef};
	    }

	    #find the number of true positives in the top $interval ranked terms
	    my $truePositiveCount = 0;
	    for (my $rank = 0; $rank < $interval; $rank++) {
		my $cui = ${$rankedPredictionsRef}[$rank];
		if (exists ${$trueRef}{$cui}) {
		    $truePositiveCount++;
		}
	    }

	    #add this precision to the mean precisions at k
	    $meanPrecision{$k} += ($truePositiveCount/$interval);
	}
	#take the mean of the precisions
	$meanPrecision{$k} /= (scalar keys %{$trueMatrixRef});

	#after computing precision at 1-10, change interval to 10
	if ($k == 10) {
	    $interval = 10;
	}
    }

    #return the mean precisions at k
    return \%meanPrecision;
}


# calculates the number of co-occurrences in the gold set of the top ranked 
# k predictions at k at intervals of 1, from k = 1-10 and intervals of 10 
# for 10-100. Co-occurrence counts are averaged over each of the starting terms
# input:  $trueMatrixRef <- a ref to a hash of true discoveries
#         $rowRanksRef <- a ref to a hash of arrays of ranked predictions. 
#                         Each hash key is a cui,  each hash element is an 
#                         array of ranked predictions for that cui. The ranked 
#                         predictions are cuis are ordered in descending order 
#                         based on association. (from Rank::RankDescending)
# output: \%meanCooccurrenceCounts <- a hash of mean preicsions at K, each key 
#                                     is the value of k, the the value is the 
#                                     precision at that k
sub calculateMeanCooccurrencesAtK {
    #grab the input
    my $trueMatrixRef = shift; # a matrix of true discoveries
    my $rowRanksRef = shift; # a hash of ranked predicted discoveries
  
    #generate mean cooccurrences at k at intervals of 10 for k = 10-100
    my %meanCooccurrenceCount = (); #count of the number of co-occurrences for each k
    my $interval = 1;
    for (my $k = 1; $k <= 100; $k+=$interval) {
	$meanCooccurrenceCount{$k} = 0;

	#average the mean co-occurrenes over all terms
	#  the true matrix contains only rows for the cuis being tested 
        #  or in time slicing
	foreach my $rowKey (keys %{$trueMatrixRef}) {
	    my $rankedPredictionsRef = ${$rowRanksRef}{$rowKey}; #an array ref of ranked predictions
	    
	    #skip for rows that have no predictions
	    if (!defined $rankedPredictionsRef) {
		next;
	    } 
	    my $trueRef = ${$trueMatrixRef}{$rowKey}; #a list of true discoveries

	    #threshold the interval, so that it does not exceed 
	    # the number of predictions
	    my $interval = $k;
	    if ($k > scalar @{$rankedPredictionsRef}) {
		$interval = scalar @{$rankedPredictionsRef};
	    }

	    #find the number of true co-occurrence for the top $interval 
	    # ranked terms
	    my $cooccurrenceCount = 0;
	    for (my $rank = 0; $rank < $interval; $rank++) {
		my $cui = ${$rankedPredictionsRef}[$rank];
		if (exists ${$trueRef}{$cui}) {
		    $cooccurrenceCount += ${$trueRef}{$cui};
		}
	    }

	    #add this precision to the mean precisions at k
	    $meanCooccurrenceCount{$k} += $cooccurrenceCount;
	}
	#take the mean of the cooccurrence counts
	$meanCooccurrenceCount{$k} /= (scalar keys %{$trueMatrixRef});

	#after computing cooccurrence counts  at 1-10, change interval to 10
	if ($k == 10) {
	    $interval = 10;
	}
    }

    #return the mean precisions at k
    return \%meanCooccurrenceCount;
}

1;



( run in 1.705 second using v1.01-cache-2.11-cpan-0d23b851a93 )