ALBD

 view release on metacpan or  search on metacpan

lib/LiteratureBasedDiscovery/TimeSlicing.pm  view on Meta::CPAN

	if (!(exists $thresholdedMatrix{$cui1})) {
	    my %newHash = ();
	    $thresholdedMatrix{$cui1} = \%newHash;
	}

	#set key value for the key pair
	${$thresholdedMatrix{$cui1}}{$cui2} = ${${$matrixRef}{$cui1}}{$cui2};
	$postKeyCount++;

	#stop adding keys when below the threshold
	if (${$assocScoresRef}{$key} < $threshold) {
	    last;
	}
    }
    #return the thresholded matrix
    return \%thresholdedMatrix;
}


# calculates precision and recall at $numIntervals (e.g. 10 for 10%) recall
# intervals using an implicit ranking threshold. For every row (cui) of the
# true matrix, the ranked predictions are scanned from the top until the
# number of true discoveries required for the recall level has been found;
# precision and recall are measured at that cut-off, summed per recall level
# and finally averaged over the rows of the true matrix.
# input:  $trueMatrixRef <- a ref to a hash of true discoveries
#                           (${$trueMatrixRef}{$cui1}{$cui2} exists for each
#                           true discovery)
#         $rowRanksRef <- a ref to a hash of arrays of ranked predictions.
#                         Each hash key is a cui, each hash element is an
#                         array of ranked predictions for that cui. The ranked
#                         predictions are cuis ordered in descending order
#                         based on association. (from Rank::RankDescending)
#         $numIntervals <- the number of recall intervals to generate, must
#                          be greater than 0
# output: (\%precision, \%recall) <- refs to hashes of precision and recall.
#                                    Each hash key is the recall level
#                                    (0, 1/$numIntervals, ..., 1), and the
#                                    value is the averaged precision and
#                                    recall respectively
# dies:   if $numIntervals is undefined or not greater than 0
sub calculatePrecisionAndRecall_implicit {
    my $trueMatrixRef = shift; #a ref to the true matrix
    my $rowRanksRef = shift; #a ref to ranked predictions, each hash element is an array of cuis ordered by their rank
    my $numIntervals = shift; #the number of recall intervals to test at

    #fail with a clear message instead of an "Illegal division by zero"
    # raised from inside the interval loop
    if (!defined $numIntervals || $numIntervals <= 0) {
        die "calculatePrecisionAndRecall_implicit: numIntervals must be greater than 0\n";
    }

    #find precision and recall curves for each cui that is being predicted
    #  take the sum of precisions, then average after the loop
    my %precision = ();
    my %recall = ();
    foreach my $rowKey (keys %{$trueMatrixRef}) {
        my $trueRef = ${$trueMatrixRef}{$rowKey}; #a hash ref of true discoveries

        #a cui may have true discoveries but no predictions at all; treat a
        # missing row as an empty ranking so it is skipped below rather than
        # dereferencing undef
        my $rankedPredictionsRef = ${$rowRanksRef}{$rowKey};
        if (!defined $rankedPredictionsRef) {
            $rankedPredictionsRef = [];
        }

        #get the number of predicted discoveries and true discoveries
        my $numPredictions = scalar @{$rankedPredictionsRef};
        my $numTrue = scalar keys %{$trueRef};

        #skip if there are NO new discoveries for this start term
        next if ($numTrue == 0);
        #skip if there are NO predictions for this start term
        next if ($numPredictions == 0);

        #determine precision and recall at each recall level. This is done
        #by simulating a threshold being applied to the ranked list. The
        #levels are generated from an integer counter so that rounding error
        #cannot accumulate and drop or shift a level
        foreach my $step (0 .. $numIntervals) {
            my $i = $step / $numIntervals; #the recall level, used as hash key

            #determine the number true to grab
            my $numTrueForInterval = 1; #at level 0, grab just the first term that is true
            if ($step > 0) {
                #multiply before dividing so whole-number targets are exact
                $numTrueForInterval = ($numTrue * $step) / $numIntervals;
            }

            #grab predictions until the recall level is reached
            my $truePositive = 0;
            my $numChecked = 0;
            foreach my $cui (@{$rankedPredictionsRef}) {

                #check if this ranked cui is a true discovery
                if (exists ${$trueRef}{$cui}) {
                    $truePositive++;
                }
                $numChecked++;

                #stop as soon as the recall level has been reached (>=, not
                # >, otherwise one true discovery too many is consumed and
                # the 100% level always scans the entire list)
                last if ($truePositive >= $numTrueForInterval);
            }

            #sum precision and recall at this level, the average over the
            # number of rows is taken outside of the loop. $numChecked is at
            # least 1 here because $numPredictions > 0
            $precision{$i} += ($truePositive / $numChecked); #fraction of selected that are true
            $recall{$i} += ($truePositive / $numTrue); #fraction of true that are selected
        }
    }

    #calculate the average precision and recall at each level
    # divide by the number of rows in the true matrix ref
    # because those are the number of cuis we are testing
    # (rows skipped above therefore contribute 0). It is possible
    # that the predictions has rows that are not in the true,
    # and those are ignored.
    my $numRows = scalar keys %{$trueMatrixRef};
    foreach my $i (keys %precision) {
        $precision{$i} /= $numRows;
        $recall{$i} /= $numRows;
    }

    #return the precision and recall at each recall level
    return (\%precision, \%recall);
}



# calculates the mean average precision (MAP)
# input:  $trueMatrixRef <- a ref to a hash of true discoveries
#         $rowRanksRef <- a ref to a hash of arrays of ranked predictions. 
#                         Each hash key is a cui,  each hash element is an 
#                         array of ranked predictions for that cui. The ranked
#                         predictions are cuis ordered in descending order
#                         based on association. (from Rank::RankDescending)
# output: $map <- a scalar value of mean average precision (MAP)
sub calculateMeanAveragePrecision {
    #grab the input
    my $trueMatrixRef = shift; # a matrix of true discoveries



( run in 0.565 second using v1.01-cache-2.11-cpan-39bf76dae61 )