ALBD
view release on metacpan or search on metacpan
lib/ALBD.pm view on Meta::CPAN
$implicitMatrixRef
= Discovery::removeExplicit($startingMatrixRef, $implicitMatrixRef);
#apply a semantic type filter to the implicit matrix
if ((scalar keys %{$targetAcceptTypesRef}) > 0) {
Filters::semanticTypeFilter_columns(
$implicitMatrixRef, $targetAcceptTypesRef, $umls_interface);
}
#calculate precision and recall
my ($precision, $recall) = TimeSlicing::calculatePrecisionRecall(
$implicitMatrixRef, $postCutoffMatrixRef);
print "precision = $precision, recall = $recall\n";
#calculate averages/min/max only for $i= $numIntervals, which is all terms
if ($i == $numIntervals) {
#average over all terms
foreach my $rowKey(keys %{$implicitMatrixRef}) {
#get the counts true and predicted for this term (row of matrix)
my $numPredicted = scalar keys %{${$implicitMatrixRef}{$rowKey}};
my $numTrue = scalar keys %{${$postCutoffMatrixRef}{$rowKey}};
#sum counts
$predictedAverage += $numPredicted;
$trueAverage += $numTrue;
#update min and max
if ($numPredicted < $predictedMin) {
$predictedMin = $numPredicted;
}
if ($numPredicted > $predictedMax) {
$predictedMax = $numPredicted;
}
if ($numTrue < $trueMin) {
$predictedMin = $numTrue;
}
if ($numTrue > $trueMax) {
$predictedMax = $numTrue;
}
$predictedTotal += $numPredicted;
$trueTotal += $numTrue;
}
#take the average, both true and predicted matrices
# have the same number of rows.
$predictedAverage /= (scalar keys %{$implicitMatrixRef});
$trueAverage /= (scalar keys %{$implicitMatrixRef});
}
}
#output stats
print "predicted - total, min, max, average = $predictedTotal, $predictedMin, $predictedMax, $predictedAverage\n";
print "true - total, min, max, average = $trueTotal, $trueMin, $trueMax, $trueAverage\n";
}
# generates precision and recall values by varying the threshold
# of the A->C ranking measure. Also generates precision at k, and
# mean average precision
# input: none
# output: none, but precision, recall, precision at k, and map values
# output to STDOUT
sub timeSlicing_generatePrecisionAndRecall_implicit {
my $NUM_SAMPLES = 200; #TODO, read fomr file number of samples to average over for timeslicing
my $self = shift;
my $start; #used to record run times
print "In timeSlicing_generatePrecisionAndRecall_implicit\n";
#Get inputs
my $startAcceptTypesRef = $self->_getAcceptTypes('start');
my $linkingAcceptTypesRef = $self->_getAcceptTypes('linking');
my $targetAcceptTypesRef = $self->_getAcceptTypes('target');
#-----------
# Starting Matrix Creation
#-----------
#Get the Explicit Matrix
print "loading explicit\n";
my $explicitMatrixRef;
if(!defined $lbdOptions{'explicitInputFile'}) {
die ("ERROR: explicitInputFile must be defined in LBD config file\n");
}
$explicitMatrixRef = Discovery::fileToSparseMatrix($lbdOptions{'explicitInputFile'});
#create the starting matrix
print "generating starting\n";
my $startingMatrixRef
= TimeSlicing::generateStartingMatrix($explicitMatrixRef, \%lbdOptions, $startAcceptTypesRef, $NUM_SAMPLES, $umls_interface);
#----------
#--------
# Gold Loading/Creation
#--------
#load or create the gold matrix
my $goldMatrixRef;
if (exists $lbdOptions{'goldInputFile'}) {
print "inputting gold\n";
$goldMatrixRef = Discovery::fileToSparseMatrix($lbdOptions{'goldInputFile'});
}
else {
print "loading post cutoff\n";
$goldMatrixRef = TimeSlicing::loadPostCutOffMatrix($startingMatrixRef, $explicitMatrixRef, $lbdOptions{'postCutoffFileName'});
#remove explicit knowledge from the post cutoff matrix
$goldMatrixRef = Discovery::removeExplicit($startingMatrixRef, $goldMatrixRef);
#apply a semantic type filter to the post cutoff matrix
print "applying semantic filter to post-cutoff matrix\n";
if ((scalar keys %{$targetAcceptTypesRef}) > 0) {
Filters::semanticTypeFilter_columns(
$goldMatrixRef, $targetAcceptTypesRef, $umls_interface);
}
#TODO why is the gold matrix outputting with an extra line between samples?
#output the gold matrix
if (exists $lbdOptions{'goldOutputFile'}) {
print "outputting gold\n";
Discovery::outputMatrixToFile($lbdOptions{'goldOutputFile'}, $goldMatrixRef);
}
}
( run in 1.226 second using v1.01-cache-2.11-cpan-140bd7fdf52 )