ALBD
view release on metacpan or search on metacpan
#read in the scores that were just generated
# (%newScores maps each CUI found in the sample output to its score)
my %newScores = ();
#3-arg open with a lexical handle: the path can never be misread as a
# mode, and the handle cannot clash with other users of a global handle
open(my $sampleFh, '<', './samples/sampleOutput')
    or die ("Error: Cannot open sample output: $!\n");
while (my $line = <$sampleFh>) {
    #lines of interest look like: <integer> TAB <decimal score> TAB <CUI>
    # anything else is silently skipped
    if ($line =~ /\d+\t(\d+\.\d+)\t(C\d+)/) {
        $newScores{$2} = $1;
    }
}
close $sampleFh;
#check that the number of keys in the input and output files are the same
ok(scalar keys %goldScores == scalar keys %newScores, "Number of Output CUIs match");

#check that the gold and sample scores match
# $allExist - every gold CUI is present in the new output
# $allMatch - every gold CUI is present AND has the same score
my $allMatch = 1;
my $allExist = 1;
foreach my $key (keys %goldScores) {
    if (!exists $newScores{$key}) {
        #a missing CUI fails both checks, so nothing more can be learned
        $allExist = 0;
        $allMatch = 0;
        last;
    }
    if ($newScores{$key} != $goldScores{$key}) {
        #keep scanning after a score mismatch: hash order is arbitrary, and
        # stopping here could hide a missing CUI that comes later, making
        # the "All CUIs exist" test pass when it should fail
        $allMatch = 0;
    }
}
ok ($allExist == 1, "All CUIs exist in the output"); #all cuis exist in the new output file
ok ($allMatch == 1, "All Scores are the same in the output"); #all scores are the same in the new output file

print "Done with Open Discovery Tests\n\n";
#######################################################
# Time slicing tests: compare the freshly generated time
# slicing output against the stored gold output
#######################################################
print "Performing Time Slicing Tests\n";

#load the gold time slicing metrics
my ($goldAPScoresRef, $goldMAP, $goldPAtKScoresRef, $goldFAtKScoresRef)
    = readTimeSlicingData('./t/goldSampleTimeSliceOutput');

#load the newly generated time slicing metrics
my ($newAPScoresRef, $newMAP, $newPAtKScoresRef, $newFAtKScoresRef)
    = readTimeSlicingData('./samples/sampleTimeSliceOutput');

#each metric must have been read with the expected number of values;
# every row is: [values read, expected count, test description]
foreach my $countCheck (
    [$newAPScoresRef,   11, "Correct Count of Average Precisions"],
    [$newPAtKScoresRef, 19, "Correct Count of Precision at K's"],
    [$newFAtKScoresRef, 19, "Correct Count of Freq at K's"],
) {
    my ($valuesRef, $expectedCount, $description) = @{$countCheck};
    ok(scalar(@{$valuesRef}) == $expectedCount, $description);
}
#check that each of the AP scores match the gold (within error tolerance)
my $apSame = 1;
for (my $i = 0; $i < scalar @{$goldAPScoresRef}; $i++) {
    #each entry stores two comma separated values; check both of them
    my @goldScores = split(',', ${$goldAPScoresRef}[$i]);
    my @newScores = split(',', ${$newAPScoresRef}[$i]);

    #the entry differs as soon as EITHER value is out of tolerance
    # (requiring both to differ would let a single bad value slip through)
    if ((abs($goldScores[0] - $newScores[0]) > $precRecallErrorTol)
        || (abs($goldScores[1] - $newScores[1]) > $precRecallErrorTol)) {
        $apSame = 0;
        last;
    }
}
ok($apSame == 1, "Average Precisions Match");
#check MAP is the same (within error tolerance)
# the test passes when the difference does NOT exceed the tolerance
ok (abs($goldMAP - $newMAP) <= $precRecallErrorTol, "Mean Average Precision Matches");
#compare every gold Precision at K value with the new value at the same
# position; the first pair differing by more than the tolerance fails
# the check
my $pAtKCount = scalar @{$goldPAtKScoresRef};
my $pAtKSame = 1;
foreach my $idx (0 .. $pAtKCount - 1) {
    my $difference = abs($goldPAtKScoresRef->[$idx] - $newPAtKScoresRef->[$idx]);
    next unless $difference > $atKErrorTol;
    $pAtKSame = 0;
    last;
}
ok($pAtKSame == 1, "Precision at K Matches");
#compare every gold Freq at K value with the new value at the same
# position; the first pair differing by more than the tolerance fails
# the check
my $fAtKCount = scalar @{$goldFAtKScoresRef};
my $fAtKSame = 1;
foreach my $idx (0 .. $fAtKCount - 1) {
    my $difference = abs($goldFAtKScoresRef->[$idx] - $newFAtKScoresRef->[$idx]);
    next unless $difference > $atKErrorTol;
    $fAtKSame = 0;
    last;
}
ok($fAtKSame == 1, "Frequency at K Matches");

print "Done with Time Slicing Tests\n";
############################################################
#function to read in time slicing data values
sub readTimeSlicingData {
my $fileName = shift;
#read in the gold time slicing values
my @APScores = ();
my $MAP;
my @PAtKScores = ();
my @FAtKScores = ();
open IN, "$fileName"
#open IN, './t/goldSampleTimeSliceOutput'
or die ("Error: Cannot open timeSliceOutput: $fileName\n");
while (my $line = <IN>) {
#read in the 11 values of average precision
if ($line =~ /average precision at 10% recall intervals/ ) {
while (my $line2 = <IN>) {
if ($line2 =~ /\d\s(\d\.?\d*)\s(\d\.\d*)/) {
push @APScores, "$1,$2";
}
( run in 1.399 second using v1.01-cache-2.11-cpan-f56aa216473 )