comma results from the CPAN

ALBD
view release on metacpan or search on metacpan
#read in the scores that were just generated
# %newScores maps each CUI (a "C" followed by digits) to the score that
# appears on the same line of the freshly generated sample output.
my %newScores = ();
# Three-argument open on a lexical handle: the mode is explicit and the
# handle cannot collide with other users of a bareword filehandle.
open my $sampleOutputFh, '<', './samples/sampleOutput'
    or die ("Error: Cannot open sample output\n");
while (my $line = <$sampleOutputFh>) {
    # matching lines look like: <integer> TAB <decimal score> TAB <CUI>;
    # lines that do not match are ignored
    if ($line =~ /\d+\t(\d+\.\d+)\t(C\d+)/) {
        $newScores{$2} = $1;
    }
}
close $sampleOutputFh;

#check that the number of keys in the input and output files are the same
ok(scalar keys %goldScores == scalar keys %newScores, "Number of Output CUIs match");

#check that the gold and sample scores match
# $allExist - every gold CUI is present in the new output
# $allMatch - every gold CUI is present AND carries the same score
my $allMatch = 1;
my $allExist = 1;
foreach my $key (keys %goldScores) {
    if (!exists $newScores{$key}) {
        # a missing CUI fails both checks, so there is nothing more to learn
        $allExist = 0;
        $allMatch = 0;
        last;
    }
    if ($newScores{$key} != $goldScores{$key}) {
        # Keep scanning after a mismatch: hash iteration order is
        # unspecified, so stopping here could hide a CUI that is missing
        # further along and make the existence test order-dependent.
        $allMatch = 0;
    }
}
ok ($allExist == 1, "All CUIs exist in the output");  #all cuis exist in the new output file
ok ($allMatch == 1, "All Scores are the same in the output");  #all scores are the same in the new output file

print "Done with Open Discovery Tests\n\n";



#######################################################
#test that time slicing is computed correctly
#########################################################
print "Performing Time Slicing Tests\n";

# Reference (gold) time slicing results. readTimeSlicingData returns, in
# order: AP scores array ref, MAP value, precision-at-K array ref,
# frequency-at-K array ref.
my ($goldAPScoresRef, $goldMAP, $goldPAtKScoresRef, $goldFAtKScoresRef)
    = readTimeSlicingData('./t/goldSampleTimeSliceOutput');

# The same four values, read from the freshly generated output
my ($newAPScoresRef, $newMAP, $newPAtKScoresRef, $newFAtKScoresRef)
    = readTimeSlicingData('./samples/sampleTimeSliceOutput');

# Each metric must have produced the expected number of values
# in the new output
ok (scalar(@$newAPScoresRef) == 11, "Correct Count of Average Precisions");
ok (scalar(@$newPAtKScoresRef) == 19, "Correct Count of Precision at K's");
ok (scalar(@$newFAtKScoresRef) == 19, "Correct Count of Freq at K's");

#check that each of the AP scores match the gold (within error tolerance)
# Each AP entry is a "value,value" string; $apSame stays 1 only if every
# pair in the new output is within $precRecallErrorTol of the gold pair.
my $apSame = 1;
for (my $i = 0; $i < scalar @{$goldAPScoresRef}; $i++) {

    #check both comma separated values (precision and recall)
    my @goldScores = split(',',${$goldAPScoresRef}[$i]);
    my @newScores = split(',',${$newAPScoresRef}[$i]);

    # EITHER value drifting outside the tolerance is a mismatch; requiring
    # both to be off would let a single bad value slip through.
    if ((abs($goldScores[0]-$newScores[0]) > $precRecallErrorTol)
        || (abs($goldScores[1]-$newScores[1]) > $precRecallErrorTol)) {
        $apSame = 0;
        last;
    }
}
ok($apSame == 1, "Average Precisions Match");

#check MAP is the same (within error tolerance)
# Passes when the absolute difference is no larger than the tolerance,
# mirroring the "> tolerance means mismatch" rule used for the other metrics.
ok (abs($goldMAP - $newMAP) <= $precRecallErrorTol, "Mean Average Precision Matches");

# Compare every gold Precision at K value with its counterpart in the
# new output; a difference larger than $atKErrorTol is a mismatch.
my $pAtKSame = 1;
my $goldPAtKCount = scalar @{$goldPAtKScoresRef};
foreach my $idx (0 .. $goldPAtKCount - 1) {
    my $delta = abs($goldPAtKScoresRef->[$idx] - $newPAtKScoresRef->[$idx]);
    next unless $delta > $atKErrorTol;

    # first out-of-tolerance value decides the outcome
    $pAtKSame = 0;
    last;
}
ok($pAtKSame == 1, "Precision at K Matches");

# Compare every gold Freq at K value with its counterpart in the
# new output; a difference larger than $atKErrorTol is a mismatch.
my $fAtKSame = 1;
my $goldFAtKCount = scalar @{$goldFAtKScoresRef};
foreach my $idx (0 .. $goldFAtKCount - 1) {
    my $delta = abs($goldFAtKScoresRef->[$idx] - $newFAtKScoresRef->[$idx]);
    next unless $delta > $atKErrorTol;

    # first out-of-tolerance value decides the outcome
    $fAtKSame = 0;
    last;
}
ok($fAtKSame == 1, "Frequency at K Matches");

print "Done with Time Slicing Tests\n";



############################################################
#function to read in time slicing data values
sub readTimeSlicingData {
    my $fileName = shift;

    #read in the gold time slicing values
    my @APScores = ();
    my $MAP;
    my @PAtKScores = ();
    my @FAtKScores = ();
    open IN, "$fileName" 
    #open IN, './t/goldSampleTimeSliceOutput'
	or die ("Error: Cannot open timeSliceOutput: $fileName\n");
    while (my $line = <IN>) {
	#read in the 11 values of average precision
	if ($line =~ /average precision at 10% recall intervals/ ) {
	    while (my $line2 = <IN>) {
		if ($line2 =~ /\d\s(\d\.?\d*)\s(\d\.\d*)/) {
		    push @APScores, "$1,$2";
		}
( run in 0.384 second using v1.01-cache-2.11-cpan-600a1bdf6e4 )