data results from the CPAN

ALBD
# ALBD::Evaluation.pm
#
# Provides functionality to evaluate LBD systems
# Key components are:
# Results Matrix <- all new knowledge generated by an LBD system (e.g.
#                   all proposed discoveries of a system from pre-cutoff
#                   data).
# Gold Standard Matrix <- the gold standard knowledge matrix (e.g. all
#                         knowledge present in the post-cutoff dataset
#                         that is not present in the pre-cutoff dataset).
#
# Copyright (c) 2017
#
# Sam Henry
# henryst at vcu.edu
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to
#
# The Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330,
# Boston, MA  02111-1307, USA.

package Evaluation;
use strict;
use warnings;

# Timeslicing evaluation that calculates the precision of LBD
# (O(k), where k is the number of keys in results)
# Precision is the fraction of the top-level keys of the results matrix
# that also exist as top-level keys of the gold standard matrix
# (i.e. the fraction of what was generated that is gold).
# input: $resultsMatrixRef <- ref to a matrix (hash) of LBD results
#        $goldMatrixRef <- ref to a gold standard matrix (hash)
# output: the precision of results, a value between 0 and 1;
#         0 is returned if the results matrix has no keys
sub calculatePrecision {
    my $resultsMatrixRef = shift;
    my $goldMatrixRef = shift;

    # an empty results matrix has no correct results; return 0 rather
    # than dying with an "Illegal division by zero" error
    my $numResults = scalar keys %{$resultsMatrixRef};
    return 0 if $numResults == 0;

    # count the results that are in the gold standard
    # (grep in scalar context returns the number of matching keys)
    my $count = grep { exists ${$goldMatrixRef}{$_} } keys %{$resultsMatrixRef};

    # percent of generated that is gold
    return $count / $numResults;
}

# Timeslicing evaluation that calculates the recall of LBD 
# (O(k), where k is the number of keys in gold)
# input: $resultsMatrixRef <- ref a matrix of LBD results
#        $goldMatrixRef <- ref to a gold standard matrix
# output: the recall of results
sub calculateRecall {
    my $resultsMatrixRef = shift;
    my $goldMatrixRef = shift;
    
    # calculate the recall which is the percentage of knowledge in the gold
    # standard that was generated by the LBD system
( run in 1.300 second using v1.01-cache-2.11-cpan-39bf76dae61 )