ALBD
view release on metacpan or search on metacpan
lib/ALBD.pm view on Meta::CPAN
association measures are available as well as frequency based
ranking methods. See samples/lbd for more info. Can perform open and
closed LBD as well as time slicing evaluation.
=head1 INSTALL
To install the module, run the following magic commands:
perl Makefile.PL
make
make test
make install
This will install the module in the standard location. You will, most
probably, require root privileges to install in standard system
directories. To install in a non-standard directory, specify a prefix
during the 'perl Makefile.PL' stage as:
perl Makefile.PL PREFIX=/home/sid
It is possible to modify other parameters during installation. The
lib/ALBD.pm view on Meta::CPAN
#Done
print "DONE!\n\n";
}
=cut
##################################################
################ Time Slicing ####################
##################################################
#NOTE: This function isn't really tested, and is really slow right now
# Generates precision and recall values by varying the threshold
# of the A->B ranking measure.
# input: none
# output: none, but precision and recall values are printed to STDOUT
sub timeSlicing_generatePrecisionAndRecall_explicit {
my $NUM_SAMPLES = 100; #TODO, read fomr file number of samples to average over for timeslicing
my $self = shift;
print "In timeSlicing_generatePrecisionAndRecall\n";
my $numIntervals = 10;
lib/LiteratureBasedDiscovery/Rank.pm view on Meta::CPAN
foreach my $cuiPair (sort {$tiedAMWScores{$b} <=> $tiedAMWScores{$a}} keys %tiedAMWScores) {
$ltcAMWScores{$cuiPair} = $currentRank;
$currentRank--;
}
}
#return the scores
return \%ltcAMWScores;
}
#TODO this is an untested method
# gets the max cosine distance score between all a terms and each cTerm
# input: $startingMatrixRef <- ref to the starting matrix
# $explicitMatrixRef <- ref to the explicit matrix
# $implicitMatrixRef <- ref to the implicit matrix
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
sub score_cosineDistance {
#LBD Info
my $startingMatrixRef = shift;
my $explicitMatrixRef = shift;
my $implicitMatrixRef = shift;
lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN
# predictions are cuis ordered in descending order
# based on association. (from Rank::RankDescending)
# $numIntervals <- the number of recall intervals to generate
# output: (\%precision, \%recall) <- refs to hashes of precision and recall.
# Each hash key is the interval number, and
# the value is the precision and recall
# respectively
sub calculatePrecisionAndRecall_implicit {
my $trueMatrixRef = shift; #a ref to the true matrix
my $rowRanksRef = shift; #a ref to ranked predictions, each hash element are the predictions for a single cui, at each element is an array of cuis ordered by their rank
my $numIntervals = shift; #the recall intervals to test at
#find precision and recall curves for each cui that is being predicted
# take the sum of precisions, then average after the loop
my %precision = ();
my %recall = ();
foreach my $rowKey (keys %{$trueMatrixRef}) {
my $trueRef = ${$trueMatrixRef}{$rowKey}; #a list of true discoveries
my $rankedPredictionsRef = ${$rowRanksRef}{$rowKey}; #an array ref of ranked predictions
#get the number of predicted discoveries and true discoveries
lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN
if ($numTrue == 0) {
next;
}
#skip if there are NO predictions for this start term
if ($numPredictions == 0) {
next;
}
#determine precision and recall at 10% intervals of the number of
#predicted true values. This is done by simulating a threshold being
#applied, so the top $numToTest ranked terms are tested at 10% intervals
my $interval = $numPredictions/$numIntervals;
for (my $i = 0; $i <= 1; $i+=(1/$numIntervals)) {
#determine the number true to grab
my $numTrueForInterval = 1; #at $i = 0, grab just the first term that is true
if ($i > 0) {
$numTrueForInterval = $numTrue*$i;
}
#grab true discoveries until the recall rate is exceeded
lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN
#sum precision at this interval, average over number of rows is
# taken outside of the loop
$precision{$i} += ($truePositive / $numChecked); #number that are selected that are true
$recall{$i} += ($truePositive / $numTrue); #number of true that are selected
}
}
#calculate the average precision at each interval
foreach my $i (keys %precision) {
#divide by the number of rows in the true matrix ref
# because those are the number of cuis we are testing
# it is possible that the predictions has rows that are
# not in the true, and those should be ignored.
$precision{$i} /= (scalar keys %{$trueMatrixRef});
$recall{$i} /= (scalar keys %{$trueMatrixRef});
}
#return the precision and recall at 10% intervals
return (\%precision, \%recall);
}
lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN
my $trueMatrixRef = shift; # a matrix of true discoveries
my $rowRanksRef = shift; # a hash of ranked predicted discoveries
#generate mean cooccurrences at k for k = 1-100, stepping by $interval (currently 1)
my %meanCooccurrenceCount = (); #count of the number of co-occurrences for each k
my $interval = 1;
for (my $k = 1; $k <= 100; $k+=$interval) {
$meanCooccurrenceCount{$k} = 0;
#average the mean co-occurrences over all terms
# the true matrix contains only rows for the cuis being tested
# or in time slicing
foreach my $rowKey (keys %{$trueMatrixRef}) {
my $rankedPredictionsRef = ${$rowRanksRef}{$rowKey}; #an array ref of ranked predictions
#skip for rows that have no predictions
if (!defined $rankedPredictionsRef) {
next;
}
my $trueRef = ${$trueMatrixRef}{$rowKey}; #a list of true discoveries
utils/datasetCreator/fromMySQL/removeQuotes.pl view on Meta::CPAN
# Removes every double-quote character from a db-to-tab file.
# Reads $inFile line by line, strips all '"' characters from each line,
# and writes the result to $outFile. Both file names are hard-coded below.
my $inFile = '1980_1984_window1_retest_data.txt';
my $outFile = '1980_1984_window1_restest_DELETEME';

# Three-argument open with lexical handles: the mode can never be taken
# from the file name, and the handles do not leak into package scope.
# $! is appended so the reason for a failure is reported.
open(my $in, '<', $inFile)
    or die("unable to open inFile: $inFile ($!)\n");
open(my $out, '>', $outFile)
    or die("unable to open outFile: $outFile ($!)\n");

while (my $line = <$in>) {
    $line =~ s/"//g;    # drop all double quotes on this line
    print {$out} $line;
}

close($in);
# Buffered write errors (e.g. disk full) only surface at close time,
# so the close of the output handle must be checked.
close($out)
    or die("unable to close outFile: $outFile ($!)\n");
utils/runDiscovery.pl view on Meta::CPAN
."\nOPTIONS\n"
." --assocConfig path to the UMLS::Association Config File\n"
." --interfaceConfig path to the UMLS::Interface Config File\n"
."\nUSAGE EXAMPLES\n"
." runDiscovery lbdConfigFile\n";
;
#############################################################################
# Parse command line options
#############################################################################
my $DEBUG = 0; # Prints EVERYTHING. Use with small testing files.
my $HELP = ''; # Prints usage and exits if true.
my $VERSION;
#set default param values
my %options = ();
$options{'assocConfig'} = '';
$options{'interfaceConfig'} = '';
#grab all the options and set values
GetOptions( 'debug' => \$DEBUG,
( run in 2.608 seconds using v1.01-cache-2.11-cpan-8f98c5d2c55 )