ALBD

 view release on metacpan or  search on metacpan

lib/ALBD.pm  view on Meta::CPAN

      association measure are available as well as frequency based
      ranking methods. See samples/lbd for more info. Can perform open and
      closed LBD as well as time slicing evaluation.

=head1 INSTALL

To install the module, run the following magic commands:

  perl Makefile.PL
  make
  make test
  make install

This will install the module in the standard location. You will, most
probably, require root privileges to install in standard system
directories. To install in a non-standard directory, specify a prefix
during the 'perl Makefile.PL' stage as:

  perl Makefile.PL PREFIX=/home/sid

It is possible to modify other parameters during installation. The

lib/ALBD.pm  view on Meta::CPAN

#Done
    print "DONE!\n\n";
}
=cut


##################################################
################ Time Slicing ####################
##################################################

#NOTE: This function isn't really tested, and is really slow right now
# Generates precision and recall values by varying the threshold
# of the A->B ranking measure.
# input:  none
# output: none, but precision and recall values are printed to STDOUT
sub timeSlicing_generatePrecisionAndRecall_explicit {
    my $NUM_SAMPLES = 100; #TODO, read fomr file number of samples to average over for timeslicing
    my $self = shift;
    print "In timeSlicing_generatePrecisionAndRecall\n";

    my $numIntervals = 10;

lib/LiteratureBasedDiscovery/Rank.pm  view on Meta::CPAN

	foreach my $cuiPair (sort {$tiedAMWScores{$b} <=> $tiedAMWScores{$a}} keys %tiedAMWScores) {
	    $ltcAMWScores{$cuiPair} = $currentRank;
	    $currentRank--;
	}
    }

    #return the scores
    return \%ltcAMWScores;
}

#TODO this is an untested method
# gets the max cosine distance score between all a terms and each cTerm 
# input:  $startingMatrixRef <- ref to the starting matrix
#         $explicitMatrixRef <- ref to the explicit matrix
#         $implicitMatrixRef <- ref to the implicit matrix
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
sub score_cosineDistance {
    #LBD Info
    my $startingMatrixRef = shift;
    my $explicitMatrixRef = shift;
    my $implicitMatrixRef = shift;

lib/LiteratureBasedDiscovery/TimeSlicing.pm  view on Meta::CPAN

#                         predictions are cuis are ordered in descending order 
#                         based on association. (from Rank::RankDescending)
#         $numIntervals <- the number of recall intervals to generate
# output: (\%precision, \%recall) <- refs to hashes of precision and recall. 
#                                    Each hash key is the interval number, and 
#                                    the value is the precision and recall 
#                                    respectively
sub calculatePrecisionAndRecall_implicit {
    my $trueMatrixRef = shift; #a ref to the true matrix
    my $rowRanksRef = shift; #a ref to ranked predictions, each hash element are the predictions for a single cui, at each element is an array of cuis ordered by their rank
    my $numIntervals = shift; #the recall intervals to test at

    #find precision and recall curves for each cui that is being predicted
    #  take the sum of precisions, then average after the loop
    my %precision = ();
    my %recall = ();
    foreach my $rowKey (keys %{$trueMatrixRef}) {
	my $trueRef = ${$trueMatrixRef}{$rowKey}; #a list of true discoveries
	my $rankedPredictionsRef = ${$rowRanksRef}{$rowKey}; #an array ref of ranked predictions
	
	#get the number of predicted discoveries and true discoveries

lib/LiteratureBasedDiscovery/TimeSlicing.pm  view on Meta::CPAN

	if ($numTrue == 0) {
	    next;
	}
	#skip if there are NO predictions for this start term
	if ($numPredictions == 0) {
	    next;
	}

	#determine precision and recall at 10% intervals of the number of 
	#predicted true vaules. This is done by simulating a threshold being
	#applied, so the top $numToTest ranked terms are tested at 10% intervals
	my $interval = $numPredictions/$numIntervals;
	for (my $i = 0; $i <= 1; $i+=(1/$numIntervals)) {
	    
	    #determine the number true to grab
	    my $numTrueForInterval = 1; #at $i = 0, grab just the first term that is true
	    if ($i > 0) {
		$numTrueForInterval = $numTrue*$i;
	    }

	    #grab true discoveries until the recall rate is exceeded

lib/LiteratureBasedDiscovery/TimeSlicing.pm  view on Meta::CPAN

	    #sum precision at this interval, average over number of rows is 
	    # taken outside of the loop
	    $precision{$i} += ($truePositive / $numChecked); #number that are selected that are true
	    $recall{$i} += ($truePositive / $numTrue); #number of true that are selected	
	}
    }

    #calculate the average precision at each interval
    foreach my $i (keys %precision) {
	#divide by the number of rows in the true matrix ref
	# because those are the number of cuis we are testing
	# it is possible that the predictions has rows that are 
	# not in the true, and those should be ignored.
	$precision{$i} /= (scalar keys %{$trueMatrixRef});
	$recall{$i} /= (scalar keys %{$trueMatrixRef});
    }

    #return the precision and recall at 10% intervals
    return (\%precision, \%recall);
}

lib/LiteratureBasedDiscovery/TimeSlicing.pm  view on Meta::CPAN

    my $trueMatrixRef = shift; # a matrix of true discoveries
    my $rowRanksRef = shift; # a hash of ranked predicted discoveries
  
    #generate mean cooccurrences at k at intervals of 10 for k = 10-100
    my %meanCooccurrenceCount = (); #count of the number of co-occurrences for each k
    my $interval = 1;
    for (my $k = 1; $k <= 100; $k+=$interval) {
	$meanCooccurrenceCount{$k} = 0;

	#average the mean co-occurrenes over all terms
	#  the true matrix contains only rows for the cuis being tested 
        #  or in time slicing
	foreach my $rowKey (keys %{$trueMatrixRef}) {
	    my $rankedPredictionsRef = ${$rowRanksRef}{$rowKey}; #an array ref of ranked predictions
	    
	    #skip for rows that have no predictions
	    if (!defined $rankedPredictionsRef) {
		next;
	    } 
	    my $trueRef = ${$trueMatrixRef}{$rowKey}; #a list of true discoveries

utils/datasetCreator/fromMySQL/removeQuotes.pl  view on Meta::CPAN

#renoves quotes from a db to tab file

my $inFile = '1980_1984_window1_retest_data.txt';
my $outFile = '1980_1984_window1_restest_DELETEME';


open IN, $inFile or die ("unable to open inFile: $inFile\n");
open OUT, '>'.$outFile or die ("unable to open outFile: $outFile\n");

while (my $line  = <IN>) {
    $line =~ s/"//g;
    #print $line;
    print OUT $line;
}

utils/runDiscovery.pl  view on Meta::CPAN

."\nOPTIONS\n"
."   --assocConfig        path to the UMLS::Association Config File\n"
."   --interfaceConfig    path to the UMLS::Interface Config File\n"
."\nUSAGE EXAMPLES\n"
."   runDiscovery lbdConfigFile\n";
;

#############################################################################
#                       Parse command line options 
#############################################################################
my $DEBUG = 0;      # Prints EVERYTHING. Use with small testing files.        
my $HELP = '';      # Prints usage and exits if true.
my $VERSION;

#set default param values
my %options = ();
$options{'assocConfig'}  = '';
$options{'interfaceConfig'} = '';

#grab all the options and set values
GetOptions( 'debug'             => \$DEBUG, 



( run in 2.608 seconds using v1.01-cache-2.11-cpan-8f98c5d2c55 )