ALBD

 view release on metacpan or  search on metacpan

INSTALL  view on Meta::CPAN

    completely know what you're doing!

    NOTE: If one (or more) of the tests run by 'make test' fails, you will
    see a summary of the tests that failed, followed by a message of the
    form "make: *** [test_dynamic] Error Y" where Y is a number between 1
    and 255 (inclusive). If the number is less than 255, then it indicates
    how many test failed (if more than 254 tests failed, then 254 will still
    be shown). If one or more tests died, then 255 will be shown. For more
    details, see:

        <http://search.cpan.org/dist/Test-Simple/lib/Test/Builder.pm#EXIT_CODES>

  Stage 4: Create an co-occurrence matrix
    ALBD requires that a co-occurrence matrix of CUIs has been created. This
    matrix is stored as a flat file, in a sparse matrix format such that
    each line contains three tab seperated values, cui_1, cui_2, n_11 = the
    count of their co-occurrences. Any matrix with that format is
    acceptable, however the intended method of matrix generation is to
    convert a UMLS::Association database into a flat matrix file. These
    databases are created using the CUICollector tool of UMLS::Association,
    and are run over the MetaMapped Medline baseline. With that file, run

lib/ALBD.pm  view on Meta::CPAN

    print "-----------------------\n";

#Remove Known Connections
    $implicitMatrixRef = Discovery::removeExplicit($explicitMatrixRef, 
						   $implicitMatrixRef);
    print "Implicit Matrix with Explicit Removed\n";
    _printMatrix($implicitMatrixRef, $matrixSize, $indexToCuiRef);
    print "-----------------------\n";
    print "\n\n";

#Test N11, N1P, etc...
    #NOTE...always do n11 first, if n11 = -1, no need to compute the others...there is no co-occurrence between them
    my $n11 = Rank::getN11('C0','C2',$explicitMatrixRef);
    my $npp = Rank::getNPP($explicitMatrixRef);
    my $n1p = Rank::getN1P('C0', $explicitMatrixRef);
    my $np1 = Rank::getNP1('C2', $explicitMatrixRef); 
    print "Contingency Table Values from Explicit Matrix\n";
    print "n11 = $n11\n";
    print "npp = $npp\n";
    print "n1p = $n1p\n";
    print "np1 = $np1\n";

#Test other rank methods
    my $scoresRef = Rank::scoreImplicit_fromAllPairs($startingMatrixRef, $explicitMatrixRef, $implicitMatrixRef, $lbdOptions{rankingMethod}, $umls_association);
    my $ranksRef = Rank::rankDescending($scoresRef);
    print "Scores: \n";
    foreach my $cui (keys %{$scoresRef}) {
	print "   scores{$cui} = ${$scoresRef}{$cui}\n";
    }
    print "Ranks = ".join(',', @{$ranksRef})."\n";
}

sub _printMatrix {

lib/LiteratureBasedDiscovery/TimeSlicing.pm  view on Meta::CPAN

	if ($numTrue == 0) {
	    next;
	}
	#skip if there are NO predictions for this start term
	if ($numPredictions == 0) {
	    next;
	}

	#determine precision and recall at 10% intervals of the number of 
	#predicted true vaules. This is done by simulating a threshold being
	#applied, so the top $numToTest ranked terms are tested at 10% intervals
	my $interval = $numPredictions/$numIntervals;
	for (my $i = 0; $i <= 1; $i+=(1/$numIntervals)) {
	    
	    #determine the number true to grab
	    my $numTrueForInterval = 1; #at $i = 0, grab just the first term that is true
	    if ($i > 0) {
		$numTrueForInterval = $numTrue*$i;
	    }

	    #grab true discoveries until the recall rate is exceeded

samples/lbdConfig  view on Meta::CPAN

# dice - Dice Coefficient
# left - Fishers exact test - left sided 
# right - Fishers exact test - right sided 
# twotailed - Fishers twotailed test
# jaccard - Jaccard Coefficient
# ll - Log-likelihood ratio
# tmi - Mutual Information
# odds - Odds Ratio
# pmi - Pointwise Mutual Information
# phi - Phi Coefficient
# chi - Pearson's Chi Squared Test
# ps - Poisson Stirling Measure 
# tscore - T-score
<rankingMeasure>ll

# The output path of the results of lbd
<implicitOutputFile>sampleOutput

# a comma seperated list of linking (B) term accept semantic groups, which 
# restricts the linking terms to the semantic groups specified. Group names
# come directly from the UMLS. 

samples/timeSlicingConfig  view on Meta::CPAN

# dice - Dice Coefficient
# left - Fishers exact test - left sided 
# right - Fishers exact test - right sided 
# twotailed - Fishers twotailed test
# jaccard - Jaccard Coefficient
# ll - Log-likelihood ratio
# tmi - Mutual Information
# odds - Odds Ratio
# pmi - Pointwise Mutual Information
# phi - Phi Coefficient
# chi - Pearson's Chi Squared Test
# ps - Poisson Stirling Measure 
# tscore - T-score
<rankingMeasure>ll

# a comma seperated list of linking (B) term accept semantic groups, which 
# restricts the linking terms to the semantic groups specified. Group names
# come directly from the UMLS. 
# See https://metamap.nlm.nih.gov/Docs/SemGroups_2013.txt for a list
#<linkingAcceptGroups>CHEM,DISO,GENE,PHYS,ANAT

t/test.t  view on Meta::CPAN

#!/usr/local/bin/perl -w

# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl t/lch.t'

use strict;
use warnings;
use Test::Simple tests => 10;

#error tolerance for exact numerical matches due to precision issues 
# and sort issues (again due to precision) there may be small
# differences between runs. The precision at K difference is 
# larger due to small differences in ranking making big differences
# in scores when the K < 10. See Rank::rankDescending for more
# details as to why the ranking imprecision occurrs
my $precRecallErrorTol = 0.0001;
my $atKErrorTol = 1.0;

#######################################################
# test script to run the sample code and compare its 
# output to the expected output. This tests both the 
# open and closed discovery code portions
#########################################################


#Test that the demo file can run correctly
`(cd ./samples/; perl runSample.pl) &`;

#######################################################
#test that the demo output matches the expected demo output
#########################################################
print "Performing Open Discovery Tests:\n";

#read in the gold scores from the open discovery gold
my %goldScores = ();
open IN, './t/goldSampleOutput' 
    or die ("Error: Cannot open gold sample output\n");
while (my $line = <IN>) {
    if ($line =~ /\d+\t(\d+\.\d+)\t(C\d+)/) {
	$goldScores{$2} = $1;
    }
}

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 1.123 second using v1.00-cache-2.02-grep-82fe00e-cpan-585fae043c8 )