ALBD
view release on metacpan or search on metacpan
All the modules that will be installed in the Perl system directory are
present in the '/lib' directory tree of the package.
The package contains a utils/ directory that contains Perl utility
programs. These utilities use the modules or provide some supporting
functionality.
runDiscovery.pl -- runs LBD using the parameters specified in the input
file, and outputs to an output file.
The package contains a large selection of functions to manipulate CUI
Co-occurrence matrices in the utils/datasetCreator/ directory. These are
short scripts and generally require modifying the code at the top with
user input parameters specific to each run. These scripts include:
applyMaxThreshold.pl -- applies a maximum co-occurrence threshold to the
co-occurrence matrix
applyMinThreshold.pl -- applies a minimum co-occurrence threshold to the
co-occurrence matrix
lib/LiteratureBasedDiscovery/Discovery.pm view on Meta::CPAN
# check tableName
#TODO check that the table exists in the database
# or die "Error: table does not exist: $tableName\n";
# set up database
my $db = $cuiFinder->_getDB();
# retrieve the table as a nested hash where keys are CUI1,
# then CUI2, value is N11
my @keyFields = ('cui_1', 'cui_2');
my $matrixRef = $db->selectall_hashref(
"select * from $tableName", \@keyFields);
# set values of the loaded table to n_11
# ...default is hash of hash of hash
foreach my $key1(keys %{$matrixRef}) {
foreach my $key2(keys %{${$matrixRef}{$key1}}) {
${${$matrixRef}{$key1}}{$key2} = ${${${$matrixRef}{$key1}}{$key2}}{'n_11'};
}
}
return $matrixRef;
}
lib/LiteratureBasedDiscovery/Filters.pm view on Meta::CPAN
$termsHash{$key2} = 1;
}
}
print " number of keys after filtering = ".(scalar keys %termsHash)."\n";
=cut
}
# applies a semantic group filter to the matrix, by removing keys that
# are not of an allowed semantic type. Only removes types from rows,
# so it is applied for time slicing, before randomly selecting terms of
# one semantic type
# input: $matrixRef <- ref to a sparse matrix to be filtered
# $acceptTypesRef <- a ref to a hash of accept type strings
# $umls <- an instance of UMLS::Interface
# output: None, but $matrixRef is updated
sub semanticTypeFilter_rows {
my $matrixRef = shift;
my $acceptTypesRef = shift;
my $umls = shift;
lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN
}
}
}
close IN;
#return the post cutoff matrix
return \%postCutoffMatrix;
}
#TODO numRows should be read from file and sent with the lbdOptionsRef
# generates a starting matrix of numRows randomly selected terms
# input: $explicitMatrixRef <- a ref to the explicit sparse matrix
# $lbdOptionsRef <- the LBD options
# $startTermAcceptTypesRef <- a reference to a hash of accept
# types for start terms (TUIs)
# $numRows <- the number of random rows to load (if random)
# $umls_interface <- an instance of the UMLS::Interface
# output: \%startingMatrix <- a ref to the starting sparse matrix
sub generateStartingMatrix {
my $explicitMatrixRef = shift;
my $lbdOptionsRef = shift;
lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN
}
}
}
else {
#randomly grab rows
#apply semantic filter to the rows (just retrieve appropriate rows)
my $rowsToKeepRef = getRowsOfSemanticTypes(
$explicitMatrixRef, $startTermAcceptTypesRef, $umls_interface);
((scalar keys %{$rowsToKeepRef}) >= $numRows) or die("ERROR: number of acceptable rows starting terms is less than $numRows\n");
#randomly select $numRows rows (to generate the 'starting matrix')
#generate random numbers from 0 to number of rows in the explicit matrix
my %rowNumbers = ();
while ((scalar keys %rowNumbers) < $numRows) {
$rowNumbers{int(rand(scalar keys %{$rowsToKeepRef}))} = 1;
}
#fill starting matrix with keys corresponding to the random numbers
my $i = 0;
foreach my $key (keys %{$rowsToKeepRef}) {
if (exists $rowNumbers{$i}) {
lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN
}
$numChecked++;
#check if the recall rate has been reached
if ($truePositive > $numTrueForInterval) {
last;
}
}
#sum precision at this interval, average over number of rows is
# taken outside of the loop
$precision{$i} += ($truePositive / $numChecked); #number that are selected that are true
$recall{$i} += ($truePositive / $numTrue); #number of true that are selected
}
}
#calculate the average precision at each interval
foreach my $i (keys %precision) {
#divide by the number of rows in the true matrix ref
# because those are the number of cuis we are testing
# it is possible that the predictions has rows that are
# not in the true, and those should be ignored.
$precision{$i} /= (scalar keys %{$trueMatrixRef});
( run in 0.562 second using v1.01-cache-2.11-cpan-49f99fa48dc )