ALBD
view release on metacpan or search on metacpan
lib/LiteratureBasedDiscovery/Discovery.pm view on Meta::CPAN
# PreCutoff_N11
# PostCutoff_N11
# PreCutoff_Implicit
#
# Both PreCutoff_N11 and PostCutoff_N11 should
# be generated manually using CUI_Collector
# PreCutoff_Implicit is generated using the tableToSparseMatrix
# function here, which exports a sparse matrix. That matrix
# can then be imported into matlab, squared, and reloaded into
# a mysql database. With these 3 tables LBD can be performed
######################################################################
# Description
######################################################################
# Discovery.pm - provides matrix operations from n11 counts from
# UMLS::Association
#
#TODO I think some of these notes should be elsewhere
# 'B' term filtering may be applied by removing elements from the
# explicit knowledge matrix before squaring. It is important to
# replicate the original matrix before filtering so that explicit
# knowledge can be removed from the implicit matrix.
# 'C' term filtering may be applied directly to the implicit
# knowledge matrix.
#
# A Typical workflow may look like:
# 1) load explicit knowledge from UMLS::Association
# 2) clone explicit knowledge (for removal from implicit)
# 3) apply filtering to explicit knowledge
# 4) square explicit knowledge to generate implicit knowledge
# 5) remove explicit knowledge from implicit knowledge
# 6) filter implicit knowledge
#
# which has code as:
# TODO insert sample code
#NOTE: CUI merging/term expansion can also be easily done by adding
# two or more explicit vectors, then generating explicit knowledge from
# them. It is also interesting that term expansion, etc. is
# unnecessary if we just rank against every term. We may, however, need
# to modify the ranking metrics to account for synonyms, etc. (e.g. take
# the max value over a set of synonyms).
######################################################################
# Functions to perform Literature Based Discovery
######################################################################
# extracts the rows belonging to a list of CUIs from a sparse matrix
# input:  $cuisRef <- an array reference to a list of CUIs
#         $matrixRef <- a reference to a co-occurrence matrix
#                       (hash of row hashes)
# output: a hash ref to a new sparse matrix containing copies of just the
#         requested rows. CUIs with no row in the matrix, or whose row has
#         no cells, do not appear in the output
sub getRows {
    my ($cuisRef, $matrixRef) = @_;
    my %selected;

    for my $cui (@{$cuisRef}) {
        #nothing to copy when the matrix has no row for this cui
        next unless exists $matrixRef->{$cui};

        #copy cell by cell so the result gets its own row hashes rather
        # than pointing at the rows of the source matrix
        my $sourceRow = $matrixRef->{$cui};
        for my $column (keys %{$sourceRow}) {
            $selected{$cui}{$column} = $sourceRow->{$column};
        }
    }

    return \%selected;
}
#NOTE...this is calculating B*A ... but is that appropriate? ... I think that it is, but the values are maybe not so appropriate ... B*A is nice because it makes the implicit matrix not keep track of non-starting cui rows. ...but the values are...
# finds the implicit connections for all CUIs (based on squaring)
# It does this by multiplying $matrixB*$matrixA. If $matrixB is the starting
# matrix and $matrixA is the explicit matrix, the product only has rows for
# the starting CUIs. $matrixA and $matrixB may also both be the explicit
# matrix, which squares it.
# Only the non-zero cells of each row of B are visited, and the matching row
# of A is looked up directly, so the work is proportional to the number of
# (B cell, A cell) pairs that actually contribute to the product.
# input:  $matrixARef <- ref to a sparse matrix (hash of row hashes)
#         $matrixBRef <- ref to a sparse matrix (hash of row hashes)
# output: ref to a sparse matrix of the product of B*A. A row of B that
#         produces no cells does not appear in the output. Neither input
#         matrix is modified.
sub findImplicit {
    my $matrixARef = shift;
    my $matrixBRef = shift;
    my %product = ();

    #loop over the rows of the B matrix
    foreach my $rowKey (keys %{$matrixBRef}) {
        my $bRowRef = $matrixBRef->{$rowKey};

        #loop over the non-zero cells of this row of B; each one selects
        # the row of A that it gets multiplied against
        foreach my $innerKey (keys %{$bRowRef}) {
            #a B cell with no corresponding A row contributes nothing
            # (exists does not autovivify a row in A)
            next unless exists $matrixARef->{$innerKey};

            my $aRowRef = $matrixARef->{$innerKey};
            my $bValue = $bRowRef->{$innerKey};

            #accumulate B[row][inner] * A[inner][col]; += on a cell that
            # does not exist yet starts from 0 without a warning
            foreach my $colKey (keys %{$aRowRef}) {
                $product{$rowKey}{$colKey} += $bValue * $aRowRef->{$colKey};
            }
        }
    }
    return \%product;
}
# removes explicit connections from the matrix of implicit connections by
# removing keys (O(k), where k is the number of keys in the explicit matrix,
# we expect the explicit k to be smaller than the implicit k)
# input: $explicitMatrixRef <- reference to the explicit knowledge matrix
# $implicitMatrixRef <- reference to the implicit knowledge matrix
# output: ref to the implicit matrix with explicit knowledge removed
sub removeExplicit {
my $explicitMatrixRef = shift;
my $implicitMatrixRef = shift;
#Check each key of the explicit matrix to see if it exists
# in the implicit matrix
foreach my $key1(keys %{$explicitMatrixRef}) {
if (exists ${$implicitMatrixRef}{$key1}) {
foreach my $key2(keys %{${$explicitMatrixRef}{$key1}}) {
if (exists ${${$implicitMatrixRef}{$key1}}{$key2}) {
delete ${${$implicitMatrixRef}{$key1}}{$key2};
}
}
}
}
return $implicitMatrixRef;
( run in 3.726 seconds using v1.01-cache-2.11-cpan-d8267643d1d )