ALBD
view release on metacpan or search on metacpan
lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN
#loop through the matrix and keep the rows that are of the
# desired semantic types
my %rowsToKeep = ();
foreach my $cui1 (keys %{$matrixRef}) {
my $typesRef = $umls->getSt($cui1);
foreach my $type(@{$typesRef}) {
my $abr = $umls->getStAbr($type);
#check the cui for removal
if (exists ${$acceptTypesRef}{$type}) {
$rowsToKeep{$cui1} = 1;
last;
}
}
}
#return the rowsToKeep
return \%rowsToKeep
}
# Builds a flat hash of scores for every co-occurring CUI pair in a matrix.
# Each key is the string "$rowKey,$colKey"; each value starts out as the
# matrix cell for that pair. For the 'frequency' measure those raw cell
# values are returned as-is; for any other measure the hash is handed to
# Rank::getBatchAssociationScores, which is expected to overwrite the values
# in place with association scores, before it is returned.
# input:  $matrixRef <- a reference to a sparse matrix (hash of hashes)
#         $rankingMeasure <- a string specifying the ranking measure to use
#         $umls_association <- an instance of UMLS::Association
# output: \%cuiPairs <- a ref to a hash of CUI pairs and their association
#                       scores, keyed by comma separated pair
#                       (e.g. 'cui1,cui2')
sub getAssociationScores {
    my ($matrixRef, $rankingMeasure, $umls_association) = @_;
    print " getting Association Scores, rankingMeasure = $rankingMeasure\n";

    #flatten the matrix: one 'row,col' entry per populated cell
    my %cuiPairs = ();
    print " generating association scores:\n";
    foreach my $row (keys %{$matrixRef}) {
        my @cols = keys %{ $matrixRef->{$row} };
        @cuiPairs{ map { "$row,$_" } @cols } = @{ $matrixRef->{$row} }{@cols};
    }

    #every measure other than raw frequency is scored in batch, in place
    if ($rankingMeasure ne 'frequency') {
        Rank::getBatchAssociationScores(\%cuiPairs, $matrixRef, $rankingMeasure, $umls_association);
    }
    return \%cuiPairs;
}
# gets the min and max value of a hash
# returns a two element list, where the first value is the min, and
# the second value is the max
# input: $hashRef <- a reference to a hash with numbers as values
# output: ($min, $max) <- the minimum and maximum values in the hash.
#         For an empty hash the historical sentinel pair
#         (999999, -999999) is returned, so existing callers see
#         the same result as before.
sub getMinMax {
    my $hashRef = shift;
    my @vals = values %{$hashRef};

    #nothing to compare: keep the values this function has always returned
    if (!@vals) {
        return (999999, -999999);
    }

    #seed min/max from the data itself rather than from fixed sentinels, so
    # values above 999999 or below -999999 are reported correctly
    my ($min, $max) = ($vals[0], $vals[0]);
    foreach my $val (@vals) {
        if ($val < $min) {
            $min = $val;
        }
        if ($val > $max) {
            $max = $val;
        }
    }
    return ($min, $max);
}
# Applies a threshold to a matrix using a corresponding association scores
# hash. Any pairs scoring below the threshold are not copied to the new matrix
# input: $threshold <- a scalar threshold
# $assocScoresRef <- a reference to a cui pair hash of association
# scores. Each key is a comma separated cui pair
# (e.g. 'cui1,cui2'), values are their association
# scores.
# $matrixRef <- a reference to a co-occurrence sparse matrix that
# corresponds to the assocScoresRef
# output: \%thresholdedMatrix <- a ref to a new matrix, built from the
# $matrixRef after applying the $threshold
sub applyThreshold {
my $threshold = shift;
my $assocScoresRef = shift;
my $matrixRef = shift;
#apply the threshold
my $preKeyCount = scalar keys %{$assocScoresRef};
my $postKeyCount = 0;
my %thresholdedMatrix = ();
my ($cui1, $cui2);
foreach my $key (keys %{$assocScoresRef}) {
#add key if val >= threshold
if (${$assocScoresRef}{$key} >= $threshold) {
($cui1,$cui2) = split(/,/, $key);
#create new hash at rowkey location
if (!(exists $thresholdedMatrix{$cui1})) {
my %newHash = ();
$thresholdedMatrix{$cui1} = \%newHash;
}
#set key value
${$thresholdedMatrix{$cui1}}{$cui2} = ${${$matrixRef}{$cui1}}{$cui2};
$postKeyCount++;
}
}
#return the thresholded matrix
return \%thresholdedMatrix;
( run in 1.985 second using v1.01-cache-2.11-cpan-5837b0d9d2c )