ALBD

 view release on metacpan or  search on metacpan

lib/LiteratureBasedDiscovery/Rank.pm  view on Meta::CPAN


	#automatically initializes to 0 (with +=)
	$scores{$key2} += ${$bcPairsRef}{$pairKey};
    }
    return \%scores;
}

# Scores each implicit CUI (C term) using an association measure.
# The start terms are pooled: the observed counts handed to the measure are
# summed over every A term, so each C term receives a single score against
# the whole set of start terms.
# Counts are read from the implicit matrix file, which is parsed as one
# tab-separated "cuiA\tcuiB\tvalue" triple per line.
# input:  $startCuisRef <- ref to an array of start cuis (A terms)
#         $implicitMatrixFileName <- fileName of the implicit matrix
#         $measure <- the string of the umls association measure to use
#         $association <- an instance of umls association
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
# dies if the implicit matrix file cannot be opened or rewound
sub scoreImplicit_fromImplicitMatrix {
    #LBD Info
    my $startCuisRef = shift;
    my $implicitMatrixFileName = shift;
    my $measure = shift;
    my $association = shift;

######################################
    #Get hashes for A and C terms
#####################################
    #create a hash of starting terms for constant time lookup
    my %aTerms = ();
    foreach my $cui (@{$startCuisRef}) {
        $aTerms{$cui} = 1;
    }

    #get all the target terms (terms that co-occur with aTerms
    # in the implicit matrix file = the implicit terms)
    open(my $in, '<', $implicitMatrixFileName)
        or die "ERROR: unable to open implicit matrix file "
             . "'$implicitMatrixFileName': $!\n";

    #n11 and np1 are pre-seeded with 0 for every C term so the association
    # call below never receives an undefined count
    my %cTerms = ();
    my %np1 = ();
    my %n11 = ();
    while (my $line = <$in>) {
        #only use the captures when the match succeeded, otherwise $1 and $2
        # would still hold the values of a previously matched line
        if ($line =~ /(C\d{7})\s(C\d{7})/ && exists $aTerms{$1}) {
            $cTerms{$2} = 1;
            $np1{$2} = 0;
            $n11{$2} = 0;
        }
    }

######################################
    #Get Co-occurrence values, N11, N1P, NP1, NPP
######################################
    #npp is the sum of the values of every line that involves an A term
    #    (as cuiA) or a C term (as cuiB)
    #np1{cTerm} is the sum of XXX\tcTerm\tVal over any first term XXX
    #n1p is the sum of anyATerm\tXXX\tVal over any second term XXX
    #n11{cTerm} is the sum of anyATerm\tcTerm\tVal
    seek($in, 0, 0)   #reset to the beginning of the implicit file
        or die "ERROR: unable to rewind implicit matrix file "
             . "'$implicitMatrixFileName': $!\n";

    #iterate over the lines of interest, and grab values
    my $n1p = 0;
    my $npp = 0;
    while (my $line = <$in>) {
        #grab data from the line
        chomp $line;
        my ($cuiA, $cuiB, $val) = split(/\t/, $line);

        #skip lines that do not contain all three fields
        next unless defined $val;

        #membership is evaluated per line, so no state leaks between lines
        my $isATerm = exists $aTerms{$cuiA};
        my $isCTerm = exists $cTerms{$cuiB};
        next unless ($isATerm || $isCTerm);

        #update npp (uses the split value; there is no third regex capture)
        $npp += $val;

        #update np1
        if ($isCTerm) {
            $np1{$cuiB} += $val;
        }

        #update n1p, and n11 when both sides of the pair are of interest
        if ($isATerm) {
            $n1p += $val;
            if ($isCTerm) {
                $n11{$cuiB} += $val;
            }
        }
    }
    close $in;

######################################
    # Calculate Association for each c term
######################################
    my %associationScores = ();
    foreach my $cTerm (keys %cTerms) {
        $associationScores{$cTerm} =
            $association->_calculateAssociation_fromObservedCounts(
                $n11{$cTerm}, $n1p, $np1{$cTerm}, $npp, $measure);
    }

    return \%associationScores;
}

# scores each implicit CUI using an association measure. Score is the maximum 
# association between any of the linking terms.
# input:  $startingMatrixRef <- ref to the starting matrix
#         $explicitMatrixRef <- ref to the explicit matrix
#         $implicitMatrixRef <- ref to the implicit matrix
#         $measure <- the string of the umls association measure to use
#         $association <- an instance of umls association
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
sub scoreImplicit_fromAllPairs {
    #LBD Info
    my $startingMatrixRef = shift;
    my $explicitMatrixRef = shift;
    my $implicitMatrixRef = shift;
    my $measure = shift;
    my $association = shift;

    #optionally pass in stats so they don't get recalculated for
    # multiple terms (such as with time slicing)
    my $n1pRef = shift;
    my $np1Ref = shift;



( run in 2.882 seconds using v1.01-cache-2.11-cpan-5837b0d9d2c )