ALBD

 view release on metacpan or  search on metacpan

lib/LiteratureBasedDiscovery/Rank.pm  view on Meta::CPAN

	#check each cuiPair with this ltc
	my %tiedAMWScores = ();
	foreach my $cuiPair (@{$ltcHash{$ltc}}) {
	    $tiedAMWScores{$cuiPair} = ${$amwScoresRef}{$cuiPair};
	}

	#add the cui pairs by descending amw score
	foreach my $cuiPair (sort {$tiedAMWScores{$b} <=> $tiedAMWScores{$a}} keys %tiedAMWScores) {
	    $ltcAMWScores{$cuiPair} = $currentRank;
	    $currentRank--;
	}
    }

    #return the scores
    return \%ltcAMWScores;
}

#TODO this is an untested method
# gets the max cosine score between all a terms and each cTerm
# (despite the method name, the value computed is the cosine similarity
#  dot(A,C) / (|A| * |C|), so a larger score means more similar vectors)
# input:  $startingMatrixRef <- ref to the starting matrix
#         $explicitMatrixRef <- ref to the explicit matrix
#         $implicitMatrixRef <- ref to the implicit matrix
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
#         a pair whose A or C vector is missing or has zero length
#         scores -1
sub score_cosineDistance {
    #LBD Info
    my $startingMatrixRef = shift;
    my $explicitMatrixRef = shift;
    my $implicitMatrixRef = shift;

    #pair keys are documented as 'A,C', but a multi-key hash subscript
    #($hash{$a,$c}) joins its parts with $; so accept either separator
    my $subscriptSep = "$;";

    #get all the A->C pairs
    my $acPairsRef = &_getACPairs($startingMatrixRef, $implicitMatrixRef);
    my %scores = ();
    foreach my $pairKey (keys %{$acPairsRef}) {
        #get the A and C keys, skipping anything that is not a pair
        my ($aKey, $cKey) = split(/,|\Q$subscriptSep\E/, $pairKey, 2);
        next if (!defined $aKey || !defined $cKey);

        #grab the A and C explicit vectors (a term with no explicit
        #row is treated as an empty vector)
        my $aVectorRef = ${$explicitMatrixRef}{$aKey} || {};
        my $cVectorRef = ${$explicitMatrixRef}{$cKey} || {};

        #find the numerator which is the sum of A[i]*C[i] values
        my $numerator = 0;
        foreach my $key (keys %{$aVectorRef}) {
            if (exists ${$cVectorRef}{$key}) {
                $numerator += ${$aVectorRef}{$key} * ${$cVectorRef}{$key};
            }
        }

        #find the sum of A squared (over the vector values, not the keys)
        my $aSum = 0;
        foreach my $value (values %{$aVectorRef}) {
            $aSum += ($value*$value);
        }

        #find the sum of C squared
        my $cSum = 0;
        foreach my $value (values %{$cVectorRef}) {
            $cSum += ($value*$value);
        }

        #find the denominator, which is the product of A and C lengths
        my $denom = sqrt($aSum)*sqrt($cSum);

        #set the score (maximum score seen for that C term)
        #-1 marks a pair whose cosine is undefined (zero length vector)
        my $score = -1;
        if ($denom != 0) {
            $score = $numerator/$denom;
        }
        if (!exists $scores{$cKey} || $score > $scores{$cKey}) {
            $scores{$cKey} = $score;
        }
    }

    return \%scores;
}

# gets a list of A->C pairs, and sets the value as the implicit matrix value
# input:  $startingMatrixRef <- ref to the starting matrix (accepted for
#                               interface compatibility but not read here;
#                               every row key of the implicit matrix is
#                               used as an A term)
#         $implicitMatrixRef <- ref to the implicit matrix
# output: a hash ref where keys are comma separated cui pairs hash{'C000,C111'}
#         and values are set to the value at that index in the implicit matrix
sub _getACPairs {
    my $startingMatrixRef = shift;
    my $implicitMatrixRef = shift;

    #generate a list of ac pairs
    my %acPairs = ();
    foreach my $keyA (keys %{$implicitMatrixRef}) {
        my $rowRef = ${$implicitMatrixRef}{$keyA};

        #iterate the column keys only; a bare %{...} in list context
        #would yield the values as well as the keys
        foreach my $keyC (keys %{$rowRef}) {
            #build the key by hand: $acPairs{$keyA,$keyC} would join the
            #parts with $; rather than the comma that callers split on
            $acPairs{"$keyA,$keyC"} = ${$rowRef}{$keyC};
        }
    }

    return \%acPairs;
}


# scores each implicit CUI based on the number of linking terms between
# it and all starting terms.
# input:  $startingMatrixRef <- ref to the starting matrix
#         $explicitMatrixRef <- ref to the explicit matrix
#         $implicitMatrixRef <- ref to the implicit matrix
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
sub scoreImplicit_linkingTermCount {
    #LBD Info
    my $startingMatrixRef = shift;
    my $explicitMatrixRef = shift;
    my $implicitMatrixRef = shift;

    #get all bc pairs
    my $bcPairsRef = &_getBCPairs($startingMatrixRef, $explicitMatrixRef, $implicitMatrixRef);

    # Find the linking term count for each cTerm
    my %scores = ();
    my ($key1, $key2);



( run in 1.013 second using v1.01-cache-2.11-cpan-140bd7fdf52 )