ALBD

 view release on metacpan or  search on metacpan

lib/LiteratureBasedDiscovery/Rank.pm  view on Meta::CPAN

# output: a hash ref of scores for each implicit key. (hash{cui} = score)
sub scoreImplicit_LTC_AMW {
    # Ranks every pair scored by linking term count (LTC), using the
    # average minimum weight (AMW) score to break ties between pairs that
    # share the same LTC. Pairs that are still tied after AMW are ordered
    # by their key, so the result is reproducible from run to run
    # (Perl's hash ordering is not stable between processes).
    #
    # input:  $startingMatrixRef, $explicitMatrixRef, $implicitMatrixRef
    #             <- passed unchanged to the LTC and AMW scoring subs
    #         $measure, $association, $abScoresRef
    #             <- passed unchanged to the AMW scoring sub
    #         $n1pRef, $np1Ref, $nppRef
    #             <- optional stats, passed unchanged to the AMW scoring
    #                sub so they don't get recalculated for multiple
    #                terms (such as with time slicing)
    # output: a hash ref with one entry per key of the LTC scores. The
    #         value is a rank: the best pair gets N (the number of LTC
    #         entries), the next N-1, and so on down to 1.
    my ($startingMatrixRef, $explicitMatrixRef, $implicitMatrixRef,
        $measure, $association, $abScoresRef,
        $n1pRef, $np1Ref, $nppRef) = @_;

    #get linking term count scores
    my $ltcAssociationsRef = &scoreImplicit_linkingTermCount(
        $startingMatrixRef, $explicitMatrixRef, $implicitMatrixRef);

    #get average minimum weight scores
    my $amwScoresRef = &scoreImplicit_averageMinimumWeight(
        $startingMatrixRef, $explicitMatrixRef, $implicitMatrixRef,
        $measure, $association, $abScoresRef, $n1pRef, $np1Ref, $nppRef);

    #group the pair keys by LTC: the key is the LTC, and the value is an
    # array of the pair keys that have that LTC (push autovivifies the
    # array the first time an LTC value is seen)
    my %pairsByLTC = ();
    foreach my $pairKey (keys %{$ltcAssociationsRef}) {
        push @{$pairsByLTC{${$ltcAssociationsRef}{$pairKey}}}, $pairKey;
    }

    #generate the LTC-AMW scores by assigning a rank value,
    # first by LTC, and then by AMW
    my %ltcAMWScores = ();
    my $currentRank = scalar keys %{$ltcAssociationsRef};

    #iterate first over ltc in descending order
    foreach my $ltc (sort {$b <=> $a} keys %pairsByLTC) {

        #collect the AMW score of each pair tied at this ltc. A pair with
        # no AMW score is given 0, which is what the numeric sort below
        # would treat an undefined value as anyway (minus the warning)
        my %tiedAMWScores = ();
        foreach my $cuiPair (@{$pairsByLTC{$ltc}}) {
            my $amw = ${$amwScoresRef}{$cuiPair};
            $tiedAMWScores{$cuiPair} = defined $amw ? $amw : 0;
        }

        #add the pairs by descending amw score, falling back to the pair
        # key so that exact ties always get the same relative ranks
        my @orderedPairs = sort {
            $tiedAMWScores{$b} <=> $tiedAMWScores{$a} || $a cmp $b
        } keys %tiedAMWScores;
        foreach my $cuiPair (@orderedPairs) {
            $ltcAMWScores{$cuiPair} = $currentRank;
            $currentRank--;
        }
    }

    #return the scores
    return \%ltcAMWScores;
}

#TODO this is an untested method
# gets the max cosine distance score between all A terms and each C term
# input:  $startingMatrixRef <- ref to the starting matrix
#         $explicitMatrixRef <- ref to the explicit matrix
#         $implicitMatrixRef <- ref to the implicit matrix
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
sub score_cosineDistance {
    #LBD Info
    my $startingMatrixRef = shift;
    my $explicitMatrixRef = shift;
    my $implicitMatrixRef = shift;

    #get all the A->C pairs
    my $acPairsRef = &_getACPairs($startingMatrixRef, $implicitMatrixRef);
    my %scores = ();
    foreach my $pairKey (keys %{$acPairsRef}) {
	#get the A and C keys
	my ($aKey, $cKey) = split(/,/,$pairKey);

	#grab the A and C explicit vectors
	my $aVectorRef = ${$explicitMatrixRef}{$aKey};
	my $cVectorRef = ${$explicitMatrixRef}{$cKey};

	#find the numerator which is the sum of A[i]*C[i] values
	my $numerator = 0;
	foreach my $key (keys ${$aVectorRef}) {
	    if (exists ${$cVectorRef}{$key}) {
		$numerator += ${$aVectorRef}{$key} * ${$cVectorRef}{$key};
	    }
	}

	#find the sum of A squared
	my $aSum = 0;
	foreach my $key (keys ${$aVectorRef}) {
	    $aSum += ($key*$key);
	}

	#find the sum of C squared
	my $cSum = 0;
	foreach my $key (keys ${$aVectorRef}) {
	    $cSum += ($key*$key);
	}

	#find the denominator, which is the product of A and C lengths
	my $denom = sqrt($aSum)*sqrt($cSum);

	#set the score (maximum score seen for that C term)
	my $score = -1;
	if ($denom != 0) {
	    $score = $numerator/$denom;
	}
	if (exists $scores{$cKey}) {
	    if ($score > $scores{$cKey}) {
		$scores{$cKey} = $score;
	    }
	}
	else {
	    $scores{$cKey} = $score;
	}	
    }
    



( run in 3.236 seconds using v1.01-cache-2.11-cpan-75ffa21a3d4 )