ALBD

 view release on metacpan or  search on metacpan

lib/LiteratureBasedDiscovery/Rank.pm  view on Meta::CPAN

    #LBD Info
    my $startingMatrixRef = shift;
    my $explicitMatrixRef = shift;
    my $implicitMatrixRef = shift;

    #get all the A->C pairs
    my $acPairsRef = &_getACPairs($startingMatrixRef, $implicitMatrixRef);
    my %scores = ();
    foreach my $pairKey (keys %{$acPairsRef}) {
	#get the A and C keys
	my ($aKey, $cKey) = split(/,/,$pairKey);

	#grab the A and C explicit vectors
	my $aVectorRef = ${$explicitMatrixRef}{$aKey};
	my $cVectorRef = ${$explicitMatrixRef}{$cKey};

	#find the numerator which is the sum of A[i]*C[i] values
	my $numerator = 0;
	foreach my $key (keys ${$aVectorRef}) {
	    if (exists ${$cVectorRef}{$key}) {
		$numerator += ${$aVectorRef}{$key} * ${$cVectorRef}{$key};
	    }
	}

	#find the sum of A squared
	my $aSum = 0;
	foreach my $key (keys ${$aVectorRef}) {
	    $aSum += ($key*$key);
	}

	#find the sum of C squared
	my $cSum = 0;
	foreach my $key (keys ${$aVectorRef}) {
	    $cSum += ($key*$key);
	}

	#find the denominator, which is the product of A and C lengths
	my $denom = sqrt($aSum)*sqrt($cSum);

	#set the score (maximum score seen for that C term)
	my $score = -1;
	if ($denom != 0) {
	    $score = $numerator/$denom;
	}
	if (exists $scores{$cKey}) {
	    if ($score > $scores{$cKey}) {
		$scores{$cKey} = $score;
	    }
	}
	else {
	    $scores{$cKey} = $score;
	}	
    }
    
    return \%scores;
}

# gets a list of A->C pairs, and sets the value as the implicit matrix value
# input:  $startingMatrixRef <- ref to the starting matrix
#         $implicitMatrixRef <- ref to the implicit matrix
# output: a hash ref where keys are comma seperated cui pairs hash{'C000,C111'}
#         and values are set to the value at that index in the implicit matrix
sub _getACPairs {
    my $startingMatrixRef = shift;
    my $implicitMatrixRef = shift;

    #generate a list of ac pairs
    my %acPairs = ();
    foreach my $keyA (keys %{$implicitMatrixRef}) {
	foreach my $keyC (%{${$implicitMatrixRef}{$keyA}}) {
	    $acPairs{$keyA,$keyC} = ${${$implicitMatrixRef}{$keyA}}{$keyC};
	}
    }
    
    return \%acPairs;

}


# scores each implicit CUI based on the number of linking terms between
# it and all starting terms.
# input:  $startingMatrixRef <- ref to the starting matrix
#         $explicitMatrixRef <- ref to the explicit matrix
#         $implicitMatrixRef <- ref to the implicit matrix
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
sub scoreImplicit_linkingTermCount {
    #LBD Info
    my $startingMatrixRef = shift;
    my $explicitMatrixRef = shift;
    my $implicitMatrixRef = shift;

    #get all bc pairs
    my $bcPairsRef = &_getBCPairs($startingMatrixRef, $explicitMatrixRef, $implicitMatrixRef);

    # Find the linking term count for each cTerm
    my %scores = ();
    my ($key1, $key2);
    foreach my $pairKey (keys %{$bcPairsRef}) {
	#cTerm is the second value ($key2)
	($key1, $key2) = split(/,/,$pairKey);

	#automatically initializes to 0
	$scores{$key2}++;
    }
    return \%scores;
}


# scores each implicit CUI based on the summed frequency of co-occurrence
# between it and all B terms (A->B frequencies are NOT considered)
# input:  $startingMatrixRef <- ref to the starting matrix
#         $explicitMatrixRef <- ref to the explicit matrix
#         $implicitMatrixRef <- ref to the implicit matrix
# output: a hash ref of scores for each implicit key. (hash{cui} = score)
sub scoreImplicit_frequency {
    #LBD Info
    my $startingMatrixRef = shift;
    my $explicitMatrixRef = shift;
    my $implicitMatrixRef = shift;

    #get all bc pairs



( run in 2.132 seconds using v1.01-cache-2.11-cpan-0bb4e1dffa6 )