Pheno-Ranker
view release on metacpan or search on metacpan
lib/Pheno/Ranker/Compare/Matrix.pm view on Meta::CPAN
package Pheno::Ranker::Compare::Matrix;
use strict;
use warnings;
use autodie;
use feature qw(say);
use Sort::Naturally qw(nsort);
use Pheno::Ranker::Metrics;
use Exporter 'import';
our @EXPORT_OK = qw(cohort_comparison);
sub cohort_comparison {
my ( $ref_binary_hash, $self ) = @_;
my $out_file = $self->{out_file};
my $similarity_metric = $self->{similarity_metric_cohort};
my $matrix_format = $self->{matrix_format} || 'dense';
# Threshold on the number of records: above it, RAM-efficient mode is switched on
my $max_items = $self->{max_matrix_records_in_ram};
# Inform about the start of the comparison process
say "Performing COHORT comparison"
if ( $self->{debug} || $self->{verbose} );
# Define the subroutine to be used
my %similarity_function = (
'hamming' => \&hd_fast,
'jaccard' => \&jaccard_similarity_formatted
);
# Define values for diagonal elements depending on metric
my %similarity_diagonal = (
'hamming' => 0,
'jaccard' => 1
);
# Look up the metric subroutine and the diagonal value for the selected similarity metric
my $metric = $similarity_function{$similarity_metric};
my $similarity_diagonal = $similarity_diagonal{$similarity_metric};
# Sorting keys of the hash
my @ids = nsort( keys %{$ref_binary_hash} );
my @strings = map { $ref_binary_hash->{$_}{binary_digit_string_weighted} } @ids;
my $num_items = scalar @ids;
# Set $switch to 1 (RAM-efficient mode) when the number of items exceeds $max_items
my $switch = $num_items > $max_items ? 1 : 0;
say "RAM efficient mode is: "
. ( $switch ? "on" : "off" )
. " (max_matrix_records_in_ram: $max_items)"
if ( $self->{debug} || $self->{verbose} );
if ( $matrix_format eq 'mtx' ) {
_write_matrix_market(
{
out_file => $out_file,
ids => \@ids,
strings => \@strings,
metric => $metric,
similarity_diagonal => $similarity_diagonal,
verbose => $self->{verbose},
}
);
say "Matrix saved to <$out_file>" if ( $self->{debug} || $self->{verbose} );
( run in 0.636 second using v1.01-cache-2.11-cpan-524268b4103 )