Pheno-Ranker

 view release on metacpan or  search on metacpan

lib/Pheno/Ranker/Compare/Matrix.pm  view on Meta::CPAN

package Pheno::Ranker::Compare::Matrix;

use strict;
use warnings;
use autodie;
use feature qw(say);

use Sort::Naturally qw(nsort);

use Pheno::Ranker::Metrics;

use Exporter 'import';
our @EXPORT_OK = qw(cohort_comparison);

sub cohort_comparison {
    my ( $ref_binary_hash, $self ) = @_;
    my $out_file          = $self->{out_file};
    my $similarity_metric = $self->{similarity_metric_cohort};
    my $matrix_format     = $self->{matrix_format} || 'dense';

    # Record-count threshold above which RAM-efficient mode is switched on
    my $max_items = $self->{max_matrix_records_in_ram};

    # Inform about the start of the comparison process
    say "Performing COHORT comparison"
      if ( $self->{debug} || $self->{verbose} );

    # Define the subroutine to be used
    my %similarity_function = (
        'hamming' => \&hd_fast,
        'jaccard' => \&jaccard_similarity_formatted
    );

    # Define values for diagonal elements depending on metric
    my %similarity_diagonal = (
        'hamming' => 0,
        'jaccard' => 1
    );

    # Look up the metric subroutine and diagonal value for the chosen metric
    my $metric              = $similarity_function{$similarity_metric};
    my $similarity_diagonal = $similarity_diagonal{$similarity_metric};

    # Sorting keys of the hash
    my @ids     = nsort( keys %{$ref_binary_hash} );
    my @strings = map { $ref_binary_hash->{$_}{binary_digit_string_weighted} } @ids;
    my $num_items = scalar @ids;

    # Turn on RAM-efficient mode when the record count exceeds the threshold
    my $switch = $num_items > $max_items ? 1 : 0;

    say "RAM efficient mode is: "
      . ( $switch ? "on" : "off" )
      . " (max_matrix_records_in_ram: $max_items)"
      if ( $self->{debug} || $self->{verbose} );

    if ( $matrix_format eq 'mtx' ) {
        _write_matrix_market(
            {
                out_file            => $out_file,
                ids                 => \@ids,
                strings             => \@strings,
                metric              => $metric,
                similarity_diagonal => $similarity_diagonal,
                verbose             => $self->{verbose},
            }
        );
        say "Matrix saved to <$out_file>" if ( $self->{debug} || $self->{verbose} );



( run in 0.636 second using v1.01-cache-2.11-cpan-524268b4103 )