Bio-MUST-Core

 view release on metacpan or  search on metacpan

lib/Bio/MUST/Core/SeqMask/Profiles.pm  view on Meta::CPAN

package Bio::MUST::Core::SeqMask::Profiles;
# ABSTRACT: Evolutionary profiles for sequence sites
$Bio::MUST::Core::SeqMask::Profiles::VERSION = '0.252040';
use Moose;
use namespace::autoclean;

use autodie;
use feature qw(say);

use Carp;
use Const::Fast;
use Tie::IxHash;

extends 'Bio::MUST::Core::SeqMask';

use Bio::MUST::Core::Types;
use Bio::MUST::Core::Constants qw(:files);
use aliased 'Bio::MUST::Core::SeqMask::Freqs';


# Override the type constraint of the 'mask' attribute inherited from the
# superclass: its Bool type is replaced by a nested structure of numbers.
# Note: mask indices are as follows: [site]{full_id}{AA}
#       mask values  are AA freqs (both per seq and averaged over seqs)
has '+mask' => (
    isa => 'ArrayRef[HashRef[HashRef[Num]]]',
);

# TODO: mask non-applicable methods from superclass? (Liskov principle)
# TODO: move this under PostPred instead of SeqMask?

# NOTE(review): presumably the pseudo full_id under which the freqs averaged
# over seqs are stored in the mask -- confirm against the code using it.
const my $AVERAGE => '<:AVERAGE:>';
# NOTE(review): presumably the number of decimal digits used when formatting
# freqs -- TODO confirm against the code using it.
const my $PREC => 3;


sub ppred_profiles {
    my $class = shift;
    my $alis  = shift;
    my $args  = shift // {};            # HashRef (should not be empty...)

    my $list = $args->{sim_list};

    my @sim_freq_at_for;

    my $regex;
    my $width;
    my $seq_inc;
    my $avg_inc;

    # loop through Ali objects to build site profiles
    # Note: profiles will be available both per seq and averaged over seqs
    for my $ali ( @{$alis} ) {

        # extract seqs on which to compute freqs (defaults to all seqs)
        my $sample = $list ? $list->filtered_ali($ali) : $ali;
        my @seqs = $sample->all_seqs;

        # setup mask details based on first Ali
        unless ($regex) {
            $regex = $ali->gapmiss_regex;
            $width = $ali->width;
            $seq_inc = 1.0 / @{$alis};
            $avg_inc = $seq_inc / @seqs;
        }

        # loop through simulated seqs to store and average ppred state freqs



( run in 1.031 second using v1.01-cache-2.11-cpan-39bf76dae61 )