Bio-MUST-Core

 view release on metacpan or  search on metacpan

lib/Bio/MUST/Core/SeqMask/Rates.pm  view on Meta::CPAN


# override superclass' Bool type
# Note: mask indices are as follow: [site]
#       mask values  are rates
has '+mask' => (
    isa => 'ArrayRef[Num]',
);

# TODO: mask non-applicable methods from superclass? (Liskov principle)



sub min_rate {
    my $self = shift;
    return List::AllUtils::min @{ $self->mask };
}



sub max_rate {
    my $self = shift;
    return List::AllUtils::max @{ $self->mask };
}



sub delta_rates {
    my $self = shift;
    my $othr = shift;

    # check that both rates objects are the same length
    # potential bugs could come from constant sites etc
    my $s_width = $self->mask_len;
    my $o_width = $othr->mask_len;
    carp "[BMC] Warning: Rates widths do not match: $s_width vs. $o_width!"
        unless $s_width == $o_width;

    my @deltas;

    my $ea = each_arrayref [ $self->all_states ], [ $othr->all_states ];
    while (my ($s_rate, $o_rate) = $ea->() ) {
        push @deltas, 0 + ( sprintf "%.13f", abs( $s_rate - $o_rate ) );
    }   # Note: trick to get identical results across platforms

    # TODO: check that $self->new() is really correct
    return $self->new( mask => \@deltas );
}


# Rates-based SeqMask factory methods


# small delta for slightly increasing extreme bins
const my $DELTA => 1e-13;

sub bin_rates_masks {
    my $self  = shift;
    my $bin_n = shift;
    my $args  = shift // {};            # HashRef (should not be empty...)

    my $percentile = $args->{percentile} // 0;
    my $cumulative = $args->{cumulative} // 0;
    my $descending = $args->{descending} // 0;

    my @masks;

    # define bin bounds based on equal count (in terms of sites)
    if ($percentile) {

        # create rates-sorted index of sites (from slow to fast)
        my @index = sort {
            $self->state_at($a) <=> $self->state_at($b)
        } 0..$self->mask_len-1;

        # optionally reverse index: higher values mean slower rates (TIGER)
        @index = reverse @index if $descending;

        # compute masks from index slices
        my $step = ceil( @index / $bin_n );
        ### $step
        for my $i (0..$bin_n-1) {
            my $min = $cumulative ? 0 : ($i    * $step);
            my $max =                  (($i+1) * $step - 1);
               $max = $#index if $max > $#index;
            ### min: $min
            ### max: $max
            ### rates: map { $self->state_at($_) } @index[ $min..$max ]
            push @masks, SeqMask->custom_mask(
                $self->mask_len, [ @index[ $min..$max ] ]
            );

        }
    }

    # define bin bounds based on equal width (in terms of rates)
    else {
        my @bounds;

        # compute bin bounds from rate range
        my $min = $self->min_rate;
        my $max = $self->max_rate;
        my $step = ($max - $min) / $bin_n;
        for (my $i = $min + $step; $i <= $max; $i += $step) {
            push @bounds, $i;
        }
        # Note: did try to use Statistics::Descriptive with no luck

        # add lower bound for first bin...
        # ... and small delta to first/last bins for catching min/max values
        unshift @bounds, $min -  $DELTA;
                $bounds[-1]   += $DELTA;

        # optionally reverse bounds: higher values mean slower rates (TIGER)
        @bounds = reverse @bounds if $descending;
        ### @bounds

        # derive masks from bin bounds
        for my $i (1..$#bounds) {
            push @masks, $self->rates_mask(
                $cumulative ? $bounds[0] : $bounds[$i - 1],     # bin min
                $bounds[$i]                                     # bin max
            );
        }
    }

    return @masks;
}



( run in 2.773 seconds using v1.01-cache-2.11-cpan-0bb4e1dffa6 )