Bio-MUST-Core
view release on metacpan or search on metacpan
lib/Bio/MUST/Core/SeqMask/Rates.pm view on Meta::CPAN
# override superclass' Bool type
# Note: mask indices are as follow: [site]
# mask values are rates
has '+mask' => (
isa => 'ArrayRef[Num]',
);
# TODO: mask non-applicable methods from superclass? (Liskov principle)
sub min_rate {
my $self = shift;
return List::AllUtils::min @{ $self->mask };
}
sub max_rate {
my $self = shift;
return List::AllUtils::max @{ $self->mask };
}
sub delta_rates {
my $self = shift;
my $othr = shift;
# check that both rates objects are the same length
# potential bugs could come from constant sites etc
my $s_width = $self->mask_len;
my $o_width = $othr->mask_len;
carp "[BMC] Warning: Rates widths do not match: $s_width vs. $o_width!"
unless $s_width == $o_width;
my @deltas;
my $ea = each_arrayref [ $self->all_states ], [ $othr->all_states ];
while (my ($s_rate, $o_rate) = $ea->() ) {
push @deltas, 0 + ( sprintf "%.13f", abs( $s_rate - $o_rate ) );
} # Note: trick to get identical results across platforms
# TODO: check that $self->new() is really correct
return $self->new( mask => \@deltas );
}
# Rates-based SeqMask factory methods
# small delta for slightly increasing extreme bins
const my $DELTA => 1e-13;
sub bin_rates_masks {
my $self = shift;
my $bin_n = shift;
my $args = shift // {}; # HashRef (should not be empty...)
my $percentile = $args->{percentile} // 0;
my $cumulative = $args->{cumulative} // 0;
my $descending = $args->{descending} // 0;
my @masks;
# define bin bounds based on equal count (in terms of sites)
if ($percentile) {
# create rates-sorted index of sites (from slow to fast)
my @index = sort {
$self->state_at($a) <=> $self->state_at($b)
} 0..$self->mask_len-1;
# optionally reverse index: higher values mean slower rates (TIGER)
@index = reverse @index if $descending;
# compute masks from index slices
my $step = ceil( @index / $bin_n );
### $step
for my $i (0..$bin_n-1) {
my $min = $cumulative ? 0 : ($i * $step);
my $max = (($i+1) * $step - 1);
$max = $#index if $max > $#index;
### min: $min
### max: $max
### rates: map { $self->state_at($_) } @index[ $min..$max ]
push @masks, SeqMask->custom_mask(
$self->mask_len, [ @index[ $min..$max ] ]
);
}
}
# define bin bounds based on equal width (in terms of rates)
else {
my @bounds;
# compute bin bounds from rate range
my $min = $self->min_rate;
my $max = $self->max_rate;
my $step = ($max - $min) / $bin_n;
for (my $i = $min + $step; $i <= $max; $i += $step) {
push @bounds, $i;
}
# Note: did try to use Statistics::Descriptive with no luck
# add lower bound for first bin...
# ... and small delta to first/last bins for catching min/max values
unshift @bounds, $min - $DELTA;
$bounds[-1] += $DELTA;
# optionally reverse bounds: higher values mean slower rates (TIGER)
@bounds = reverse @bounds if $descending;
### @bounds
# derive masks from bin bounds
for my $i (1..$#bounds) {
push @masks, $self->rates_mask(
$cumulative ? $bounds[0] : $bounds[$i - 1], # bin min
$bounds[$i] # bin max
);
}
}
return @masks;
}
( run in 2.773 seconds using v1.01-cache-2.11-cpan-0bb4e1dffa6 )