Bio-MUST-Core
view release on metacpan or search on metacpan
bin/split-rates-ali.pl view on Meta::CPAN
my $outfile = secure_outfile(
change_suffix($infile, '.stats'), $ARGV_out_suffix
);
### Dumping site-wise stats to: $outfile
$rates->store($outfile);
}
}
### Computing masks from stats
# Map each binning option name to the value of its command-line switch, then
# keep (with value 1) only the options whose switch is true; options that were
# not requested are left out of %args entirely.
my %switch_for = (
    percentile => $ARGV_percentile,
    cumulative => $ARGV_cumulative,
    descending => $ARGV_descending,
);
my %args = map { ( $_ => 1 ) } grep { $switch_for{$_} } keys %switch_for;

# Ask the rates object for the bin masks, passing the options by reference.
my @masks = $rates->bin_rates_masks( $ARGV_bin_number, \%args );
# output one Ali per bin
for my $i (0..$#masks) {
my $bin_ali = $masks[$i]->filtered_ali($ali);
my $outfile = secure_outfile($alifile, $ARGV_out_suffix . "-bin$i");
# Note: $ARGV_out_suffix defaults to q{} for smooth concatenation
bin/split-rates-ali.pl view on Meta::CPAN
either delta rates or chi-square statistics depending on the infiles type
(see C<--sitefreq> option).
=item --bin-number=<n>
Number of bins to define [default: 10].
=for Euclid: n.type: number
n.default: 10
=item --percentile
Define bins containing an equal number of sites rather than bins of equal width
in terms of rates [default: no].
=item --cumulative
Define bins including all previous bins [default: no]. This leads to ALI
outfiles of increasing width and only makes sense when slower sites are in
lower bins. If higher "rates" mean slower sites, use the --descending option.
lib/Bio/MUST/Core/Ali.pm view on Meta::CPAN
=head2 seq_len_stats
Returns a list of 5 values summarizing the Ali seq lengths (ignoring gaps).
The values are the following: Q0 (min), Q1, Q2 (median), Q3, and Q4 (max).
This method does not accept any arguments.
=head2 perc_miss
Returns the percentage of missing (and gap-like) character states in the Ali.
As this method internally calls C<Ali::width>, the remarks above also apply.
my $miss_level = $ali->perc_miss;
This method does not accept any arguments.
=head1 MUTATORS
=head2 uc_seqs
lib/Bio/MUST/Core/SeqMask/Rates.pm view on Meta::CPAN
# small delta for slightly increasing extreme bins
const my $DELTA => 1e-13;
sub bin_rates_masks {
my $self = shift;
my $bin_n = shift;
my $args = shift // {}; # HashRef (should not be empty...)
my $percentile = $args->{percentile} // 0;
my $cumulative = $args->{cumulative} // 0;
my $descending = $args->{descending} // 0;
my @masks;
# define bin bounds based on equal count (in terms of sites)
if ($percentile) {
# create rates-sorted index of sites (from slow to fast)
my @index = sort {
$self->state_at($a) <=> $self->state_at($b)
} 0..$self->mask_len-1;
# optionally reverse index: higher values mean slower rates (TIGER)
@index = reverse @index if $descending;
# compute masks from index slices
basename => 'primate',
len => 898,
bin_n => 10,
args => { cumulative => 1, descending => 1 },
bins => [ 373, 373, 376, 376, 376, 444, 517, 554, 706, 898 ],
},
{
basename => 'primate',
len => 898,
bin_n => 5,
args => { percentile => 1, descending => 1 },
bins => [ 180, 180, 180, 180, 178 ],
},
{
basename => 'primate',
len => 898,
bin_n => 5,
args => { cumulative => 1, percentile => 1, descending => 1 },
bins => [ 180, 360, 540, 720, 898 ],
},
{
basename => 'thermus',
len => 1273,
bin_n => 10,
args => { descending => 1 },
bins => [ 833, 0, 0, 0, 0, 116, 87, 72, 96, 69 ],
},
{
basename => 'thermus',
len => 1273,
bin_n => 10,
args => { cumulative => 1, descending => 1 },
bins => [ 833, 833, 833, 833, 833, 949, 1036, 1108, 1204, 1273 ],
},
{
basename => 'thermus',
len => 1273,
bin_n => 6,
args => { percentile => 1, descending => 1 },
bins => [ 213, 213, 213, 213, 213, 208 ],
},
{
basename => 'thermus',
len => 1273,
bin_n => 6,
args => { percentile => 1, cumulative => 1, descending => 1 },
bins => [ 213, 426, 639, 852, 1065, 1273 ],
},
);
# Walk through every expectation record in @exp_data.
for my $data (@exp_data) {
# Unpack the record's fields in one go with a hash slice on the hashref.
my ($basename, $exp_len, $bin_n, $args, $exp_bins)
= @{$data}{ qw(basename len bin_n args bins) };
# The rates file for a record is test/<basename>.rates.
my $infile = file('test', "$basename.rates");
# Load the rates through $class->load.
# NOTE(review): $exp_len, $bin_n, $args and $exp_bins are not used in the
# lines visible here; presumably the checks that consume them follow --
# confirm against the full test file.
my $rates = $class->load($infile);
}
{
my $infile = file('test', 'supermatrix-CATG-A-sample.rate');
my $rates = $class->load($infile);
isa_ok $rates, $class, $infile;
cmp_ok $rates->mask_len, '==', 17167,
'read expected number of site rates';
my $bin_n = 10;
my $args = { percentile => 1 };
my @masks = $rates->bin_rates_masks($bin_n, $args);
my $alifile = file('test', 'supermatrix.ali');
my $ali = Bio::MUST::Core::Ali->load($alifile);
$ali->apply_mask( Bio::MUST::Core::SeqMask->variable_mask($ali) );
for my $i (0..$#masks) {
cmp_store(
obj => $masks[$i]->filtered_ali($ali),
method => 'store',
( run in 0.381 second using v1.01-cache-2.11-cpan-709fd43a63f )