Bio-MUST-Core

 view release on metacpan or  search on metacpan

bin/split-rates-ali.pl  view on Meta::CPAN

            my $outfile = secure_outfile(
                change_suffix($infile, '.stats'), $ARGV_out_suffix
            );
            ### Dumping site-wise stats to: $outfile
            $rates->store($outfile);
        }
    }

    ### Computing masks from stats
    # Build the option hash forwarded to bin_rates_masks. A key is only set
    # when the matching command-line switch is true; absent keys are left
    # for bin_rates_masks to default.
    my %args;
    $args{percentile} = 1 if $ARGV_percentile;  # --percentile
    $args{cumulative} = 1 if $ARGV_cumulative;  # --cumulative
    $args{descending} = 1 if $ARGV_descending;  # --descending

    # One mask is returned per bin; each is applied to the Ali further below.
    my @masks = $rates->bin_rates_masks($ARGV_bin_number, \%args);

    # output one Ali per bin
    for my $i (0..$#masks) {
        my $bin_ali = $masks[$i]->filtered_ali($ali);
        my $outfile = secure_outfile($alifile, $ARGV_out_suffix . "-bin$i");
        # Note: $ARGV_out_suffix defaults to q{} for smooth concatenation

bin/split-rates-ali.pl  view on Meta::CPAN

either delta rates or chi-square statistics depending on the type of the infiles
(see C<--sitefreq> option).

=item --bin-number=<n>

Number of bins to define [default: 10].

=for Euclid: n.type:    number
    n.default: 10

=item --percentile

Define bins containing an equal number of sites rather than bins of equal width
in terms of rates [default: no].

=item --cumulative

Define bins including all previous bins [default: no]. This leads to ALI
outfiles of increasing width and only makes sense when slower sites are in
lower bins. If higher "rates" mean slower sites, use the --descending option.

lib/Bio/MUST/Core/Ali.pm  view on Meta::CPAN


=head2 seq_len_stats

Returns a list of 5 values summarizing the Ali seq lengths (ignoring gaps).
The values are the following: Q0 (min), Q1, Q2 (median), Q3, and Q4 (max).

This method does not accept any arguments.

=head2 perc_miss

Returns the percentage of missing (and gap-like) character states in the Ali.

As this method internally calls C<Ali::width>, the remarks above also apply.

    my $miss_level = $ali->perc_miss;

This method does not accept any arguments.

=head1 MUTATORS

=head2 uc_seqs

lib/Bio/MUST/Core/SeqMask/Rates.pm  view on Meta::CPAN



# small delta for slightly increasing extreme bins
const my $DELTA => 1e-13;

sub bin_rates_masks {
    my $self  = shift;
    my $bin_n = shift;
    my $args  = shift // {};            # HashRef (should not be empty...)

    my $percentile = $args->{percentile} // 0;
    my $cumulative = $args->{cumulative} // 0;
    my $descending = $args->{descending} // 0;

    my @masks;

    # define bin bounds based on equal count (in terms of sites)
    if ($percentile) {

        # create rates-sorted index of sites (from slow to fast)
        my @index = sort {
            $self->state_at($a) <=> $self->state_at($b)
        } 0..$self->mask_len-1;

        # optionally reverse index: higher values mean slower rates (TIGER)
        @index = reverse @index if $descending;

        # compute masks from index slices

t/rates.t  view on Meta::CPAN

        basename => 'primate',
        len => 898,
        bin_n => 10,
        args => { cumulative => 1, descending => 1 },
        bins => [ 373, 373, 376, 376, 376, 444, 517, 554, 706, 898 ],
    },
    {
        basename => 'primate',
        len => 898,
        bin_n => 5,
        args => { percentile => 1, descending => 1 },
        bins => [ 180, 180, 180, 180, 178 ],
    },
    {
        basename => 'primate',
        len => 898,
        bin_n => 5,
        args => { cumulative => 1, percentile => 1, descending => 1 },
        bins => [ 180, 360, 540, 720, 898 ],
    },
    {
        basename => 'thermus',
        len => 1273,
        bin_n => 10,
        args => { descending => 1 },
        bins => [ 833, 0, 0, 0, 0, 116, 87, 72, 96, 69 ],
    },
    {
        basename => 'thermus',
        len => 1273,
        bin_n => 10,
        args => { cumulative => 1, descending => 1 },
        bins => [ 833, 833, 833, 833, 833, 949, 1036, 1108, 1204, 1273 ],
    },
    {
        basename => 'thermus',
        len => 1273,
        bin_n => 6,
        args => { percentile => 1, descending => 1 },
        bins => [ 213, 213, 213, 213, 213, 208 ],
    },
    {
        basename => 'thermus',
        len => 1273,
        bin_n => 6,
        args => { percentile => 1, cumulative => 1, descending => 1 },
        bins => [ 213, 426, 639, 852, 1065, 1273 ],
    },
);

for my $data (@exp_data) {
    my ($basename, $exp_len, $bin_n, $args, $exp_bins)
        = @{$data}{ qw(basename len bin_n args bins) };

    my $infile = file('test', "$basename.rates");
    my $rates = $class->load($infile);

t/rates.t  view on Meta::CPAN

}

{
    my $infile = file('test', 'supermatrix-CATG-A-sample.rate');
    my $rates = $class->load($infile);
    isa_ok $rates, $class, $infile;
    cmp_ok $rates->mask_len, '==', 17167,
        'read expected number of site rates';

    my $bin_n = 10;
    my $args = { percentile => 1 };
    my @masks = $rates->bin_rates_masks($bin_n, $args);

    my $alifile = file('test', 'supermatrix.ali');
    my $ali = Bio::MUST::Core::Ali->load($alifile);
    $ali->apply_mask( Bio::MUST::Core::SeqMask->variable_mask($ali) );

    for my $i (0..$#masks) {
        cmp_store(
            obj    => $masks[$i]->filtered_ali($ali),
            method => 'store',



( run in 0.381 second using v1.01-cache-2.11-cpan-709fd43a63f )