Bio-MUST-Core

 view release on metacpan or  search on metacpan

bin/ali2phylip.pl  view on Meta::CPAN

    dump_stats($outfile, $ali, 'out');

    # only write actual phylip file if not in test mode
    unless ($out) {
        my $method = $ARGV_ali ? 'store' : 'store_phylip';
        my $args = { clean => 1, $ARGV_p80 ? (short => 0, chunk => -1) : () };
        $ali->$method($outfile, $args);
    }
}

# wrapper to native methods to transparently handle codon_mask
sub _apply_mask {
    my $ali  = shift;
    my $mask = shift;

    if ($ARGV_keep_codons) {
        $mask = $mask->codon_mask( {
            frame => $ARGV_coding_frame,
              max => $ARGV_codon_max_nt_drop,
        } );

lib/Bio/MUST/Core.pm  view on Meta::CPAN

Bio::MUST::Core - Core classes and utilities for Bio::MUST

=head1 VERSION

version 0.252040

=head1 DESCRIPTION

This distribution is the base of the C<Bio::MUST> module collection designed
for writing phylogenomic applications in Perl. Their main strength lies in
their transparent handling of the NCBI Taxonomy database (see
L<https://www.ncbi.nlm.nih.gov/taxonomy>), for example to automatically label
ancestral nodes in phylogenetic trees.

C<Bio::MUST> classes do not need (and are not meant as a replacement for)
L<BioPerl>. In contrast, they depend on both L<Bio::LITE::Taxonomy> and
L<Bio::Phylo>, two non-BioPerl distributions for dealing with biological data.

C<Bio::MUST> modules have been used in production since 2013 but are not yet
ready for wider adoption due to their lack of documentation. This should
improve over time. Meanwhile, adventurous users can have a look at the

lib/Bio/MUST/Core/Ali.pm  view on Meta::CPAN

    # $seq_coords is [ 3, 23, 59, 71,  71,  74 ]

This method requires two arguments: the id of a sequence and an array
reference of input sites in Ali coordinates.

=head1 I/O METHODS

=head2 load

Class method (constructor) returning a new Ali read from disk. This method
will transparently import plain FASTA files in addition to the MUST
pseudo-FASTA format (ALI files).

    use Test::More;
    use aliased 'Bio::MUST::Core::Ali';
    my $ali1 = Ali->load('example.ali');
    my $ali2 = Ali->load('example.fasta');
    my @seqs1 = $ali1->all_seqs;
    my @seqs2 = $ali2->all_seqs;
    is_deeply \@seqs1, \@seqs2, 'should be true';

lib/Bio/MUST/Core/Ali.pm  view on Meta::CPAN

This method requires one argument.

=head2 load_tinyseq

Class method (constructor) returning a new Ali read from a file in NCBI
TinySeq XML format.

=head2 instant_store

Class method intended to transform a large sequence file read from disk
without loading it in memory. This method will transparently process plain
FASTA files in addition to the MUST pseudo-FASTA format (ALI files).

    my $chunk = 200;

    my $split = sub {
        my $seq = shift;
        my $base_id = ( split /\s+/xms, $seq->full_id )[0];
        my $max_pos = $seq->seq_len - $chunk;
        my $n = 0;
        my $out_str;

lib/Bio/MUST/Core/Ali.pm  view on Meta::CPAN

    );

This method requires two arguments. The second is a hash reference that must
contain the following keys:
    - infile:  input sequence file
    - coderef: subroutine implementing the transforming logic

=head2 instant_count

Class method returning the number of seqs in any sequence file read from disk
without loading it in memory. This method will transparently process plain
FASTA files in addition to the MUST pseudo-FASTA format (ALI files).

    use aliased 'Bio::MUST::Core::Ali';
    my $seq_n = Ali->instant_count('input.ali');
    say $seq_n;

=head1 ALIASES

=head2 height

lib/Bio/MUST/Core/Ali/Stash.pm  view on Meta::CPAN


This method accepts just one argument (and not an array slice).

It is a faster implementation of the same method from the C<Ali> class.

=head1 I/O METHODS

=head2 load

Class method (constructor) returning a new Ali::Stash read from disk. As in
C<Ali>, this method will transparently import plain FASTA files in addition to
the MUST pseudo-FASTA format (ALI files).

    # load database
    my $db = Stash->load( 'database.fasta' );

    # alternatively... (indexing only accessions)
    my $db = Stash->load( 'database.fasta', { truncate_ids => 1 } );

This method requires one argument and accepts a second optional argument
controlling the way sequence ids are processed. It is a hash reference that

lib/Bio/MUST/Core/IdList.pm  view on Meta::CPAN

}


sub _ali_from_list_ {
    my $self    = shift;
    my $reorder = shift;
    my $ali     = shift;
    my $lookup  = shift;        # optional IdList indexing the Ali

    # override passed lookup with internal lookup if available
    # Note: this allows Stash lookups to be used transparently
    $lookup = $ali->lookup if $ali->can('lookup');

    # TODO: warn for missing ids in Ali?

    # create new Ali object (extending header comment)
    # TODO: allow custom comments
    my $new_ali = Ali->new(
        comments => [ $ali->all_comments,
            'built by ' . ($reorder ? 'reordered_ali' : 'filtered_ali')
        ],

lib/Bio/MUST/Core/SeqId.pm  view on Meta::CPAN

        return;
    }

    # check full_id validity
    my  ($family, $tag, $genus, $species, $strain, $acc, $tail, $new)
        = $self->full_id =~ $FULL_ID;
    unless (defined $genus) {

        # First try to coerce foreign full_id by replacing 1st '_' by ' '. If
        # this does not work, keep the original full_id and flag it as foreign.
        # This approach allows the transparent conversion of valid full_ids
        # from foreign software able to handle unlimited gap-free ids.
        # Note: This will fail if the optional family part contains an '_'.

        my $cand_id = $self->full_id =~ s{_}{ }xmsr;
        ($family, $tag, $genus, $species, $strain, $acc, $tail, $new)
            = $cand_id =~ $FULL_ID;
        unless (defined $genus) {
            $self->_set_foreign;
            return;
        }

lib/Bio/MUST/Core/Taxonomy.pm  view on Meta::CPAN


# tree annotation methods


sub attach_taxonomies_to_terminals {
    my $self = shift;
    my $tree = shift;

    #### ATTACHING TAXONOMIES TO TERMINALS...

    # transparently fetch Bio::Phylo component object
    $tree = $tree->tree if $tree->isa('Bio::MUST::Core::Tree');

    # store tip taxonomies in Bio::Phylo::Forest::Node generic attributes
    for my $tip ( @{ $tree->get_terminals } ) {

        # fetch taxonomy (and level list) from tip's seq id
        my @tax = $self->get_taxonomy_with_levels_from_seq_id($tip->get_name);

        # attach them as distinct ArrayRefs
        $tip->set_generic('taxonomy' => [ map { $_->[0] } @tax ] );

lib/Bio/MUST/Core/Tree/Splits.pm  view on Meta::CPAN

    return join q{},
        zip_by { $xor_for{"$_[0]$_[1]"} } map { [ split // ] } @_[1..2];
}


sub get_node_for_split {
    my $self   = shift;
    my $tree   = shift;
    my $bp_key = shift;

    # transparently fetch Bio::Phylo component object
    # TODO: avoid code repetition?
    $tree = $tree->tree if $tree->isa('Bio::MUST::Core::Tree');

    my $comp_bp_key = $bp_key =~ tr/.*/*./r;

    NODE:
    for my $node ( @{ $tree->get_entities } ) {
        my $node_key = $self->node2key($node);
        next NODE unless $node_key;
        return $node if $node_key eq $bp_key || $node_key eq $comp_bp_key;

lib/Bio/MUST/Core/Tree/Splits.pm  view on Meta::CPAN

#     );
#
#     return $splits;
# }


sub new_from_tree {
    my $class = shift;
    my $tree  = shift;

    # transparently fetch Bio::Phylo component object
    # TODO: avoid code repetition?
    $tree = $tree->tree if $tree->isa('Bio::MUST::Core::Tree');

    # build lookup as fast as possible (no tree visitor method)
    my $lookup = IdList->new(
        ids => [ map { $_->get_name } @{ $tree->get_terminals } ]
    );

    # instantiate Splits object to benefit from ids2key method
    my $splits = $class->new(



( run in 0.836 second using v1.01-cache-2.11-cpan-a1d94b6210f )