view release on metacpan or search on metacpan
bin/ali2phylip.pl view on Meta::CPAN
dump_stats($outfile, $ali, 'out');
# only write actual phylip file if not in test mode
unless ($out) {
my $method = $ARGV_ali ? 'store' : 'store_phylip';
my $args = { clean => 1, $ARGV_p80 ? (short => 0, chunk => -1) : () };
$ali->$method($outfile, $args);
}
}
# wrapper to native methods to transparently handle codon_mask
sub _apply_mask {
my $ali = shift;
my $mask = shift;
if ($ARGV_keep_codons) {
$mask = $mask->codon_mask( {
frame => $ARGV_coding_frame,
max => $ARGV_codon_max_nt_drop,
} );
lib/Bio/MUST/Core.pm view on Meta::CPAN
Bio::MUST::Core - Core classes and utilities for Bio::MUST
=head1 VERSION
version 0.252040
=head1 DESCRIPTION
This distribution is the base of the C<Bio::MUST> module collection designed
for writing phylogenomic applications in Perl. Their main strength lies in
their transparent handling of the NCBI Taxonomy database (see
L<https://www.ncbi.nlm.nih.gov/taxonomy>), for example to automatically label
ancestral nodes in phylogenetic trees.
C<Bio::MUST> classes do not need (and are not meant as a replacement for)
L<BioPerl>. In contrast, they depend on both L<Bio::LITE::Taxonomy> and
L<Bio::Phylo>, two non-BioPerl distributions for dealing with biological data.
C<Bio::MUST> modules have been used in production since 2013 but are not yet
ready for wider adoption due to their lack of documentation. This should
improve over time. Meanwhile, adventurous users can have a look at the
lib/Bio/MUST/Core/Ali.pm view on Meta::CPAN
# $seq_coords is [ 3, 23, 59, 71, 71, 74 ]
This method requires two arguments: the id of a sequence and an array
reference of input sites in Ali coordinates.
=head1 I/O METHODS
=head2 load
Class method (constructor) returning a new Ali read from disk. This method
will transparently import plain FASTA files in addition to the MUST
pseudo-FASTA format (ALI files).
use Test::More;
use aliased 'Bio::MUST::Core::Ali';
my $ali1 = Ali->load('example.ali');
my $ali2 = Ali->load('example.fasta');
my @seqs1 = $ali1->all_seqs;
my @seqs2 = $ali2->all_seqs;
is_deeply \@seqs1, \@seqs2, 'should be true';
lib/Bio/MUST/Core/Ali.pm view on Meta::CPAN
This method requires one argument.
=head2 load_tinyseq
Class method (constructor) returning a new Ali read from a file in NCBI
TinySeq XML format.
=head2 instant_store
Class method intended to transform a large sequence file read from disk
without loading it in memory. This method will transparently process plain
FASTA files in addition to the MUST pseudo-FASTA format (ALI files).
my $chunk = 200;
my $split = sub {
my $seq = shift;
my $base_id = ( split /\s+/xms, $seq->full_id )[0];
my $max_pos = $seq->seq_len - $chunk;
my $n = 0;
my $out_str;
lib/Bio/MUST/Core/Ali.pm view on Meta::CPAN
);
This method requires two arguments. The second is a hash reference that must
contain the following keys:
- infile: input sequence file
- coderef: subroutine implementing the transforming logic
=head2 instant_count
Class method returning the number of seqs in any sequence file read from disk
without loading it in memory. This method will transparently process plain
FASTA files in addition to the MUST pseudo-FASTA format (ALI files).
use aliased 'Bio::MUST::Core::Ali';
my $seq_n = Ali->instant_count('input.ali');
say $seq_n;
=head1 ALIASES
=head2 height
lib/Bio/MUST/Core/Ali/Stash.pm view on Meta::CPAN
This method accepts just one argument (and not an array slice).
It is a faster implementation of the same method from the C<Ali> class.
=head1 I/O METHODS
=head2 load
Class method (constructor) returning a new Ali::Stash read from disk. As in
C<Ali>, this method will transparently import plain FASTA files in addition to
the MUST pseudo-FASTA format (ALI files).
# load database
my $db = Stash->load( 'database.fasta' );
# alternatively... (indexing only accessions)
my $db = Stash->load( 'database.fasta', { truncate_ids => 1 } );
This method requires one argument and accepts a second optional argument
controlling the way sequence ids are processed. It is a hash reference that
lib/Bio/MUST/Core/IdList.pm view on Meta::CPAN
}
sub _ali_from_list_ {
my $self = shift;
my $reorder = shift;
my $ali = shift;
my $lookup = shift; # optional IdList indexing the Ali
# override passed lookup with internal lookup if available
# Note: this allows Stash lookups to be used transparently
$lookup = $ali->lookup if $ali->can('lookup');
# TODO: warn for missing ids in Ali?
# create new Ali object (extending header comment)
# TODO: allow custom comments
my $new_ali = Ali->new(
comments => [ $ali->all_comments,
'built by ' . ($reorder ? 'reordered_ali' : 'filtered_ali')
],
lib/Bio/MUST/Core/SeqId.pm view on Meta::CPAN
return;
}
# check full_id validity
my ($family, $tag, $genus, $species, $strain, $acc, $tail, $new)
= $self->full_id =~ $FULL_ID;
unless (defined $genus) {
# First try to coerce foreign full_id by replacing 1st '_' by ' '. If
# this does not work, keep the original full_id and flag it as foreign.
# This approach allows the transparent conversion of valid full_ids
# from foreign software able to handle unlimited gap-free ids.
# Note: This will fail if the optional family part contains an '_'.
my $cand_id = $self->full_id =~ s{_}{ }xmsr;
($family, $tag, $genus, $species, $strain, $acc, $tail, $new)
= $cand_id =~ $FULL_ID;
unless (defined $genus) {
$self->_set_foreign;
return;
}
lib/Bio/MUST/Core/Taxonomy.pm view on Meta::CPAN
# tree annotation methods
sub attach_taxonomies_to_terminals {
my $self = shift;
my $tree = shift;
#### ATTACHING TAXONOMIES TO TERMINALS...
# transparently fetch Bio::Phylo component object
$tree = $tree->tree if $tree->isa('Bio::MUST::Core::Tree');
# store tip taxonomies in Bio::Phylo::Forest::Node generic attributes
for my $tip ( @{ $tree->get_terminals } ) {
# fetch taxonomy (and level list) from tip's seq id
my @tax = $self->get_taxonomy_with_levels_from_seq_id($tip->get_name);
# attach them as distinct ArrayRefs
$tip->set_generic('taxonomy' => [ map { $_->[0] } @tax ] );
lib/Bio/MUST/Core/Tree/Splits.pm view on Meta::CPAN
return join q{},
zip_by { $xor_for{"$_[0]$_[1]"} } map { [ split // ] } @_[1..2];
}
sub get_node_for_split {
my $self = shift;
my $tree = shift;
my $bp_key = shift;
# transparently fetch Bio::Phylo component object
# TODO: avoid code repetition?
$tree = $tree->tree if $tree->isa('Bio::MUST::Core::Tree');
my $comp_bp_key = $bp_key =~ tr/.*/*./r;
NODE:
for my $node ( @{ $tree->get_entities } ) {
my $node_key = $self->node2key($node);
next NODE unless $node_key;
return $node if $node_key eq $bp_key || $node_key eq $comp_bp_key;
lib/Bio/MUST/Core/Tree/Splits.pm view on Meta::CPAN
# );
#
# return $splits;
# }
sub new_from_tree {
my $class = shift;
my $tree = shift;
# transparently fetch Bio::Phylo component object
# TODO: avoid code repetition?
$tree = $tree->tree if $tree->isa('Bio::MUST::Core::Tree');
# build lookup as fast as possible (no tree visitor method)
my $lookup = IdList->new(
ids => [ map { $_->get_name } @{ $tree->get_terminals } ]
);
# instantiate Splits object to benefit from ids2key method
my $splits = $class->new(