Bio-BPWrapper
view release on metacpan or search on metacpan
Revision history for Bio::BPWrapper
1.13 2020-02-02:
----------------
* revised for Bio::Perl 1.7.x; uses only Bio::Restriction::Analysis now
* command options regularized and documentation updated
* citations and doc links added and updated
* use Bio::Tools::GuessSeqFormat to guess format in `bioaln`, `bioseq` and `biopop`
* bioaln:
* fix bugs in file reading with `--concat`
* add ungapped diffs to `--pair-diff`
* bioseq:
* add pick/delete by file
* add `codon-table` interface to Bio::Tools::CodonTable and Bio::Tools::IUPAC
* use `--gap char -` to change "." to "-"
* modify `--delete` to delete by max number of x ambiguous (non-ATCGs) bases
* modify `-t1` to allow non-standard AA to pass (but not internal stops)
=item --gap-states
Prints one alignment gap per line, including its start, end, whether in-frame, whether on-edge, how many copies, and alignment length. (The original use case for this option is no longer remembered; it can be ignored.)
=item --gap-states2
Prints one alignment gap per column, including its start-end as column heading and presence/absence (1/0) in each sequence.
=item --input, -i 'format'
The input format is now guessed automatically. BLAST outputs still need to be specified explicitly.
[Deprecated except for blast output] Specify input file format. Common ones include 'clustalw' (default), 'fasta' and 'phylip'. See L<Bio::AlignIO> for supported formats.
It also reads NCBI-blast outputs, e.g., bioaln -i'blast' blast.out.
=item --length, -l
Print alignment length.
=item --listids, -L
Return the mean Kyte-Doolittle hydropathicity for protein sequences.
=item --iep
Return the iso-electric point for protein sequences as well as charges at a series of pH values. Depends on Bio::Tools::pICalculator.
=item --input, -i
Input file format. By default, this is 'fasta'. For Genbank format, use 'genbank'. For EMBL format, use 'embl'. For FASTQ, use 'fastq'.
[We tried to guess the format using Bio::Tools::GuessSeqFormat, but it did not work for piped input. Format guessing is postponed until this issue is fixed.]
=item --lead-gaps | -G
Count and return the number of leading gaps in each sequence.
=item --length, -l
Print all sequence lengths.
=item --linearize, -L
lib/Bio/BPWrapper/AlnManipulations.pm view on Meta::CPAN
# This is the format that aln-manipulations expects by default
# NOTE(review): $default_format is not referenced again in the code shown here;
# the active fallback below repeats the literal 'clustalw' instead.
my $default_format = "clustalw";
# assume we're getting input from standard input
# ---- Disabled format auto-detection, kept for reference ----
# my $in_format = $opts{"input"} || $default_format;
# my $in_format;
# use IO::Scalar;
# my $s;
# my ($guesser);
# if ($file eq "STDIN") {
# my $line_ct = 0;
# my $lines;
# while(<>) { $lines .= $_; $line_ct++; last if $line_ct >= 100 } # read the first 100 lines
# $guesser = Bio::Tools::GuessSeqFormat->new( -text => $lines );
# } else {
# open $ifh, "<", $file or die $!;
# $guesser = Bio::Tools::GuessSeqFormat->new( -file => $file );
# }
# $in_format = $guesser->guess();
# die "unknown file format. Try specify with -i flag.\n" unless $in_format;
# seek (STDIN, 0, 0);
# warn "$in_format\n";
# ---- End of disabled format auto-detection ----
# Input format: the 'input' option when it is set to a true value, otherwise 'clustalw'.
my $in_format = $opts{'input'} || 'clustalw';
# 'concat' mode: consume the remaining command-line arguments as alignment
# files and collect every alignment found in them into @alns.
if ($opts{"concat"}) {
# foreach my $file (glob @ARGV) {
# The loop empties @ARGV (it stops early on a false argument such as "0").
while ($file = shift @ARGV) {
# warn "reading $file\n";
# $guesser = Bio::Tools::GuessSeqFormat->new( -file => $file);
# $in_format = $guesser->guess;
# All files are parsed with the same $in_format.
$in = Bio::AlignIO->new(-file => $file, -format => $in_format);
while ($aln=$in->next_aln()) { push @alns, $aln }
}
} else {
# Single-input mode: take one file name from @ARGV, or the placeholder
# string "STDIN" when none is left.
$file = shift @ARGV || "STDIN"; # If no more arguments were given on the command line
if ($in_format && $in_format =~ /blast/) { # guess blastoutput as "phylip", so -i 'blast' is needed
# if ($opts{"input"} && $opts{"input"} =~ /blast/) { # "blastxml" (-outfmt 5 ) preferred
# BLAST report: parsed with Bio::SearchIO, reading from STDIN or from $file.
my $searchio = Bio::SearchIO->new( -format => 'blast', ($file eq "STDIN")? (-fh => \*STDIN) : (-file => $file)); # works for regular blast output
# my $searchio = Bio::SearchIO->new( -format => 'blast', -fh => $ifh);
# NOTE(review): $aln is overwritten for every hit of every result, so after
# the loops it holds the alignment of the last hit processed - confirm that
# this (rather than the first hit) is intended.
while ( my $result = $searchio->next_result() ) {
while( my $hit = $result->next_hit ) {
# NOTE(review): $hsp is not checked for undef before get_aln() is called.
my $hsp = $hit->next_hsp; # only one HSP is fetched per hit; the hit's other HSPs are ignored
$aln = $hsp->get_aln();
}
}
} else { # would throw error if format guessed wrong
# $in = Bio::AlignIO->new(-format => $in_format, ($file eq "STDIN")? (-fh => \*STDIN) : (-file => $file));
# $in = Bio::AlignIO->new(-format => $in_format, -fh => $ifh);
# Alignment file: open it (or STDIN) with Bio::AlignIO and read one alignment
# with a single next_aln() call.
$in = Bio::AlignIO->new(-format=>$in_format, ($file eq "STDIN")? (-fh => \*STDIN) : (-file => $file) );
$aln = $in->next_aln()
}
}
# Normalize the 'binary' option to a strict 1/0 flag.
$binary = $opts{"binary"} ? 1 : 0;
#### Options which *require an output FH* go *after* this ####
lib/Bio/BPWrapper/SeqManipulations.pm view on Meta::CPAN
count_leading_gaps hydroB linearize reloop_at
remove_stop parse_orders find_by_order
pick_by_order del_by_order find_by_id
pick_by_id del_by_id find_by_re
pick_by_re del_by_re
pick_by_file del_by_file
find_by_ambig pick_by_ambig del_by_ambig find_by_length
del_by_length codon_sim codon_info);
# Package global variables
# As assigned in initialize():
#   %opts       - copy of the options hash passed in by reference
#   $filename   - input path taken from @ARGV, or the string "STDIN"
#   $in_format  - input format name (the 'input' option, default 'fasta')
#   $out_format - output format name (the 'output' option, default 'fasta')
#   $in         - Bio::SeqIO object reading from $filename or STDIN
#   $out        - Bio::SeqIO object writing to STDOUT
# $guesser is presumably left over from the disabled format guessing in
# initialize() - verify before removing.
my ($in, $out, $seq, %opts, $filename, $in_format, $out_format, $guesser);
use Bio::BPWrapper;
# NOTE(review): declared with `my`, so this version is lexical to the file and
# is not a package variable - confirm whether `our $VERSION` was intended.
my $VERSION = '1.0';
## For new options, just add an entry into this table with the same key as in
## the GetOpts function in the main program. Make the value a reference to the handler subroutine (defined below), and test that it works.
my %opt_dispatch = (
'codon-table' => \&codon_table,
# 'codon-sim' => \&codon_sim,
'codon-info' => \&codon_info,
'iep' => \&iso_electric_point,
lib/Bio/BPWrapper/SeqManipulations.pm view on Meta::CPAN
sub initialize {
my $opts_ref = shift;
Bio::BPWrapper::common_opts($opts_ref);
%opts = %{$opts_ref};
die "Option 'prefix' requires a value\n" if defined $opts{"prefix"} && $opts{"prefix"} =~ /^$/;
$filename = shift @ARGV || "STDIN"; # If no more arguments were given on the command line, assume we're getting input from standard input
# guess format won't work for piped input; remove
# if ($filename eq "STDIN") {
# my $lines;
# my $line_ct = 0;
# while(<>) { $lines .= $_; $line_ct++; last if $line_ct >= 100 } # read the first 100 lines
# $guesser = Bio::Tools::GuessSeqFormat->new( -text => $lines );
# } else {
# $guesser = Bio::Tools::GuessSeqFormat->new( -file => $filename);
# }
# $in_format = $guesser->guess() unless $opts{'input'};
$in_format = $opts{"input"} // 'fasta';
# die "Reads only fasta, fastq, embl, genbank. Not aligment file formats like clustalw\n" unless $in_format =~ /fasta|fastq|embl|genbank/;
$in = Bio::SeqIO->new(-format => $in_format, ($filename eq "STDIN")? (-fh => \*STDIN) : (-file => $filename));
$out_format = $opts{"output"} // 'fasta';
# A change in SeqIO, commit 0e04486ca4cc2e61fd72, means -fh or -file is required
$out = Bio::SeqIO->new(-format => $out_format, -fh => \*STDOUT)
( run in 2.550 seconds using v1.01-cache-2.11-cpan-748bfb374f4 )