Bio-BPWrapper

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

Revision history for Bio::BPWrapper

1.13 2020-02-02:
----------------

* revised for Bio::Perl 1.7.x; uses only Bio::Restriction::Analysis now
* command options regularized and documentation updated
* citations and doc links added and updated
* use Bio::Tools::GuessSeqFormat to guess format in `bioaln`, `bioseq` and `biopop`

* bioaln:
  * fix bugs in file reading with `--concat`
  * add ungapped diffs to `--pair-diff`
* bioseq:
  * add pick/delete by file
  * add `codon-table` interface to Tools::CodonTable and Tools::IUPAC
  * use `--gap char -` to change "." to "-"
  * modify `--delete` to delete by max number of x ambiguous (non-ATCGs) bases
  * modify `-t1` to allow non-standard AA to pass (but not internal stops)

bin/bioaln  view on Meta::CPAN

=item --gap-states

Prints one alignment gap per line, including its start, end, whether in-frame, whether on-edge, how many copies, and alignment length. (The original use case for this option is no longer remembered; it can be ignored.)

=item --gap-states2

Prints one alignment gap per column, including its start-end as column heading and presence/absence (1/0) in each sequence.

=item --input, -i 'format'

Now it tries to guess the format. BLAST output still needs to be specified explicitly.

[Deprecated except for blast output] Specify input file format. Common ones include 'clustalw' (default), 'fasta' and 'phylip'. See L<Bio::AlignIO> for supported formats.

In addition, it reads NCBI-blast outputs as well. e.g., bioaln -i'blast' blast.out.

=item --length, -l

Print alignment length.

=item --listids, -L

bin/bioseq  view on Meta::CPAN

Return the mean Kyte-Doolittle hydropathicity for protein sequences.

=item --iep

Return the iso-electric point for protein sequences, as well as charges at a series of pH values. Depends on Bio::Tools::pICalculator.

=item --input, -i

Input file format. By default, this is 'fasta'. For Genbank format, use 'genbank'. For EMBL format, use 'embl'. For FASTQ, use 'fastq'.

[We tried to guess the format using Bio::Tools::GuessSeqFormat, but it did not work for piped input. Format guessing is postponed until this issue is fixed.]

=item --lead-gaps | -G

Count and return the number of leading gaps in each sequence.

=item --length, -l

Print all sequence lengths.

=item --linearize, -L

lib/Bio/BPWrapper/AlnManipulations.pm  view on Meta::CPAN


    # This is the format that aln-manipulations expects by default
    my $default_format = "clustalw";

    # assume we're getting input from standard input

#    my $in_format = $opts{"input"} || $default_format;
#    my $in_format;
#    use IO::Scalar;
#    my $s;
#    my ($guesser);
#    if ($file eq "STDIN") {
#	my $line_ct = 0; 
#	my $lines;
#	while(<>) { $lines .= $_; $line_ct++; last if $line_ct >= 100 } # read the first 100 lines
#	$guesser = Bio::Tools::GuessSeqFormat->new( -text => $lines );
#   } else {
#	open $ifh, "<", $file or die $!;
#	$guesser = Bio::Tools::GuessSeqFormat->new( -file => $file );
#    }
#    $in_format  = $guesser->guess();
#    die "unknown file format. Try specify with -i flag.\n" unless $in_format;
#    seek (STDIN, 0, 0);
#    warn "$in_format\n";

    my $in_format = $opts{'input'} || 'clustalw';
    if ($opts{"concat"}) {
#	foreach my $file (glob @ARGV) {
	while ($file = shift @ARGV) {
#	    warn "reading $file\n";
#	       $guesser = Bio::Tools::GuessSeqFormat->new( -file => $file);
#	       $in_format  = $guesser->guess;
	       $in = Bio::AlignIO->new(-file => $file, -format => $in_format);
	       while ($aln=$in->next_aln()) { push @alns, $aln }
	}
    } else {
	$file = shift @ARGV || "STDIN";    # If no more arguments were given on the command line
	if ($in_format && $in_format =~ /blast/) { # guess blastoutput as "phylip", so -i 'blast' is needed
#	if ($opts{"input"} && $opts{"input"} =~ /blast/) { # "blastxml" (-outfmt 5 ) preferred
	    my $searchio = Bio::SearchIO->new( -format => 'blast', ($file eq "STDIN")? (-fh => \*STDIN) : (-file => $file)); # works for regular blast output
#	    my $searchio = Bio::SearchIO->new( -format => 'blast', -fh => $ifh);
	    while ( my $result = $searchio->next_result() ) {
		while( my $hit = $result->next_hit ) {
 		    my $hsp = $hit->next_hsp; # get first hit; others ignored
		    $aln = $hsp->get_aln();
		}
	    }
	} else { # would throw error if format guessed wrong
#	    $in = Bio::AlignIO->new(-format => $in_format, ($file eq "STDIN")? (-fh => \*STDIN) : (-file => $file));
#	    $in = Bio::AlignIO->new(-format => $in_format, -fh => $ifh);
	    $in = Bio::AlignIO->new(-format=>$in_format, ($file eq "STDIN")? (-fh => \*STDIN) : (-file => $file) );
	    $aln = $in->next_aln()
	}
    }
    
    $binary = $opts{"binary"} ? 1 : 0;
    
    #### Options which *require an output FH* go *after* this ####

lib/Bio/BPWrapper/SeqManipulations.pm  view on Meta::CPAN

count_leading_gaps hydroB linearize reloop_at
remove_stop parse_orders find_by_order
pick_by_order del_by_order find_by_id
pick_by_id del_by_id find_by_re
pick_by_re del_by_re
pick_by_file del_by_file
find_by_ambig pick_by_ambig del_by_ambig find_by_length
del_by_length codon_sim codon_info);

# Package global variables
my ($in, $out, $seq, %opts, $filename, $in_format, $out_format, $guesser);
use Bio::BPWrapper;
my $VERSION = '1.0';

## For new options, just add an entry into this table with the same key as in
## the GetOpts function in the main program. Make the value a reference to the handler subroutine (defined below), and test that it works.
my %opt_dispatch = (
    'codon-table' => \&codon_table,
#    'codon-sim' => \&codon_sim,
    'codon-info' => \&codon_info,
    'iep' => \&iso_electric_point,

lib/Bio/BPWrapper/SeqManipulations.pm  view on Meta::CPAN


sub initialize {
    my $opts_ref = shift;
    Bio::BPWrapper::common_opts($opts_ref);
    %opts = %{$opts_ref};

    die "Option 'prefix' requires a value\n" if defined $opts{"prefix"} && $opts{"prefix"} =~ /^$/;

    $filename = shift @ARGV || "STDIN";    # If no more arguments were given on the command line, assume we're getting input from standard input

# guess format won't work for piped input; remove
#    if ($filename eq "STDIN") {
#	my $lines; 
#	my $line_ct = 0; 
#	while(<>) { $lines .= $_; $line_ct++; last if $line_ct >= 100 } # read the first 100 lines
#	$guesser = Bio::Tools::GuessSeqFormat->new( -text => $lines );
#    } else {
#	$guesser = Bio::Tools::GuessSeqFormat->new( -file => $filename);
#    }
#    $in_format  = $guesser->guess() unless $opts{'input'};

    $in_format = $opts{"input"} // 'fasta';

#    die "Reads only fasta, fastq, embl, genbank. Not aligment file formats like clustalw\n" unless $in_format =~ /fasta|fastq|embl|genbank/;
    $in = Bio::SeqIO->new(-format => $in_format, ($filename eq "STDIN")? (-fh => \*STDIN) : (-file => $filename));

    $out_format = $opts{"output"} // 'fasta';

# A change in SeqIO, commit 0e04486ca4cc2e61fd72, means -fh or -file is required
    $out = Bio::SeqIO->new(-format => $out_format, -fh => \*STDOUT)



( run in 2.550 seconds using v1.01-cache-2.11-cpan-748bfb374f4 )