Bio-BPWrapper
view release on metacpan or search on metacpan
lib/Bio/BPWrapper/SeqManipulations.pm view on Meta::CPAN
=encoding utf8
=head1 NAME
Bio::BPWrapper::SeqManipulations - Functions for L<bioseq>
=head1 SYNOPSIS
use Bio::BPWrapper::SeqManipulations;
# Set options hash ...
initialize(\%opts);
write_out(\%opts);
=cut
package Bio::BPWrapper::SeqManipulations;
use strict; # Still on 5.10, so need this for strict
use warnings;
use 5.010;
use Bio::Seq;
use Bio::SeqIO;
use File::Basename;
use Bio::Tools::CodonTable;
use Bio::DB::RefSeq;
use Bio::Tools::SeqStats;
use Bio::SeqUtils;
use Scalar::Util;
use Exporter ();
use Bio::CodonUsage::IO;
use Bio::Tools::pICalculator;
# use Bio::Tools::GuessSeqFormat;
if ($ENV{'DEBUG'}) { use Data::Dumper }
use vars qw(@ISA @EXPORT @EXPORT_OK);
@ISA = qw(Exporter);
#restrict_coord restrict_digest
@EXPORT = qw(initialize can_handle handle_opt write_out
print_composition filter_seqs retrieve_seqs remove_gaps
print_lengths print_seq_count make_revcom
print_subseq
anonymize
shred_seq count_codons print_gb_gene_feats
count_leading_gaps hydroB linearize reloop_at
remove_stop parse_orders find_by_order
pick_by_order del_by_order find_by_id
pick_by_id del_by_id find_by_re
pick_by_re del_by_re
pick_by_file del_by_file
find_by_ambig pick_by_ambig del_by_ambig find_by_length
del_by_length codon_sim codon_info);
# Package global variables
my ($in, $out, $seq, %opts, $filename, $in_format, $out_format, $guesser);
use Bio::BPWrapper;
my $VERSION = '1.0';
## For new options, just add an entry into this table with the same key as in
## the GetOpts function in the main program. Make the key be a reference to the handler subroutine (defined below), and test that it works.
my %opt_dispatch = (
'codon-table' => \&codon_table,
# 'codon-sim' => \&codon_sim,
'codon-info' => \&codon_info,
'iep' => \&iso_electric_point,
'composition' => \&print_composition,
'mol-wt' => \&print_weight,
'delete' => \&filter_seqs,
'fetch' => \&retrieve_seqs,
'no-gaps' => \&remove_gaps,
'length' => \&print_lengths,
'longest-orf' => \&update_longest_orf,
'num-seq' => \&print_seq_count,
'num-gaps-dna' => \&count_gaps_dna,
'num-gaps-aa' => \&count_gaps_aa,
'pick' => \&filter_seqs,
'revcom' => \&make_revcom,
'subseq' => \&print_subseq,
'translate' => \&reading_frame_ops,
# 'restrict-coord' => \&restrict_coord,
# 'restrict' => \&restrict_digest,
'anonymize' => \&anonymize,
'break' => \&shred_seq,
'count-codons' => \&count_codons,
'feat2fas' => \&print_gb_gene_feats,
'lead-gaps' => \&count_leading_gaps,
'hydroB' => \&hydroB,
'linearize' => \&linearize,
'reloop' => \&reloop_at,
'remove-stop' => \&remove_stop,
'sort' => \&sort_by,
'split-cdhit' => \&split_cdhit,
'syn-code' => \&synon_codons,
# 'dotplot' => \&draw_dotplot,
# 'extract' => \&reading_frame_ops,
# 'longest-orf' => \&reading_frame_ops,
# 'prefix' => \&anonymize,
'rename' => \&rename_id,
# 'slidingwindow' => \&sliding_window,
# 'split' => \&split_seqs,
);
my %filter_dispatch = (
'find_by_order' => \&find_by_order,
'pick_by_order' => \&pick_by_order,
'delete_by_order' => \&del_by_order,
'find_by_id' => \&find_by_id,
'pick_by_id' => \&pick_by_id,
'delete_by_id' => \&del_by_id,
'find_by_re' => \&find_by_re,
'pick_by_re' => \&pick_by_re,
'find_by_file' => \&find_by_file,
'pick_by_file' => \&pick_by_file,
'delete_by_file' => \&del_by_file,
'find_by_ambig' => \&find_by_ambig,
'pick_by_ambig' => \&pick_by_ambig,
'delete_by_ambig' => \&del_by_ambig,
'find_by_length' => \&find_by_length,
'pick_by_length' => \&pick_by_length,
'delete_by_length' => \&del_by_length,
);
##################### initializer & option handlers ###################
=head1 SUBROUTINES
=head2 initialize()
Sets up most of the actions to be performed on sequences.
Call this right after setting up an options hash.
Sets package variables: C<$in>, C<$in_format>, C<$filename>, C<$out_format>, and C<$out>.
=cut
sub initialize {
my $opts_ref = shift;
Bio::BPWrapper::common_opts($opts_ref);
%opts = %{$opts_ref};
die "Option 'prefix' requires a value\n" if defined $opts{"prefix"} && $opts{"prefix"} =~ /^$/;
$filename = shift @ARGV || "STDIN"; # If no more arguments were given on the command line, assume we're getting input from standard input
# guess format won't work for piped input; remove
# if ($filename eq "STDIN") {
# my $lines;
# my $line_ct = 0;
# while(<>) { $lines .= $_; $line_ct++; last if $line_ct >= 100 } # read the first 100 lines
# $guesser = Bio::Tools::GuessSeqFormat->new( -text => $lines );
# } else {
# $guesser = Bio::Tools::GuessSeqFormat->new( -file => $filename);
# }
# $in_format = $guesser->guess() unless $opts{'input'};
$in_format = $opts{"input"} // 'fasta';
# die "Reads only fasta, fastq, embl, genbank. Not aligment file formats like clustalw\n" unless $in_format =~ /fasta|fastq|embl|genbank/;
$in = Bio::SeqIO->new(-format => $in_format, ($filename eq "STDIN")? (-fh => \*STDIN) : (-file => $filename));
$out_format = $opts{"output"} // 'fasta';
# A change in SeqIO, commit 0e04486ca4cc2e61fd72, means -fh or -file is required
$out = Bio::SeqIO->new(-format => $out_format, -fh => \*STDOUT)
}
sub can_handle {
my $option = shift;
return defined($opt_dispatch{$option})
}
sub handle_opt {
my $option = shift;
$opt_dispatch{$option}->($option) # This passes option name to all functions
}
=head2 write_out()
Writes out the sequence file.
Call this after calling C<#initialize(\%opts)> and processing those options.
=cut
sub write_out {
while ($seq = $in->next_seq()) { $out->write_seq($seq) }
}
################### subroutines ########################
sub codon_sim {
# my $seq = $in->next_seq(); # only the first sequence used
# if (&_internal_stop_or_x($seq->translate()->seq())) {
# die "internal stop or non-standard base:\t" . $seq->id . "\texit.\n";
# }
# use Algorithm::Numerical::Sample qw /sample/;
# use Math::Random qw /random_permutation/;
########################
# Read CUTG and make a random codon set for each AA
########################
# my $cutg_file = $opts{'codon-sim'};
# my $io = Bio::CodonUsage::IO->new(-file => $cutg_file);
# my $cdtable = $io->next_data();
my @bases = qw(A T C G);
my @codons;
for (my $i=0; $i<=3; $i++) {
my $first = $bases[$i];
for (my $j=0; $j<=3; $j++) {
my $second = $bases[$j];
for (my $k=0; $k<=3; $k++) {
my $third = $bases[$k];
push @codons, $first . $second . $third;
}
( run in 0.896 second using v1.01-cache-2.11-cpan-39bf76dae61 )