Bio-BPWrapper
view release on metacpan or search on metacpan
lib/Bio/BPWrapper/AlnManipulations.pm view on Meta::CPAN
"split-cdhit" => \&split_cdhit,
"uniq" => \&get_unique,
"var-sites" => \&variable_sites,
"window" => \&avg_id_by_win,
"concat" => \&concat,
"con-blocks" => \&conserved_blocks,
"consensus" => \&get_consensus,
"dna2pep" => \&dna_to_protein,
"rm-col" => \&remove_gapped_cols_in_one_seq,
"aln-index" => \&colnum_from_residue_pos,
"list-ids" => \&list_ids,
"pair-diff" => \&pair_diff,
"pair-diff-ref" => \&pair_diff_ref,
"permute-states" => \&permute_states,
"pep2dna" => \&protein_to_dna,
"resample" => \&sample_seqs,
"shuffle-sites" => \&shuffle_sites,
"select-third" => \&select_third_sites,
"remove-third" => \&remove_third_sites,
"random-slice" => \&random_slice,
"upper" => \&upper_case,
"gap-states" => \&gap_states,
"gap-states2" => \&gap_states_matrix,
"trim-ends" => \&trim_ends,
"bin-inform" => \&binary_informative,
"bin-ref" => \&binary_ref,
"phy-nonint" => \&phylip_non_interleaved
);
##################### initializer & option handlers ###################
## TODO Formal testing!
=head1 SUBROUTINES
=head2 initialize()
Sets up most of the actions to be performed on an alignment.
Call this right after setting up an options hash.
Sets package variables: C<$in_format>, C<$binary>, C<$out_format>, and C<$out>.
=cut
sub initialize {
my $opts_ref = shift;
Bio::BPWrapper::common_opts($opts_ref);
%opts = %{$opts_ref};
# This is the format that aln-manipulations expects by default
my $default_format = "clustalw";
# assume we're getting input from standard input
# my $in_format = $opts{"input"} || $default_format;
# my $in_format;
# use IO::Scalar;
# my $s;
# my ($guesser);
# if ($file eq "STDIN") {
# my $line_ct = 0;
# my $lines;
# while(<>) { $lines .= $_; $line_ct++; last if $line_ct >= 100 } # read the first 100 lines
# $guesser = Bio::Tools::GuessSeqFormat->new( -text => $lines );
# } else {
# open $ifh, "<", $file or die $!;
# $guesser = Bio::Tools::GuessSeqFormat->new( -file => $file );
# }
# $in_format = $guesser->guess();
# die "unknown file format. Try specify with -i flag.\n" unless $in_format;
# seek (STDIN, 0, 0);
# warn "$in_format\n";
my $in_format = $opts{'input'} || 'clustalw';
if ($opts{"concat"}) {
# foreach my $file (glob @ARGV) {
while ($file = shift @ARGV) {
# warn "reading $file\n";
# $guesser = Bio::Tools::GuessSeqFormat->new( -file => $file);
# $in_format = $guesser->guess;
$in = Bio::AlignIO->new(-file => $file, -format => $in_format);
while ($aln=$in->next_aln()) { push @alns, $aln }
}
} else {
$file = shift @ARGV || "STDIN"; # If no more arguments were given on the command line
if ($in_format && $in_format =~ /blast/) { # guess blastoutput as "phylip", so -i 'blast' is needed
# if ($opts{"input"} && $opts{"input"} =~ /blast/) { # "blastxml" (-outfmt 5 ) preferred
my $searchio = Bio::SearchIO->new( -format => 'blast', ($file eq "STDIN")? (-fh => \*STDIN) : (-file => $file)); # works for regular blast output
# my $searchio = Bio::SearchIO->new( -format => 'blast', -fh => $ifh);
while ( my $result = $searchio->next_result() ) {
while( my $hit = $result->next_hit ) {
my $hsp = $hit->next_hsp; # get first hit; others ignored
$aln = $hsp->get_aln();
}
}
} else { # would throw error if format guessed wrong
# $in = Bio::AlignIO->new(-format => $in_format, ($file eq "STDIN")? (-fh => \*STDIN) : (-file => $file));
# $in = Bio::AlignIO->new(-format => $in_format, -fh => $ifh);
$in = Bio::AlignIO->new(-format=>$in_format, ($file eq "STDIN")? (-fh => \*STDIN) : (-file => $file) );
$aln = $in->next_aln()
}
}
$binary = $opts{"binary"} ? 1 : 0;
#### Options which *require an output FH* go *after* this ####
$out_format = $opts{"output"} || $default_format;
$out = Bio::AlignIO->new(-format => $out_format, -fh => \*STDOUT) unless $out_format eq 'paml'
}
sub can_handle {
my $option = shift;
return defined($opt_dispatch{$option})
}
sub handle_opt {
my $option = shift;
# This passes option name to all functions
$opt_dispatch{$option}->($option)
}
=head2 write_out_paml()
Writes output in PAML format.
=cut
sub write_out_paml() {
my @seq;
my $ct=0;
foreach my $seq ($aln->each_seq()) {
my $id = $seq->display_id();
if ($seq->seq() =~ /^-+$/) { print STDERR "all gaps: $file\t$id\n"; next }
$ct++;
push @seq, $seq
}
die "No computable sequences: less than 2 seq.\n" unless $ct >= 2;
print $ct, "\t", $aln->length(), "\n";
foreach (@seq) {
print $_->display_id(), "\n";
print $_->seq(), "\n"
}
}
=head2 write_out()
Performs the bulk of the alignment actions actions set via
L<C<initialize(\%opts)>|/initialize> and calls
L<C<$AlignIO-E<gt>write_aln()>|https://metacpan.org/pod/Bio::AlignIO#write_aln>
or L<C<write_out_paml()>|/write_out_paml>.
Call this after calling C<#initialize(\%opts)>.
=cut
( run in 1.050 second using v1.01-cache-2.11-cpan-39bf76dae61 )