Bio-BPWrapper

 view release on metacpan or  search on metacpan

lib/Bio/BPWrapper/AlnManipulations.pm  view on Meta::CPAN

    "split-cdhit" => \&split_cdhit,
    "uniq" => \&get_unique,
    "var-sites" => \&variable_sites,
    "window" => \&avg_id_by_win,
    "concat" => \&concat,
    "con-blocks" => \&conserved_blocks,
    "consensus" => \&get_consensus,
    "dna2pep" => \&dna_to_protein,
    "rm-col" => \&remove_gapped_cols_in_one_seq,
    "aln-index" => \&colnum_from_residue_pos,
    "list-ids" => \&list_ids,
    "pair-diff" => \&pair_diff,
    "pair-diff-ref" => \&pair_diff_ref,
    "permute-states" => \&permute_states,
    "pep2dna" => \&protein_to_dna,
    "resample" => \&sample_seqs,
    "shuffle-sites" => \&shuffle_sites,
    "select-third" => \&select_third_sites,
    "remove-third" => \&remove_third_sites,
    "random-slice" => \&random_slice,
    "upper" => \&upper_case,
    "gap-states" => \&gap_states,
    "gap-states2" => \&gap_states_matrix,
    "trim-ends" => \&trim_ends,
    "bin-inform" => \&binary_informative,
    "bin-ref" => \&binary_ref,
    "phy-nonint" => \&phylip_non_interleaved
   );


##################### initializer & option handlers ###################

## TODO Formal testing!

=head1 SUBROUTINES

=head2 initialize()

Sets up most of the actions to be performed on an alignment.

Call this right after setting up an options hash.

Sets package variables: C<$in_format>, C<$binary>, C<$out_format>, and C<$out>.

=cut

sub initialize {
    my $opts_ref = shift;
    Bio::BPWrapper::common_opts($opts_ref);
    %opts = %{$opts_ref};

    # This is the format that aln-manipulations expects by default
    my $default_format = "clustalw";

    # assume we're getting input from standard input

#    my $in_format = $opts{"input"} || $default_format;
#    my $in_format;
#    use IO::Scalar;
#    my $s;
#    my ($guesser);
#    if ($file eq "STDIN") {
#	my $line_ct = 0; 
#	my $lines;
#	while(<>) { $lines .= $_; $line_ct++; last if $line_ct >= 100 } # read the first 100 lines
#	$guesser = Bio::Tools::GuessSeqFormat->new( -text => $lines );
#   } else {
#	open $ifh, "<", $file or die $!;
#	$guesser = Bio::Tools::GuessSeqFormat->new( -file => $file );
#    }
#    $in_format  = $guesser->guess();
#    die "unknown file format. Try specify with -i flag.\n" unless $in_format;
#    seek (STDIN, 0, 0);
#    warn "$in_format\n";

    my $in_format = $opts{'input'} || 'clustalw';
    if ($opts{"concat"}) {
#	foreach my $file (glob @ARGV) {
	while ($file = shift @ARGV) {
#	    warn "reading $file\n";
#	       $guesser = Bio::Tools::GuessSeqFormat->new( -file => $file);
#	       $in_format  = $guesser->guess;
	       $in = Bio::AlignIO->new(-file => $file, -format => $in_format);
	       while ($aln=$in->next_aln()) { push @alns, $aln }
	}
    } else {
	$file = shift @ARGV || "STDIN";    # If no more arguments were given on the command line
	if ($in_format && $in_format =~ /blast/) { # guess blastoutput as "phylip", so -i 'blast' is needed
#	if ($opts{"input"} && $opts{"input"} =~ /blast/) { # "blastxml" (-outfmt 5 ) preferred
	    my $searchio = Bio::SearchIO->new( -format => 'blast', ($file eq "STDIN")? (-fh => \*STDIN) : (-file => $file)); # works for regular blast output
#	    my $searchio = Bio::SearchIO->new( -format => 'blast', -fh => $ifh);
	    while ( my $result = $searchio->next_result() ) {
		while( my $hit = $result->next_hit ) {
 		    my $hsp = $hit->next_hsp; # get first hit; others ignored
		    $aln = $hsp->get_aln();
		}
	    }
	} else { # would throw error if format guessed wrong
#	    $in = Bio::AlignIO->new(-format => $in_format, ($file eq "STDIN")? (-fh => \*STDIN) : (-file => $file));
#	    $in = Bio::AlignIO->new(-format => $in_format, -fh => $ifh);
	    $in = Bio::AlignIO->new(-format=>$in_format, ($file eq "STDIN")? (-fh => \*STDIN) : (-file => $file) );
	    $aln = $in->next_aln()
	}
    }
    
    $binary = $opts{"binary"} ? 1 : 0;
    
    #### Options which *require an output FH* go *after* this ####
    $out_format = $opts{"output"} || $default_format;
    $out = Bio::AlignIO->new(-format => $out_format, -fh => \*STDOUT) unless $out_format eq 'paml'
}

sub can_handle {
    my $option = shift;
    return defined($opt_dispatch{$option})
}

sub handle_opt {
    my $option = shift;
    # This passes option name to all functions
    $opt_dispatch{$option}->($option)
}

=head2 write_out_paml()

Writes output in PAML format.

=cut

sub write_out_paml() {
    my @seq;
    my $ct=0;

    foreach my $seq ($aln->each_seq()) {
        my $id = $seq->display_id();
        if ($seq->seq() =~ /^-+$/) { print STDERR "all gaps: $file\t$id\n"; next }
        $ct++;
        push @seq, $seq
    }

    die "No computable sequences: less than 2 seq.\n" unless $ct >= 2;
    print $ct, "\t", $aln->length(), "\n";
    foreach (@seq) {
	   print $_->display_id(), "\n";
	   print $_->seq(), "\n"
    }
}

=head2 write_out()

Performs the bulk of the alignment actions actions set via
L<C<initialize(\%opts)>|/initialize> and calls
L<C<$AlignIO-E<gt>write_aln()>|https://metacpan.org/pod/Bio::AlignIO#write_aln>
or L<C<write_out_paml()>|/write_out_paml>.

Call this after calling C<#initialize(\%opts)>.

=cut



( run in 1.050 second using v1.01-cache-2.11-cpan-39bf76dae61 )