BioPerl

 view release on metacpan or  search on metacpan

scripts/utilities/bp_sreformat.pl  view on Meta::CPAN

#!perl
# Author:  Jason Stajich <jason-at-bioperl-dot-org>
# Purpose: Bioperl implementation of Sean Eddy's sreformat
#          We're not as clever as Sean's squid library though so
#          you have to specify the input format rather than letting
#          the application guess.

use strict;
use warnings;
use Bio::SeqIO;
use Bio::AlignIO;
use Getopt::Long;

# Help text shown for -h/--help.  Kept in a non-interpolating heredoc so
# the option table can be edited as plain text without worrying about
# quote or sigil characters.
my $USAGE = <<'END_USAGE';
bp_sreformat -if INFORMAT -of OUTFORMAT -i FILENAME -o output.FORMAT

-h/--help               Print this help
-if/--informat          Specify the input format
-of/--outformat         Specify the output format
-i/--input              Specify the input file name
                        (to pass in data on STDIN use minus sign as filename)
-o/--output             Specify the output file name
                        (to pass data out on STDOUT use minus sign as filename)
--msa                   Specify this is multiple sequence alignment data
--special=specialparams Specify special params supported by some formats
                        Comma or space separated please.
                        These include:
                        nointerleaved   -- for phylip,non-interleaved format
                        idlinebreak     -- for phylip, makes it molphy format
                        percentages     -- for clustalw, show % id per line
                        flat            -- don't show start-end in seqid
                        linelength      -- line length for clustalw
                        mrbayes         -- for MrBayes proper NEXUS output
END_USAGE


# Command-line settings; each one stays undef unless its option is given.
#   $input / $output        file names for reading and writing
#   $informat / $outformat  format names for reading and writing
#   $msa                    true when the data is a multiple alignment
#   $special                raw --special string, parsed further below
my ($input,$output,$informat,$outformat,$msa,$special);

# Parse the command line.  GetOptions returns false when it meets an
# unknown or malformed option (it has already warned on STDERR by then),
# so stop with the usage text rather than converting with settings that
# were only partly understood.
GetOptions(
    # -h prints the usage text and exits immediately with status 0
    'h|help'         => sub { print STDERR ($USAGE); exit(0) },
    'i|input:s'      => \$input,
    'o|output:s'     => \$output,
    'if|informat:s'  => \$informat,
    'of|outformat:s' => \$outformat,
    'msa'            => \$msa,
    's|special:s'    => \$special,
) or die($USAGE);

# Both an input and an output format must be named on the command line.
# When either is missing, abort and echo what was received, printing ''
# in place of a format that was never supplied.
if( !defined $informat || !defined $outformat ) {
    my @given = map { defined $_ ? $_ : "''" } ($informat, $outformat);
    die(sprintf("Cannot proceed without a defined input and output you gave (%s,%s)\n",
                @given));
}

# $in / $out hold the reader and writer objects created further down.
my ($in,$out);

# Extra constructor arguments for the output writer, derived from the
# --special option.
my @extra;
if( $special ) {
    # Tokens may be separated by commas and/or whitespace.  Split on runs
    # of separators and drop empty tokens, so that input such as "a, b",
    # "a,,b" or ",a" does not produce an empty token (which would
    # otherwise fall through to the last branch and add a bogus
    # '-' => 1 argument).
    for my $param ( grep { length } split(/[\s,]+/, $special) ) {
        if( $param =~ /nointerleaved/ ) {
            # phylip: ask for non-interleaved output
            push @extra, '-interleaved' => '0';
        } elsif( $param =~ /mrbayes/ ) {
            # NEXUS output intended for MrBayes
            push @extra, '-show_symbols'  => 0,
                         '-show_endblock' => 0;
        } elsif( $param =~ /(\S+)\=(\S+)/ ) {
            # name=value form, e.g. linelength=80
            push @extra, "-$1" => $2;
        } else {
            # bare flag: switch it on
            push @extra, "-$param" => 1;
        }
    }
}
# Guess that we are dealing with a multiple sequence alignment when either
# format name contains one of the standard MSA format names, even if --msa
# was not given.  The match ignores case so that e.g. "-if PHYLIP" is
# detected as well; this relies on Bio::AlignIO treating format names as
# case-insensitive (per its documentation -- confirm).
for my $format ( $informat, $outformat ) {
    $msa = 1 if $format =~ /nexus|phylip|clustal|maf|stockholm|bl2seq|msf/i;
}

if( $msa ) {
    eval {
	if( defined $input ) {
	    $in = new Bio::AlignIO(-format => $informat, -file => $input);
	} else {
	    $in = new Bio::AlignIO(-format => $informat, -fh => \*ARGV);
	}
    };
    if( $@ ) {
	die("Unknown MSA format to bioperl $informat\n");
    }
    eval {
	if( $output ) {
	    $out = new Bio::AlignIO(-format => $outformat,
				    -file => ">$output", @extra);
	} else {
	    # default to STDOUT for output



( run in 0.630 second using v1.01-cache-2.11-cpan-39bf76dae61 )