BioPerl-Run
view release on metacpan or search on metacpan
lib/Bio/Tools/Run/Alignment/Muscle.pm view on Meta::CPAN
sub align {
my ( $self, $input ) = @_;
# Create input file pointer
$self->io->_io_cleanup();
my $infilename;
if ( defined $input ) {
$infilename = $self->_setinput($input);
}
elsif ( defined $self->in ) {
$infilename = $self->_setinput( $self->in );
}
else {
$self->throw("No inputdata provided\n");
}
if ( !$infilename ) {
$self->throw("Bad input data or less than 2 sequences in $input !");
}
my $param_string = $self->_setparams();
# run muscle
return &_run( $self, $infilename, $param_string );
}
=head2 run_profile
Title : run_profile
Usage : $alnfilename = /t/data/cysprot.msa';
$seqsfilename = 't/data/cysprot.fa';
$aln = $factory->run_profile($alnfilename,$seqsfilename);
Function: Perform a profile alignment on a MSA to include more seqs
Returns : Reference to a SimpleAlign object containing the
sequence alignment.
Args : Name of a file containing the fasta MSA and name of a file
containing a set of unaligned fasta sequences
Comments: This only works for muscle version 3.52.
Some early versions of the 3.6 sources had a bug that
caused a segfault with -profile. The attached should fix
it, if not let Bob Edgar know.
=cut
sub run_profile {
my ( $self, $alnfilename, $seqsfilename ) = @_;
# Create input file pointer
$self->io->_io_cleanup();
if ( $self->version ne '3.52' ) {
$self->throw("profile does not work for this version of muscle\n");
}
my $infilename;
if ( defined $alnfilename ) {
if ( !ref $alnfilename ) {
# check that file exists or throw
$infilename = $alnfilename;
unless ( -e $infilename ) { return 0; }
# let's peek and guess
open( IN, $infilename ) || $self->throw("Cannot open $infilename");
my $header;
while ( defined( $header = <IN> ) ) {
last if $header !~ /^\s+$/;
}
close(IN);
if ( $header !~ /^>\s*\S+/ ) {
$self->throw(
"Need to provide a FASTA format file to muscle profile!");
}
}
}
else {
$self->throw("No inputdata provided\n");
}
if ( !$infilename ) {
$self->throw(
"Bad input data or less than 2 sequences in $infilename !");
}
if ( defined $seqsfilename ) {
if ( !ref $seqsfilename ) {
# check that file exists or throw
$infilename = $seqsfilename;
unless ( -e $infilename ) { return 0; }
# let's peek and guess
open( IN, $infilename ) || $self->throw("Cannot open $infilename");
my $header;
while ( defined( $header = <IN> ) ) {
last if $header !~ /^\s+$/;
}
close(IN);
if ( $header !~ /^>\s*\S+/ ) {
$self->throw(
"Need to provide a FASTA format file to muscle profile!");
}
}
}
else {
$self->throw("No inputdata provided\n");
}
if ( !$infilename ) {
$self->throw(
"Bad input data or less than 2 sequences in $infilename !");
}
my $param_string = $self->_setparams();
# run muscle
$self->{_profile} = 1;
return &_run( $self, "$alnfilename -in2 $seqsfilename", $param_string );
}
=head2 aformat
Title : aformat
Usage : my $alignmentformat = $self->aformat();
Function: Get/Set alignment format
Returns : string
Args : string
=cut
sub aformat {
my $self = shift;
$self->{'_aformat'} = shift if @_;
return $self->{'_aformat'};
}
=head2 _run
Title : _run
Usage : Internal function, not to be called directly
Function: makes actual system call to muscle program
Example :
Returns : nothing; muscle output is written to a
temporary file OR specified output file
Args : Name of a file containing a set of unaligned fasta sequences
and hash of parameters to be passed to muscle
=cut
sub _run {
my ( $self, $infilename, $params ) = @_;
my $commandstring;
if ( $self->{_profile} ) {
$commandstring =
$self->executable . " -profile -in1 $infilename $params";
$self->{_profile} = 0;
}
else {
$commandstring = $self->executable . " -in $infilename $params";
}
$self->debug("muscle command = $commandstring \n");
my $status = system($commandstring);
my $outfile = $self->outfile_name();
if ( !-e $outfile || -z $outfile ) {
$self->warn("Muscle call crashed: $? [command $commandstring]\n");
return undef;
}
my $in = Bio::AlignIO->new(
'-file' => $outfile,
'-format' => $self->aformat
);
my $aln = $in->next_aln();
return $aln;
}
=head2 _setinput
Title : _setinput
Usage : Internal function, not to be called directly
Function: Create input file for muscle program
Example :
Returns : name of file containing muscle data input AND
Args : Arrayref of Seqs or input file name
=cut
sub _setinput {
my ( $self, $input ) = @_;
my ( $infilename, $seq, $temp, $tfh );
if ( !ref $input ) {
# check that file exists or throw
$infilename = $input;
unless ( -e $input ) { return 0; }
# let's peek and guess
open( IN, $infilename ) || $self->throw("Cannot open $infilename");
my $header;
while ( defined( $header = <IN> ) ) {
last if $header !~ /^\s+$/;
}
close(IN);
if ( $header !~ /^>\s*\S+/ ) {
$self->throw("Need to provide a FASTA format file to muscle!");
}
return ($infilename);
}
elsif ( ref($input) =~ /ARRAY/i ) {
# $input may be an array of BioSeq objects...
# Open temporary file for both reading & writing of array
( $tfh, $infilename ) = $self->io->tempfile();
if ( !ref( $input->[0] ) ) {
$self->warn(
"passed an array ref which did not contain objects to _setinput"
);
return undef;
}
elsif ( $input->[0]->isa('Bio::PrimarySeqI') ) {
$temp = Bio::SeqIO->new(
'-fh' => $tfh,
'-format' => 'fasta'
);
my $ct = 1;
foreach $seq (@$input) {
return 0 unless ( ref($seq)
&& $seq->isa("Bio::PrimarySeqI") );
if ( !defined $seq->display_id
|| $seq->display_id =~ /^\s+$/ )
{
$seq->display_id( "Seq" . $ct++ );
}
$temp->write_seq($seq);
}
$temp->close();
undef $temp;
close($tfh);
$tfh = undef;
}
else {
$self->warn( "got an array ref with 1st entry "
. $input->[0]
. " and don't know what to do with it\n" );
}
return ($infilename);
}
else {
$self->warn("Got $input and don't know what to do with it\n");
}
return 0;
}
=head2 _setparams
Title : _setparams
Usage : Internal function, not to be called directly
( run in 2.512 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )