BioPerl-Run

 view release on metacpan or  search on metacpan

lib/Bio/Tools/Run/Alignment/Muscle.pm  view on Meta::CPAN

sub align {
    my ( $self, $input ) = @_;

    # Create input file pointer
    $self->io->_io_cleanup();
    my $infilename;
    if ( defined $input ) {
        $infilename = $self->_setinput($input);
    }
    elsif ( defined $self->in ) {
        $infilename = $self->_setinput( $self->in );
    }
    else {
        $self->throw("No inputdata provided\n");
    }
    if ( !$infilename ) {
        $self->throw("Bad input data or less than 2 sequences in $input !");
    }

    my $param_string = $self->_setparams();

    # run muscle
    return &_run( $self, $infilename, $param_string );
}

=head2  run_profile

 Title   : run_profile
 Usage   : $alnfilename = /t/data/cysprot.msa';
           $seqsfilename = 't/data/cysprot.fa';
           $aln = $factory->run_profile($alnfilename,$seqsfilename);

 Function: Perform a profile alignment on a MSA to include more seqs
 Returns : Reference to a SimpleAlign object containing the
           sequence alignment.
 Args    : Name of a file containing the fasta MSA and name of a file
           containing a set of unaligned fasta sequences
 Comments: This only works for muscle version 3.52.
           Some early versions of the 3.6 sources had a bug that
           caused a segfault with -profile. The attached should fix
           it, if not let Bob Edgar know.

=cut

sub run_profile {
    my ( $self, $alnfilename, $seqsfilename ) = @_;

    # Create input file pointer
    $self->io->_io_cleanup();
    if ( $self->version ne '3.52' ) {
        $self->throw("profile does not work for this version of muscle\n");
    }
    my $infilename;
    if ( defined $alnfilename ) {
        if ( !ref $alnfilename ) {

            # check that file exists or throw
            $infilename = $alnfilename;
            unless ( -e $infilename ) { return 0; }

            # let's peek and guess
            open( IN, $infilename ) || $self->throw("Cannot open $infilename");
            my $header;
            while ( defined( $header = <IN> ) ) {
                last if $header !~ /^\s+$/;
            }
            close(IN);
            if ( $header !~ /^>\s*\S+/ ) {
                $self->throw(
                    "Need to provide a FASTA format file to muscle profile!");
            }
        }
    }
    else {
        $self->throw("No inputdata provided\n");
    }
    if ( !$infilename ) {
        $self->throw(
            "Bad input data or less than 2 sequences in $infilename !");
    }
    if ( defined $seqsfilename ) {
        if ( !ref $seqsfilename ) {

            # check that file exists or throw
            $infilename = $seqsfilename;
            unless ( -e $infilename ) { return 0; }

            # let's peek and guess
            open( IN, $infilename ) || $self->throw("Cannot open $infilename");
            my $header;
            while ( defined( $header = <IN> ) ) {
                last if $header !~ /^\s+$/;
            }
            close(IN);
            if ( $header !~ /^>\s*\S+/ ) {
                $self->throw(
                    "Need to provide a FASTA format file to muscle profile!");
            }
        }
    }
    else {
        $self->throw("No inputdata provided\n");
    }
    if ( !$infilename ) {
        $self->throw(
            "Bad input data or less than 2 sequences in $infilename !");
    }

    my $param_string = $self->_setparams();

    # run muscle
    $self->{_profile} = 1;
    return &_run( $self, "$alnfilename -in2 $seqsfilename", $param_string );
}

=head2 aformat

 Title   : aformat
 Usage   : my $alignmentformat = $self->aformat();
 Function: Get/Set alignment format
 Returns : string
 Args    : string

=cut

sub aformat {
    my $self = shift;
    $self->{'_aformat'} = shift if @_;
    return $self->{'_aformat'};
}

=head2  _run

 Title   :  _run
 Usage   :  Internal function, not to be called directly
 Function:  makes actual system call to muscle program
 Example :
 Returns : nothing; muscle output is written to a
           temporary file OR specified output file
 Args    : Name of a file containing a set of unaligned fasta sequences
           and hash of parameters to be passed to muscle


=cut

sub _run {
    my ( $self, $infilename, $params ) = @_;
    my $commandstring;
    if ( $self->{_profile} ) {
        $commandstring =
          $self->executable . " -profile -in1 $infilename $params";
        $self->{_profile} = 0;
    }
    else {
        $commandstring = $self->executable . " -in $infilename $params";
    }

    $self->debug("muscle command = $commandstring \n");

    my $status  = system($commandstring);
    my $outfile = $self->outfile_name();
    if ( !-e $outfile || -z $outfile ) {
        $self->warn("Muscle call crashed: $? [command $commandstring]\n");
        return undef;
    }

    my $in = Bio::AlignIO->new(
        '-file'   => $outfile,
        '-format' => $self->aformat
    );
    my $aln = $in->next_aln();
    return $aln;
}

=head2  _setinput

 Title   :  _setinput
 Usage   :  Internal function, not to be called directly
 Function:  Create input file for muscle program
 Example :
 Returns : name of file containing muscle data input AND
 Args    : Arrayref of Seqs or input file name

=cut

sub _setinput {
    my ( $self, $input ) = @_;
    my ( $infilename, $seq, $temp, $tfh );
    if ( !ref $input ) {

        # check that file exists or throw
        $infilename = $input;
        unless ( -e $input ) { return 0; }

        # let's peek and guess
        open( IN, $infilename ) || $self->throw("Cannot open $infilename");
        my $header;
        while ( defined( $header = <IN> ) ) {
            last if $header !~ /^\s+$/;
        }
        close(IN);
        if ( $header !~ /^>\s*\S+/ ) {
            $self->throw("Need to provide a FASTA format file to muscle!");
        }
        return ($infilename);
    }
    elsif ( ref($input) =~ /ARRAY/i ) {

        # $input may be an array of BioSeq objects...
        # Open temporary file for both reading & writing of array
        ( $tfh, $infilename ) = $self->io->tempfile();
        if ( !ref( $input->[0] ) ) {
            $self->warn(
                "passed an array ref which did not contain objects to _setinput"
            );
            return undef;
        }
        elsif ( $input->[0]->isa('Bio::PrimarySeqI') ) {
            $temp = Bio::SeqIO->new(
                '-fh'     => $tfh,
                '-format' => 'fasta'
            );
            my $ct = 1;
            foreach $seq (@$input) {
                return 0 unless ( ref($seq)
                    && $seq->isa("Bio::PrimarySeqI") );
                if ( !defined $seq->display_id
                    || $seq->display_id =~ /^\s+$/ )
                {
                    $seq->display_id( "Seq" . $ct++ );
                }
                $temp->write_seq($seq);
            }
            $temp->close();
            undef $temp;
            close($tfh);
            $tfh = undef;
        }
        else {
            $self->warn( "got an array ref with 1st entry "
                  . $input->[0]
                  . " and don't know what to do with it\n" );
        }
        return ($infilename);
    }
    else {
        $self->warn("Got $input and don't know what to do with it\n");
    }
    return 0;
}

=head2  _setparams

 Title   :  _setparams
 Usage   :  Internal function, not to be called directly



( run in 2.512 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )