Bio-Tools-Run-Alignment-TCoffee

 view release on metacpan or  search on metacpan

lib/Bio/Tools/Run/Alignment/TCoffee.pm  view on Meta::CPAN

            }
            $instring .= ' -matrix='.$self->matrix unless (($self->matrix =~ /none/i) || ($self->matrix =~ /null/i)) ;
            $instring .= ' -method='.join(',',$self->methods) if ($self->methods) ;
        }
    }
    my $param_string = shift;
#    my ($paramfh,$parameterFile) = $self->io->tempfile;
#    print $paramfh ( "$instring\n-output=gcg$param_string") ;
#    close($paramfh);
#    my $commandstring = "t_coffee -output=gcg -parameters $parameterFile" ;    ##MJL

    my $commandstring = $self->executable." $instring $param_string";

    #$self->debug( "tcoffee command = $commandstring \n");

    my $status = system($commandstring);
    my $outfile = $self->outfile();

    if( !-e $outfile || -z $outfile ) {
        $self->warn( "TCoffee call crashed: $? [command $commandstring]\n");
        return undef;
    }

    # retrieve alignment (Note: MSF format for AlignIO = GCG format of
    # tcoffee)

    my $in  = Bio::AlignIO->new('-file' => $outfile, '-format' =>
                                $self->output);
    my $aln = $in->next_aln();

    # Replace file suffix with dnd to find name of dendrogram file(s) to delete
    if( ! $self->keepdnd ) {
        foreach my $f ( $infilename, $infile1, $infile2 ) {
            next if( !defined $f || $f eq '');
            $f =~ s/\.[^\.]*$// ;
            # because TCoffee writes these files to the CWD
            if( $Bio::Root::IO::PATHSEP ) {
                my @line = split(/$Bio::Root::IO::PATHSEP/, $f);
                $f = pop @line;
            } else {
                (undef, undef, $f) = File::Spec->splitpath($f);
            }
            unlink $f .'.dnd' if( $f ne '' );
        }
    }
    return $aln;
}



sub _setinput {
    my ($self,$input) = @_;
    my ($infilename, $seq, $temp, $tfh);
    #  If $input is not a reference it better be the name of a
    # file with the sequence/ alignment data...
    my $type = '';
    if (! ref $input) {
        # check that file exists or throw
        $infilename = $input;
        unless (-e $input) {return 0;}
        # let's peek and guess
        open(my $IN,$infilename) || $self->throw("Cannot open $infilename");
        my $header = <$IN>;
        if( $header =~ /^\s+\d+\s+\d+/ ||
            $header =~ /Pileup/i ||
            $header =~ /clustal/i ) { # phylip
            $type = 'A';
        }

        # On some systems, having filenames with / in them (ie. a file in a
        # directory) causes t-coffee to completely fail. It warns on all systems.
        # The -no_warning option solves this, but there is still some strange
        # bug when doing certain profile-related things. This is magically solved
        # by copying the profile file to a temp file in the current directory, so
        # it its filename supplied to t-coffee contains no /
        # (It's messy here - I just do this to /all/ input files to most easily
        #  catch all variants of providing a profile - it may only be the last
        #  form (isa("Bio::PrimarySeqI")) that causes a problem?)

        my (undef, undef, $adjustedfilename) = File::Spec->splitpath($infilename);
        if ($adjustedfilename ne $infilename) {
            my ($fh, $tempfile) = $self->io->tempfile(-dir => cwd());
            seek($IN, 0, 0);
            while (<$IN>) {
                print $fh $_;
            }
            close($fh);
            (undef, undef, $tempfile) = File::Spec->splitpath($tempfile);
            $infilename = $tempfile;
            $type = 'S';
        }

        close($IN);
        return ($infilename,$type);
    } elsif (ref($input) =~ /ARRAY/i ) {
        #  $input may be an array of BioSeq objects...
        #  Open temporary file for both reading & writing of array
        ($tfh,$infilename) = $self->io->tempfile(-dir => cwd());
        (undef, undef, $infilename) = File::Spec->splitpath($infilename);
        if( ! ref($input->[0]) ) {
            $self->warn("passed an array ref which did not contain objects to _setinput");
            return undef;
        } elsif( $input->[0]->isa('Bio::PrimarySeqI') ) {
            $temp =  Bio::SeqIO->new('-fh' => $tfh,
                         '-format' => 'fasta');
            my $ct = 1;
            foreach $seq (@$input) {
                return 0 unless ( ref($seq) &&
                          $seq->isa("Bio::PrimarySeqI") );
                if( ! defined $seq->display_id ||
                    $seq->display_id =~ /^\s+$/) {
                    $seq->display_id( "Seq".$ct++);
                }
                $temp->write_seq($seq);
            }
            $temp->close();
            undef $temp;
            close($tfh);
            $tfh = undef;
            $type = 'S';
        } elsif( $input->[0]->isa('Bio::Align::AlignI' ) ) {

lib/Bio/Tools/Run/Alignment/TCoffee.pm  view on Meta::CPAN

=head2 _numeric_version

Version "numbers" are not numeric values.  In the case of T-Coffee,
they have an hash component at their end.  This was not the case in
older releases.  We use the first two numeric parts to do different
things though.  See also issue #1.

=head2 _run

 Title   :  _run
 Usage   :  Internal function, not to be called directly
 Function:  makes actual system call to tcoffee program
 Example :
 Returns : nothing; tcoffee output is written to a
           temporary file OR specified output file
 Args    : Name of a file containing a set of unaligned fasta sequences
           and hash of parameters to be passed to tcoffee

=head2 _setinput

 Title   :  _setinput
 Usage   :  Internal function, not to be called directly
 Function:  Create input file for tcoffee program
 Example :
 Returns : name of file containing tcoffee data input AND
           type of file (if known, S for sequence, L for sequence library,
           A for sequence alignment)
 Args    : Seq or Align object reference or input file name

=head2 _setparams

 Title   :  _setparams
 Usage   :  Internal function, not to be called directly
 Function:  Create parameter inputs for tcoffee program
 Example :
 Returns : parameter string to be passed to tcoffee
           during align or profile_align
 Args    : name of calling object

=head1 PARAMETERS FOR ALIGNMENT COMPUTATION

There are a number of possible parameters one can pass in TCoffee.
One should really read the online manual for the best explanation of
all the features.  See
http://igs-server.cnrs-mrs.fr/~cnotred/Documentation/t_coffee/t_coffee_doc.html

These can be specified as parameters when instantiating a new TCoffee
object, or through get/set methods of the same name (lowercase).

=head2 IN

 Title       : IN
 Description : (optional) input filename, this is specified when
               align so should not use this directly unless one
               understand TCoffee program very well.

=head2 TYPE

 Title       : TYPE
 Args        : [string] DNA, PROTEIN
 Description : (optional) set the sequence type, guessed automatically
               so should not use this directly

=head2 PARAMETERS

 Title       : PARAMETERS
 Description : (optional) Indicates a file containing extra parameters

=head2 EXTEND

 Title       : EXTEND
 Args        : 0, 1, or positive value
 Default     : 1
 Description : Flag indicating that library extension should be
               carried out when performing multiple alignments, if set
               to 0 then extension is not made, if set to 1 extension
               is made on all pairs in the library.  If extension is
               set to another positive value, the extension is only
               carried out on pairs having a weigth value superior to
               the specified limit.

=head2 DP_NORMALISE

 Title       : DP_NORMALISE
 Args        : 0 or positive value
 Default     : 1000
 Description : When using a value different from 0, this flag sets the
               score of the highest scoring pair to 1000.

=head2 DP_MODE

 Title       : DP_MODE
 Args        : [string] gotoh_pair_wise, myers_miller_pair_wise,
               fasta_pair_wise cfasta_pair_wise
 Default     : cfast_fair_wise
 Description : Indicates the type of dynamic programming used by
               the program

    gotoh_pair_wise : implementation of the gotoh algorithm
    (quadratic in memory and time)

    myers_miller_pair_wise : implementation of the Myers and Miller
    dynamic programming algorithm ( quadratic in time and linear in
    space). This algorithm is recommended for very long sequences. It
    is about 2 time slower than gotoh. It only accepts tg_mode=1.

    fasta_pair_wise: implementation of the fasta algorithm. The
    sequence is hashed, looking for ktuples words. Dynamic programming
    is only carried out on the ndiag best scoring diagonals. This is
    much faster but less accurate than the two previous.

    cfasta_pair_wise : c stands for checked. It is the same
    algorithm. The dynamic programming is made on the ndiag best
    diagonals, and then on the 2*ndiags, and so on until the scores
    converge. Complexity will depend on the level of divergence of the
    sequences, but will usually be L*log(L), with an accuracy
    comparable to the two first mode ( this was checked on BaliBase).

=head2 KTUPLE

 Title       : KTUPLE



( run in 0.420 second using v1.01-cache-2.11-cpan-5b529ec07f3 )