Bio-Tools-Run-Alignment-Clustalw

 view release on metacpan or  search on metacpan

lib/Bio/Tools/Run/Alignment/Clustalw.pm  view on Meta::CPAN

      # $factory->ktuple(1);  # set ktuple parameter
      print "Performing alignment of $param consensus sequences \n";
      $aln = $factory->align(\@consensus);
      $strout = Bio::AlignIO->newFh('-format' => 'msf');
      print $strout $aln;

      return 1;
  }


  #################################################
  #   vary_align_order():
  #
  # For our second example, we'll test the effect of changing the order
  # that sequences are added to the alignment

  sub vary_align_order {

      print "\nBeginning alignment-order-changing example... \n";

      @consensus = ();  # clear array
      for ($seq_num = 0; $seq_num < scalar(@seq_array); $seq_num++) {
          my $obj_out = shift @seq_array;  # remove one Seq object from array and save
          $id = $obj_out->display_id;
          # align remaining sequences
          print "Performing alignment with sequence $id left out \n";
          $subaln = $factory->align(\@seq_array);
          # add left-out sequence to subalignment
          $aln = $factory->profile_align($subaln,$obj_out);
          $string = $aln->consensus_string(); # Get consensus of alignment
          # convert '?' to 'X' for non-consensus positions
          $string =~ s/\?/X/g;
          $consensus[$seq_num] = Bio::Seq->new(-id=>"$id left out",-seq=>$string);
          push @seq_array, $obj_out;  # return Seq object for next (sub) alignment
      }

      # Compare consensus strings for alignments created in different orders
      # $factory->ktuple(1);  # set ktuple parameter
      print "\nPerforming alignment of consensus sequences for different reorderings \n";
      print "Each consensus is labeled by the sequence which was omitted in the initial alignment\n";
      $aln = $factory->align(\@consensus);
      $strout = Bio::AlignIO->newFh('-format' => 'msf');
      print $strout $aln;

      return 1;
  }

  #################################################
  #   anchored_align()
  #
  # For our last example, we'll test a way to perform a local alignment by
  # "anchoring" the alignment to a regular expression.  This is similar
  # to the approach taken in the recent dbclustal program.
  # In principle, we could write a script to search for a good regular expression
  # to use. Instead, here we'll simply choose one manually after looking at the
  # previous alignments.

  sub anchored_align {

      my @local_array = ();
      my @seqs_not_matched = ();

      print "\n Beginning anchored-alignment example... \n";

      for ($seq_num = 0; $seq_num < scalar(@seq_array); $seq_num++) {
          my $seqobj = $seq_array[$seq_num];
          my $seq =  $seqobj->seq();
          my $id =  $seqobj->id();
          # if $regex is not found in the sequence, save sequence id name and set
          # array value =0 for later
          unless ($seq =~/$regex/) {
              $local_array[$seq_num] = 0;
              push (@seqs_not_matched, $id) ;
              next;
          }
          # find positions of start and of subsequence to be aligned
          my $match_start_pos = length($`);
          my $match_stop_pos = length($`) + length($&);
          my $start =  ($match_start_pos - $extension) > 1 ?
              ($match_start_pos - $extension) +1 : 1;
          my $stop =  ($match_stop_pos + $extension) < length($seq) ?
              ($match_stop_pos + $extension) : length($seq);
          my $string = $seqobj->subseq($start, $stop);

          $local_array[$seq_num] = Bio::Seq->new(-id=>$id, -seq=>$string);
      }
      @local_array = grep $_ , @local_array; # remove array entries with no match

      # Perform alignment on the local segments of the sequences which match "anchor"
      $aln = $factory->align(\@local_array);
      my $consensus  = $aln->consensus_string(); # Get consensus of local alignment

      if (scalar(@seqs_not_matched) ) {
          print " Sequences not matching $regex : @seqs_not_matched \n"
      } else {
          print " All sequences match $regex : @seqs_not_matched \n"
  }
      print "Consensus sequence of local alignment: $consensus \n";

      return 1;
  }

  #----------------
  sub clustalw_usage {
  #----------------

  #-----------------------
  # Prints usage information for general parameters.

      print STDERR <<"QQ_PARAMS_QQ";

   Command-line accessible script variables and commands:
   -------------------------------
   -h                 :  Display this usage info and exit.
   -in <str>          :  File containing input sequences in fasta format (default = $infile) .
   -do <str>          :  String listing examples to be executed. Default is to execute
                         all tests (ie default = '123')
   -param <str>   :  Parameter to be varied in example 1. Any clustalw parameter
                     which takes inteer values can be varied (default = 'ktuple')
   -start <int>   :  Initial value for varying parameter in example 1 (default = 1)
   -stop <int>    :  Final value for varying parameter (default = 3)
   -regex   <str> :  Regular expression for 'anchoring' alignment in example 3
                     (default = $regex)
   -ext <int>     :  Distance regexp anchor should be extended in each direction
                     for local alignment in example 3   (default = 30)

  In addition, any valid Clustalw parameter can be set using the syntax
  "parameter=>value" as in "ktuple=>3"

  So a typical command lines might be:
   > clustalw.pl -param=pairgap -start=2 -stop=3 -do=1 "ktuple=>3"
  or
   > clustalw.pl -ext=10 -regex='W[AST]F' -do=23 -in='t/cysprot1a.fa'

  QQ_PARAMS_QQ

  }

=head1 PARAMETER FOR ALIGNMENT COMPUTATION

=head2 KTUPLE

 Title       : KTUPLE
 Description : (optional) set the word size to be used in the alignment
               This is the size of exactly matching fragment that is used.
               INCREASE for speed (max= 2 for proteins; 4 for DNA),
               DECREASE for sensitivity.
               For longer sequences (e.g. >1000 residues) you may
               need to increase the default

=head2 TOPDIAGS

 Title       : TOPDIAGS
 Description : (optional) number of best diagonals to use
               The number of k-tuple matches on each diagonal
               (in an imaginary dot-matrix plot) is calculated.



( run in 1.838 second using v1.01-cache-2.11-cpan-39bf76dae61 )