Bio-Tools-Run-Alignment-Clustalw
view release on metacpan or search on metacpan
lib/Bio/Tools/Run/Alignment/Clustalw.pm view on Meta::CPAN
# $factory->ktuple(1); # set ktuple parameter
print "Performing alignment of $param consensus sequences \n";
$aln = $factory->align(\@consensus);
$strout = Bio::AlignIO->newFh('-format' => 'msf');
print $strout $aln;
return 1;
}
#################################################
# vary_align_order():
#
# For our second example, we'll test the effect of changing the order
# that sequences are added to the alignment
sub vary_align_order {
print "\nBeginning alignment-order-changing example... \n";
@consensus = (); # clear array
for ($seq_num = 0; $seq_num < scalar(@seq_array); $seq_num++) {
my $obj_out = shift @seq_array; # remove one Seq object from array and save
$id = $obj_out->display_id;
# align remaining sequences
print "Performing alignment with sequence $id left out \n";
$subaln = $factory->align(\@seq_array);
# add left-out sequence to subalignment
$aln = $factory->profile_align($subaln,$obj_out);
$string = $aln->consensus_string(); # Get consensus of alignment
# convert '?' to 'X' for non-consensus positions
$string =~ s/\?/X/g;
$consensus[$seq_num] = Bio::Seq->new(-id=>"$id left out",-seq=>$string);
push @seq_array, $obj_out; # return Seq object for next (sub) alignment
}
# Compare consensus strings for alignments created in different orders
# $factory->ktuple(1); # set ktuple parameter
print "\nPerforming alignment of consensus sequences for different reorderings \n";
print "Each consensus is labeled by the sequence which was omitted in the initial alignment\n";
$aln = $factory->align(\@consensus);
$strout = Bio::AlignIO->newFh('-format' => 'msf');
print $strout $aln;
return 1;
}
#################################################
# anchored_align()
#
# For our last example, we'll test a way to perform a local alignment by
# "anchoring" the alignment to a regular expression. This is similar
# to the approach taken in the recent dbclustal program.
# In principle, we could write a script to search for a good regular expression
# to use. Instead, here we'll simply choose one manually after looking at the
# previous alignments.
sub anchored_align {
my @local_array = ();
my @seqs_not_matched = ();
print "\n Beginning anchored-alignment example... \n";
for ($seq_num = 0; $seq_num < scalar(@seq_array); $seq_num++) {
my $seqobj = $seq_array[$seq_num];
my $seq = $seqobj->seq();
my $id = $seqobj->id();
# if $regex is not found in the sequence, save sequence id name and set
# array value =0 for later
unless ($seq =~/$regex/) {
$local_array[$seq_num] = 0;
push (@seqs_not_matched, $id) ;
next;
}
# find positions of start and of subsequence to be aligned
my $match_start_pos = length($`);
my $match_stop_pos = length($`) + length($&);
my $start = ($match_start_pos - $extension) > 1 ?
($match_start_pos - $extension) +1 : 1;
my $stop = ($match_stop_pos + $extension) < length($seq) ?
($match_stop_pos + $extension) : length($seq);
my $string = $seqobj->subseq($start, $stop);
$local_array[$seq_num] = Bio::Seq->new(-id=>$id, -seq=>$string);
}
@local_array = grep $_ , @local_array; # remove array entries with no match
# Perform alignment on the local segments of the sequences which match "anchor"
$aln = $factory->align(\@local_array);
my $consensus = $aln->consensus_string(); # Get consensus of local alignment
if (scalar(@seqs_not_matched) ) {
print " Sequences not matching $regex : @seqs_not_matched \n"
} else {
print " All sequences match $regex : @seqs_not_matched \n"
}
print "Consensus sequence of local alignment: $consensus \n";
return 1;
}
#----------------
sub clustalw_usage {
#----------------
#-----------------------
# Prints usage information for general parameters.
print STDERR <<"QQ_PARAMS_QQ";
Command-line accessible script variables and commands:
-------------------------------
-h : Display this usage info and exit.
-in <str> : File containing input sequences in fasta format (default = $infile) .
-do <str> : String listing examples to be executed. Default is to execute
all tests (ie default = '123')
-param <str> : Parameter to be varied in example 1. Any clustalw parameter
which takes inteer values can be varied (default = 'ktuple')
-start <int> : Initial value for varying parameter in example 1 (default = 1)
-stop <int> : Final value for varying parameter (default = 3)
-regex <str> : Regular expression for 'anchoring' alignment in example 3
(default = $regex)
-ext <int> : Distance regexp anchor should be extended in each direction
for local alignment in example 3 (default = 30)
In addition, any valid Clustalw parameter can be set using the syntax
"parameter=>value" as in "ktuple=>3"
So a typical command lines might be:
> clustalw.pl -param=pairgap -start=2 -stop=3 -do=1 "ktuple=>3"
or
> clustalw.pl -ext=10 -regex='W[AST]F' -do=23 -in='t/cysprot1a.fa'
QQ_PARAMS_QQ
}
=head1 PARAMETER FOR ALIGNMENT COMPUTATION
=head2 KTUPLE
Title : KTUPLE
Description : (optional) set the word size to be used in the alignment
This is the size of exactly matching fragment that is used.
INCREASE for speed (max= 2 for proteins; 4 for DNA),
DECREASE for sensitivity.
For longer sequences (e.g. >1000 residues) you may
need to increase the default
=head2 TOPDIAGS
Title : TOPDIAGS
Description : (optional) number of best diagonals to use
The number of k-tuple matches on each diagonal
(in an imaginary dot-matrix plot) is calculated.
( run in 1.838 second using v1.01-cache-2.11-cpan-39bf76dae61 )