Algorithm-NeedlemanWunsch
view release on metacpan or search on metacpan
lib/Algorithm/NeedlemanWunsch.pm view on Meta::CPAN
++$j;
}
} else {
$j = 1;
while ($j <= $n) {
foreach (@D) {
$_->[0]->[$j] = 0;
}
++$j;
}
}
my $i = 1;
while ($i <= $m) {
foreach (@D) {
$_->[$i]->[0] = $self->{gap_open_penalty} +
($i - 1) * $self->{gap_extend_penalty};
}
++$i;
}
# order must correspond to $from_* constants
my @subproblems = ( $score_diag, $score_up, $score_left );
$i = 1;
while ($i <= $m) {
$j = 1;
while ($j <= $n) {
my $k = 0;
while ($k < 3) { # scalar(@D), scalar(@subproblems)
$D[$k]->[$i]->[$j] = &{$subproblems[$k]}($i, $j);
++$k;
}
# my $x = join ', ', map { $_->[$i]->[$j]; } @D;
# warn "$i, $j: $x\n";
++$j;
}
++$i;
}
# like $score_up
my @delta_up = ( $self->{gap_open_penalty}, $self->{gap_extend_penalty},
$self->{gap_open_penalty} );
# like $score_left
my @delta_left = ( $self->{gap_open_penalty}, $self->{gap_open_penalty},
$self->{gap_extend_penalty} );
my @no_delta = (0, 0, 0);
my @score = map { $_->[$m]->[$n]; } @D;
my $res = max(@score);
my $arrow = 0;
my $flag = 1;
my $idx = 0;
while ($idx < 3) { # scalar(@score)
if ($score[$idx] == $res) {
$arrow |= $flag;
}
$flag *= 2;
++$idx;
}
$i = $m;
$j = $n;
while (($i > 0) || ($j > 0)) {
my @alt;
if ($arrow & $from_diag) {
die "internal error" unless ($i > 0) && ($j > 0);
push @alt, [ $i - 1, $j - 1 ];
}
if ($arrow & $from_up) {
die "internal error" unless ($i > 0);
push @alt, [ $i - 1, $j ];
}
if ($arrow & $from_left) {
die "internal error" unless ($j > 0);
push @alt, [ $i, $j - 1];
}
if (!@alt) {
die "internal error";
}
# my $x = join ', ', map { "[ " . $_->[0] . ", " . $_->[1] . " ]"; } @alt;
# warn "$i, $j: $x\n";
my $cur = [ $i, $j ];
my $move;
if (@alt == 1) {
$move = $self->_simple_trace_back($cur, $alt[0],
$self->{callbacks});
} else {
$move = $self->_trace_back($cur, \@alt);
}
if ($move eq 'align') {
--$i;
--$j;
@score = map { $_->[$i]->[$j]; } @D;
if ($i == 0) {
$arrow = $from_left;
} elsif ($j == 0) {
$arrow = $from_up;
} else {
my $d = max(@score);
$arrow = 0;
$flag = 1;
$idx = 0;
while ($idx < 3) { # scalar(@score)
if ($score[$idx] == $d) {
$arrow |= $flag;
}
$flag *= 2;
++$idx;
}
}
} elsif ($move eq 'shift_a') {
--$j;
my @base = map { $_->[$i]->[$j] } @D;
my $delta;
if ($self->{local} && ($i == $m)) {
$delta = \@no_delta;
} else {
$delta = \@delta_left;
}
$arrow = $self->_retread($score[$from_left_idx], $i, $j,
\@base, $delta);
@score = @base;
} elsif ($move eq 'shift_b') {
--$i;
my @base = map { $_->[$i]->[$j] } @D;
$arrow = $self->_retread($score[$from_up_idx], $i, $j,
\@base, \@delta_up);
@score = @base;
} else {
die "internal error";
}
}
return $res;
}
sub _retread {
my ($self, $to_score, $i, $j, $base, $delta) = @_;
if ($i == 0) {
return $from_left;
} elsif ($j == 0) {
return $from_up;
}
my $a = 0;
my $flag = 1;
my $idx = 0;
while ($idx < 3) {
if ($base->[$idx] + $delta->[$idx] == $to_score) {
$a |= $flag;
}
$flag *= 2;
++$idx;
}
return $a;
}
sub _trace_back {
my ($self, $cur, $sources) = @_;
my $arg = { };
foreach my $next (@$sources) {
my $m = $self->_simple_trace_back($cur, $next, { });
if ($m eq 'align') {
$arg->{align} = [ $cur->[1] - 1, $cur->[0] - 1 ];
} elsif ($m eq 'shift_a') {
$arg->{shift_a} = $cur->[1] - 1;
} elsif ($m eq 'shift_b') {
$arg->{shift_b} = $cur->[0] - 1;
} else {
die "internal error";
}
}
my $move;
my $cb = $self->{callbacks};
if (exists($cb->{select_align})) {
$move = &{$cb->{select_align}}($arg);
if (!exists($arg->{$move})) {
die "select_align callback returned invalid selection $move.";
}
} else {
my @cn = qw(align shift_a shift_b);
foreach my $m (@cn) {
if (exists($arg->{$m})) {
$move = $m;
last;
}
}
if (!$move) {
die "internal error";
}
if (exists($cb->{$move})) {
if ($move eq 'align') {
&{$cb->{align}}(@{$arg->{align}});
} else {
&{$cb->{$move}}($arg->{$move});
}
}
}
return $move;
}
sub _simple_trace_back {
my ($self, $cur, $next, $cb) = @_;
if ($next->[0] == $cur->[0] - 1) {
if ($next->[1] == $cur->[1] - 1) {
lib/Algorithm/NeedlemanWunsch.pm view on Meta::CPAN
=item select_align
Called when there's more than one way to construct the optimal
alignment, with 1 argument which is a hashref enumerating the
possibilities. The hash may contain the following keys:
=over
=item align
If this key exists, the optimal alignment may align two sequence
items. The key's value is an arrayref with the positions of the paired
items in C<\@a> and C<\@b>, respectively.
=item shift_a
If this key exists, the optimal alignment may align an item of the
first sequence with a gap in the second sequence. The key's value is
the position of the item in C<\@a>.
=item shift_b
If this key exists, the optimal alignment may align a gap in the first
sequence with an item of the second sequence. The key's value is
the position of the item in C<\@b>.
=back
All keys are optional, but the hash will always have at least one. The
callback must select one of the possibilities by returning one of the
keys.
=back
All callbacks are optional. When there is just one way to make the
optimal alignment, the C<Algorithm::NeedlemanWunsch> object prefers
calling the specific callbacks, but will call C<select_align> if it's
defined and the specific callback isn't.
Note that C<select_align> is called I<instead> of the specific
callbacks, not in addition to them - users defining both
C<select_align> and other callbacks should probably call the specific
callback explicitly from their C<select_align>, once it decides which
one to prefer.
Also note that the passed positions move backwards, from the sequence
ends to zero - if you're building the alignment in your callbacks, add
items to the front.
=head2 Extensions
In addition to the standard Needleman-Wunsch algorithm, this module
also implements two popular extensions: local alignment and affine
block gap penalties. Use of both extensions is controlled by setting
the properties of C<Algorithm::NeedlemanWunsch> object described
below.
=head3 local
When this flag is set before calling
C<Algorithm::NeedlemanWunsch::align>, the alignment scoring doesn't
charge the gap penalty for gaps at the beginning (i.e. before the
first item) and end (after the last item) of the second sequence
passed to C<align>, so that for example the optimal (with identity
matrix as similarity matrix and a negative gap penalty) alignment of
C<a b c d e f g h> and C<b c h> becomes
sequence A: a b c d e f g h
| |
sequence B: b c h
instead of the global
sequence A: a b c d e f g h
| | |
sequence B: - b c - - - - h
Note that local alignment is asymmetrical - when using it, the longer
sequence should be the first passed to
C<Algorithm::NeedlemanWunsch::align>.
=head3 gap_open_penalty, gap_extend_penalty
Using the same gap penalty for every gap has the advantage of
simplicity, but some applications may want a more complicated
approach. Biologists, for example, looking for gaps longer than one
DNA sequence base, typically want to distinguish a gap opening
(costly) from more missing items following it (shouldn't cost so
much). That requirement can be modelled by charging 2 gap penalties:
C<gap_open_penalty> for the first gap, and then C<gap_extend_penalty>
for each consecutive gap on the same sequence.
Note that you must set both these properties if you set any of them
and that C<gap_open_penalty> must be less than C<gap_extend_penalty>
(if you know of a use case where the gap opening penalty should be
preferred to gap extension, let me know). With such penalties set
before calling C<Algorithm::NeedlemanWunsch::align>, sequences C<A T G
T A G T G T A T A G T A C A T G C A> and C<A T G T A G T A C A T G C
A> are aligned
sequence A: A T G T A G T G T A T A G T A C A T G C A
| | | | | | | | | | | | | |
sequence B: A T G - - - - - - - T A G T A C A T G C A
i.e. with all gaps bunched together.
=head1 SEE ALSO
L<Algorithm::Diff>, L<Text::WagnerFischer>
=head1 AUTHOR
Vaclav Barta, C<< <vbar@comp.cz> >>
=head1 BUGS
Please report any bugs or feature requests to
C<bug-algorithm-needlemanwunsch at rt.cpan.org>, or through the web
interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Algorithm-NeedlemanWunsch>.
( run in 1.396 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )