BioPerl
view release on metacpan or search on metacpan
Bio/SimpleAlign.pm view on Meta::CPAN
$self->throw("Need two arguments: a regexp and a string")
unless defined $from and defined $to;
foreach $seq ( $self->each_seq() ) {
$temp = $seq->seq();
$temp =~ s/$from/$to/g;
$seq->seq($temp);
}
return 1;
}
=head2 uppercase
Title : uppercase()
Usage : $ali->uppercase()
Function : Sets all the sequences to uppercase
Returns : 1 on success
Argument :
=cut
sub uppercase {
my $self = shift;
my $seq;
my $temp;
foreach $seq ( $self->each_seq() ) {
$temp = $seq->seq();
$temp =~ tr/[a-z]/[A-Z]/;
$seq->seq($temp);
}
return 1;
}
=head2 cigar_line
Title : cigar_line()
Usage : %cigars = $align->cigar_line()
Function : Generates a "cigar" (Compact Idiosyncratic Gapped Alignment
Report) line for each sequence in the alignment. Examples are
"1,60" or "5,10:12,58", where the numbers refer to conserved
positions within the alignment. The keys of the hash are the
NSEs (name/start/end) assigned to each sequence.
Args : threshold (optional, defaults to 100)
Returns : Hash of strings (cigar lines)
=cut
sub cigar_line {
my $self = shift;
my $thr=shift||100;
my %cigars;
my @consensus = split "",($self->consensus_string($thr));
my $len = $self->length;
my $gapchar = $self->gap_char;
# create a precursor, something like (1,4,5,6,7,33,45),
# where each number corresponds to a conserved position
foreach my $seq ( $self->each_seq ) {
my @seq = split "", uc ($seq->seq);
my $pos = 1;
for (my $x = 0 ; $x < $len ; $x++ ) {
if ($seq[$x] eq $consensus[$x]) {
push @{$cigars{$seq->get_nse}},$pos;
$pos++;
} elsif ($seq[$x] ne $gapchar) {
$pos++;
}
}
}
# duplicate numbers - (1,4,5,6,7,33,45) becomes (1,1,4,5,6,7,33,33,45,45)
for my $name (keys %cigars) {
splice @{$cigars{$name}}, 1, 0, ${$cigars{$name}}[0] if
( ${$cigars{$name}}[0] + 1 < ${$cigars{$name}}[1] );
push @{$cigars{$name}}, ${$cigars{$name}}[$#{$cigars{$name}}] if
( ${$cigars{$name}}[($#{$cigars{$name}} - 1)] + 1 <
${$cigars{$name}}[$#{$cigars{$name}}] );
for ( my $x = 1 ; $x < $#{$cigars{$name}} - 1 ; $x++) {
if (${$cigars{$name}}[$x - 1] + 1 < ${$cigars{$name}}[$x] &&
${$cigars{$name}}[$x + 1] > ${$cigars{$name}}[$x] + 1) {
splice @{$cigars{$name}}, $x, 0, ${$cigars{$name}}[$x];
}
}
}
# collapse series - (1,1,4,5,6,7,33,33,45,45) becomes (1,1,4,7,33,33,45,45)
for my $name (keys %cigars) {
my @remove;
for ( my $x = 0 ; $x < $#{$cigars{$name}} ; $x++) {
if ( ${$cigars{$name}}[$x] == ${$cigars{$name}}[($x - 1)] + 1 &&
${$cigars{$name}}[$x] == ${$cigars{$name}}[($x + 1)] - 1 ) {
unshift @remove,$x;
}
}
for my $pos (@remove) {
splice @{$cigars{$name}}, $pos, 1;
}
}
# join and punctuate
for my $name (keys %cigars) {
my ($start,$end,$str) = "";
while ( ($start,$end) = splice @{$cigars{$name}}, 0, 2 ) {
$str .= ($start . "," . $end . ":");
}
$str =~ s/:$//;
$cigars{$name} = $str;
}
%cigars;
}
=head2 match_line
Title : match_line()
Usage : $line = $align->match_line()
Function : Generates a match line - much like consensus string
except that a line indicating the '*' for a match.
Args : (optional) Match line characters ('*' by default)
( run in 1.247 second using v1.01-cache-2.11-cpan-39bf76dae61 )