BioPerl
view release on metacpan or search on metacpan
Bio/SimpleAlign.pm view on Meta::CPAN
unless( exists( $self->{'_start_end_lists'}->{$id})) {
$self->{'_start_end_lists'}->{$id} = [];
}
push @{$self->{'_start_end_lists'}->{$id}}, $seq;
}
$self->{'_seq'}->{$name} = $seq;
}
=head2 remove_seq
Title : remove_seq
Usage : $aln->remove_seq($seq);
Function : Removes a single sequence from an alignment
Returns :
Argument : a Bio::LocatableSeq object
=cut
sub removeSeq {
my $self = shift;
$self->deprecated("removeSeq - deprecated method. Use remove_seq() instead.");
$self->remove_seq(@_);
}
sub remove_seq {
my $self = shift;
my $seq = shift;
my ($name,$id);
$self->throw("Need Bio::Locatable seq argument ")
unless ref $seq && $seq->isa( 'Bio::LocatableSeq');
$id = $seq->id();
$name = $seq->get_nse;
if( !exists $self->{'_seq'}->{$name} ) {
$self->throw("Sequence $name does not exist in the alignment to remove!");
}
delete $self->{'_seq'}->{$name};
# we need to remove this seq from the start_end_lists hash
if (exists $self->{'_start_end_lists'}->{$id}) {
# we need to find the sequence in the array.
my ($i, $found);;
for ($i=0; $i < @{$self->{'_start_end_lists'}->{$id}}; $i++) {
if (${$self->{'_start_end_lists'}->{$id}}[$i] eq $seq) {
$found = 1;
last;
}
}
if ($found) {
splice @{$self->{'_start_end_lists'}->{$id}}, $i, 1;
}
else {
$self->throw("Could not find the sequence to remoce from the start-end list");
}
}
else {
$self->throw("There is no seq list for the name $id");
}
# we need to shift order hash
my %rev_order = reverse %{$self->{'_order'}};
my $no = $rev_order{$name};
my $num_sequences = $self->num_sequences;
for (; $no < $num_sequences; $no++) {
$self->{'_order'}->{$no} = $self->{'_order'}->{$no+1};
}
delete $self->{'_order'}->{$no};
return 1;
}
=head2 purge
Title : purge
Usage : $aln->purge(0.7);
Function: Removes sequences above given sequence similarity
This function will grind on large alignments. Beware!
Example :
Returns : An array of the removed sequences
Args : float, threshold for similarity
=cut
sub purge {
my ($self,$perc) = @_;
my (%duplicate, @dups);
my @seqs = $self->each_seq();
for (my $i=0;$i< @seqs - 1;$i++ ) { #for each seq in alignment
my $seq = $seqs[$i];
#skip if already in duplicate hash
next if exists $duplicate{$seq->display_id} ;
my $one = $seq->seq();
my @one = split '', $one; #split to get 1aa per array element
for (my $j=$i+1;$j < @seqs;$j++) {
my $seq2 = $seqs[$j];
#skip if already in duplicate hash
next if exists $duplicate{$seq2->display_id} ;
my $two = $seq2->seq();
my @two = split '', $two;
my $count = 0;
my $res = 0;
for (my $k=0;$k<@one;$k++) {
if ( $one[$k] ne '.' && $one[$k] ne '-' && defined($two[$k]) &&
$one[$k] eq $two[$k]) {
$count++;
}
( run in 0.636 second using v1.01-cache-2.11-cpan-39bf76dae61 )