BioPerl

 view release on metacpan or  search on metacpan

Bio/SimpleAlign.pm  view on Meta::CPAN


    $self->throw("Need two arguments: a regexp and a string")
      unless defined $from and defined $to;

    foreach $seq ( $self->each_seq() ) {
        $temp = $seq->seq();
        $temp =~ s/$from/$to/g;
        $seq->seq($temp);
    }
    return 1;
}


=head2 uppercase

 Title     : uppercase()
 Usage     : $ali->uppercase()
 Function  : Sets all the sequences to uppercase
 Returns   : 1 on success
 Argument  :

=cut

sub uppercase {
    my $self = shift;
    my $seq;
    my $temp;

    foreach $seq ( $self->each_seq() ) {
      $temp = $seq->seq();
      $temp =~ tr/[a-z]/[A-Z]/;

      $seq->seq($temp);
    }
    return 1;
}

=head2 cigar_line

 Title    : cigar_line()
 Usage    : %cigars = $align->cigar_line()
 Function : Generates a "cigar" (Compact Idiosyncratic Gapped Alignment
            Report) line for each sequence in the alignment. Examples are
            "1,60" or "5,10:12,58", where the numbers refer to conserved
            positions within the alignment. The keys of the hash are the
            NSEs (name/start/end) assigned to each sequence.
 Args     : threshold (optional, defaults to 100)
 Returns  : Hash of strings (cigar lines)

=cut

sub cigar_line {
	my $self = shift;
	my $thr=shift||100;
	my %cigars;

	my @consensus = split "",($self->consensus_string($thr));
	my $len = $self->length;
	my $gapchar = $self->gap_char;

	# create a precursor, something like (1,4,5,6,7,33,45),
	# where each number corresponds to a conserved position
	foreach my $seq ( $self->each_seq ) {
		my @seq = split "", uc ($seq->seq);
		my $pos = 1;
		for (my $x = 0 ; $x < $len ; $x++ ) {
			if ($seq[$x] eq $consensus[$x]) {
				push @{$cigars{$seq->get_nse}},$pos;
				$pos++;
			} elsif ($seq[$x] ne $gapchar) {
				$pos++;
			}
		}
	}
	# duplicate numbers - (1,4,5,6,7,33,45) becomes (1,1,4,5,6,7,33,33,45,45)
	for my $name (keys %cigars) {
		splice @{$cigars{$name}}, 1, 0, ${$cigars{$name}}[0] if
		  ( ${$cigars{$name}}[0] + 1 < ${$cigars{$name}}[1] );
      push @{$cigars{$name}}, ${$cigars{$name}}[$#{$cigars{$name}}] if
           ( ${$cigars{$name}}[($#{$cigars{$name}} - 1)] + 1 <
		          ${$cigars{$name}}[$#{$cigars{$name}}] );
		for ( my $x = 1 ; $x < $#{$cigars{$name}} - 1 ; $x++) {
			if (${$cigars{$name}}[$x - 1] + 1 < ${$cigars{$name}}[$x]  &&
		       ${$cigars{$name}}[$x + 1]  > ${$cigars{$name}}[$x] + 1) {
	         splice @{$cigars{$name}}, $x, 0, ${$cigars{$name}}[$x];
			}
      }
	}
  # collapse series - (1,1,4,5,6,7,33,33,45,45) becomes (1,1,4,7,33,33,45,45)
  for my $name (keys %cigars) {
	  my @remove;
	  for ( my $x = 0 ; $x < $#{$cigars{$name}} ; $x++) {
		   if ( ${$cigars{$name}}[$x] == ${$cigars{$name}}[($x - 1)] + 1 &&
			     ${$cigars{$name}}[$x] == ${$cigars{$name}}[($x + 1)] - 1 ) {
		      unshift @remove,$x;
	      }
	   }
      for my $pos (@remove) {
		  	splice @{$cigars{$name}}, $pos, 1;
	   }
   }
   # join and punctuate
   for my $name (keys %cigars) {
 	  my ($start,$end,$str) = "";
 	  while ( ($start,$end) = splice @{$cigars{$name}}, 0, 2 ) {
 		  $str .= ($start . "," . $end . ":");
 	  }
 	  $str =~ s/:$//;
      $cigars{$name} = $str;
   }
   %cigars;
}


=head2 match_line

 Title    : match_line()
 Usage    : $line = $align->match_line()
 Function : Generates a match line - much like consensus string
            except that a line indicating the '*' for a match.
 Args     : (optional) Match line characters ('*' by default)



( run in 1.247 second using v1.01-cache-2.11-cpan-39bf76dae61 )