BioPerl

 view release on metacpan or  search on metacpan

Bio/SeqUtils.pm  view on Meta::CPAN

        else                       { $strand = $_->strand }
        my $newend =
          $self->_coord_revcom( $_->start, $_->start_pos_type, $length );
        my $newstart =
          $self->_coord_revcom( $_->end, $_->end_pos_type, $length );
        my $newstart_type = $_->end_pos_type;
        $newstart_type = 'BEFORE' if $_->end_pos_type eq 'AFTER';
        $newstart_type = 'AFTER'  if $_->end_pos_type eq 'BEFORE';
        my $newend_type = $_->start_pos_type;
        $newend_type = 'BEFORE' if $_->start_pos_type eq 'AFTER';
        $newend_type = 'AFTER'  if $_->start_pos_type eq 'BEFORE';
        push @loc,
          $self->_location_objects_from_coordinate_list(
            [ [ $newstart, $newend, $newstart_type, $newend_type ] ],
            $strand, $type );
    }
    my $newfeat =
      Bio::SeqFeature::Generic->new( -primary => $feat->primary_tag );
    foreach my $key ( $feat->annotation->get_all_annotation_keys() ) {
        foreach my $value ( $feat->annotation->get_Annotations($key) ) {
            $newfeat->annotation->add_Annotation( $key, $value );
        }
    }
    foreach my $key ( $feat->get_all_tags() ) {
        $newfeat->add_tag_value( $key, $feat->get_tag_values($key) );
    }

    my $loc = $self->_single_loc_object_from_collection(@loc);
    $loc ? $newfeat->location($loc) : return;

    $newfeat->add_SeqFeature($_) for @adjsubfeat;
    return $newfeat;
}

sub _coord_revcom {
    my ( $self, $coord, $type, $length ) = @_;
    if ( $type eq 'BETWEEN' or $type eq 'WITHIN' ) {
        $coord =~ s/(\d+)(\D*)(\d+)/$length+1-$3.$2.$length+1-$1/ge;
    }
    else {
        $coord =~ s/(\d+)/$length+1-$1/ge;
        $coord =~ tr/<>/></;
        $coord = '>' . $coord
          if $type eq 'BEFORE' and substr( $coord, 0, 1 ) ne '>';
        $coord = '<' . $coord
          if $type eq 'AFTER' and substr( $coord, 0, 1 ) ne '<';
    }
    return $coord;
}

=head2 evolve

  Title   : evolve
  Usage   : my $newseq = Bio::SeqUtils->
                evolve($seq, $similarity, $transition_transversion_rate);
  Function: Mutates the sequence by point mutations until the similarity of
            the new sequence has decreased to the required level.
            Transition/transversion rate is adjustable.
  Returns : A new Bio::PrimarySeq object
  Args    : sequence object
            percentage similarity (e.g. 80)
            tr/tv rate, optional, defaults to 1 (= 1:1)

Set the verbosity of the Bio::SeqUtils object to positive integer to
see the mutations as they happen.

This method works only on nucleotide sequences. It prints a warning if
you set the target similarity to be less than 25%.

Transition/transversion ratio is an observed attribute of an sequence
comparison. We are dealing here with the transition/transversion rate
that we set for our model of sequence evolution.

=cut

sub evolve {
    my ( $self, $seq, $sim, $rate ) = @_;
    $rate ||= 1;

    $self->throw( 'Object [$seq] '
          . 'of class ['
          . ref($seq)
          . '] should be a Bio::PrimarySeqI ' )
      unless $seq->isa('Bio::PrimarySeqI');

    $self->throw(
        "[$sim] " . ' should be a positive integer or float under 100' )
      unless $sim =~ /^[+\d.]+$/ and $sim <= 100;

    $self->warn(
        "Nucleotide sequences are 25% similar by chance.
        Do you really want to set similarity to [$sim]%?\n"
    ) unless $sim > 25;

    $self->throw('Only nucleotide sequences are supported')
      if $seq->alphabet eq 'protein';

    # arrays of possible changes have transitions as first items
    my %changes;
    $changes{'a'} = [ 't', 'c', 'g' ];
    $changes{'t'} = [ 'a', 'c', 'g' ];
    $changes{'c'} = [ 'g', 'a', 't' ];
    $changes{'g'} = [ 'c', 'a', 't' ];

    # given the desired rate, find out where cut off points need to be
    # when random numbers are generated from 0 to 100
    # we are ignoring identical mutations (e.g. A->A) to speed things up
    my $bin_size   = 100 / ( $rate + 2 );
    my $transition = 100 - ( 2 * $bin_size );
    my $first_transversion = $transition + $bin_size;

    # unify the look of sequence strings
    my $string = lc $seq->seq;    # lower case
    $string =~
      s/u/t/;    # simplyfy our life; modules should deal with the change anyway
                 # store the original sequence string
    my $oristring = $string;
    my $length    = $seq->length;

    # stop evolving if the limit has been reached
    until ( $self->_get_similarity( $oristring, $string ) <= $sim ) {



( run in 0.989 second using v1.01-cache-2.11-cpan-39bf76dae61 )