BioPerl
view release on metacpan or search on metacpan
Bio/SeqUtils.pm view on Meta::CPAN
else { $strand = $_->strand }
my $newend =
$self->_coord_revcom( $_->start, $_->start_pos_type, $length );
my $newstart =
$self->_coord_revcom( $_->end, $_->end_pos_type, $length );
my $newstart_type = $_->end_pos_type;
$newstart_type = 'BEFORE' if $_->end_pos_type eq 'AFTER';
$newstart_type = 'AFTER' if $_->end_pos_type eq 'BEFORE';
my $newend_type = $_->start_pos_type;
$newend_type = 'BEFORE' if $_->start_pos_type eq 'AFTER';
$newend_type = 'AFTER' if $_->start_pos_type eq 'BEFORE';
push @loc,
$self->_location_objects_from_coordinate_list(
[ [ $newstart, $newend, $newstart_type, $newend_type ] ],
$strand, $type );
}
my $newfeat =
Bio::SeqFeature::Generic->new( -primary => $feat->primary_tag );
foreach my $key ( $feat->annotation->get_all_annotation_keys() ) {
foreach my $value ( $feat->annotation->get_Annotations($key) ) {
$newfeat->annotation->add_Annotation( $key, $value );
}
}
foreach my $key ( $feat->get_all_tags() ) {
$newfeat->add_tag_value( $key, $feat->get_tag_values($key) );
}
my $loc = $self->_single_loc_object_from_collection(@loc);
$loc ? $newfeat->location($loc) : return;
$newfeat->add_SeqFeature($_) for @adjsubfeat;
return $newfeat;
}
sub _coord_revcom {
my ( $self, $coord, $type, $length ) = @_;
if ( $type eq 'BETWEEN' or $type eq 'WITHIN' ) {
$coord =~ s/(\d+)(\D*)(\d+)/$length+1-$3.$2.$length+1-$1/ge;
}
else {
$coord =~ s/(\d+)/$length+1-$1/ge;
$coord =~ tr/<>/></;
$coord = '>' . $coord
if $type eq 'BEFORE' and substr( $coord, 0, 1 ) ne '>';
$coord = '<' . $coord
if $type eq 'AFTER' and substr( $coord, 0, 1 ) ne '<';
}
return $coord;
}
=head2 evolve
Title : evolve
Usage : my $newseq = Bio::SeqUtils->
evolve($seq, $similarity, $transition_transversion_rate);
Function: Mutates the sequence by point mutations until the similarity of
the new sequence has decreased to the required level.
Transition/transversion rate is adjustable.
Returns : A new Bio::PrimarySeq object
Args : sequence object
percentage similarity (e.g. 80)
tr/tv rate, optional, defaults to 1 (= 1:1)
Set the verbosity of the Bio::SeqUtils object to positive integer to
see the mutations as they happen.
This method works only on nucleotide sequences. It prints a warning if
you set the target similarity to be less than 25%.
Transition/transversion ratio is an observed attribute of an sequence
comparison. We are dealing here with the transition/transversion rate
that we set for our model of sequence evolution.
=cut
sub evolve {
my ( $self, $seq, $sim, $rate ) = @_;
$rate ||= 1;
$self->throw( 'Object [$seq] '
. 'of class ['
. ref($seq)
. '] should be a Bio::PrimarySeqI ' )
unless $seq->isa('Bio::PrimarySeqI');
$self->throw(
"[$sim] " . ' should be a positive integer or float under 100' )
unless $sim =~ /^[+\d.]+$/ and $sim <= 100;
$self->warn(
"Nucleotide sequences are 25% similar by chance.
Do you really want to set similarity to [$sim]%?\n"
) unless $sim > 25;
$self->throw('Only nucleotide sequences are supported')
if $seq->alphabet eq 'protein';
# arrays of possible changes have transitions as first items
my %changes;
$changes{'a'} = [ 't', 'c', 'g' ];
$changes{'t'} = [ 'a', 'c', 'g' ];
$changes{'c'} = [ 'g', 'a', 't' ];
$changes{'g'} = [ 'c', 'a', 't' ];
# given the desired rate, find out where cut off points need to be
# when random numbers are generated from 0 to 100
# we are ignoring identical mutations (e.g. A->A) to speed things up
my $bin_size = 100 / ( $rate + 2 );
my $transition = 100 - ( 2 * $bin_size );
my $first_transversion = $transition + $bin_size;
# unify the look of sequence strings
my $string = lc $seq->seq; # lower case
$string =~
s/u/t/; # simplyfy our life; modules should deal with the change anyway
# store the original sequence string
my $oristring = $string;
my $length = $seq->length;
# stop evolving if the limit has been reached
until ( $self->_get_similarity( $oristring, $string ) <= $sim ) {
( run in 0.989 second using v1.01-cache-2.11-cpan-39bf76dae61 )