Bio-Gonzales
view release on metacpan or search on metacpan
lib/Bio/Gonzales/Seq.pm view on Meta::CPAN
# pretty(@args)
#
# Convenience alias: forwards every argument to all_formatted() on the same
# invocant and hands back whatever that method returns.
sub pretty {
    my $self = shift;

    # Pass @_ itself (not a copy) so the callee sees the caller's arguments.
    return $self->all_formatted(@_);
}
# as_primaryseq()
#
# Builds a Bio::PrimarySeq object from this sequence's seq, id and desc
# values, using guess_alphabet() to fill in the alphabet.
#
# Returns: whatever Bio::PrimarySeq->new returns.
sub as_primaryseq {
    my $self = shift;

    # The accessors are called inside the list so they run in list context
    # and in this exact order.
    my @constructor_args = (
        -seq      => $self->seq,
        -id       => $self->id,
        -desc     => $self->desc,
        -alphabet => $self->guess_alphabet,

        # NOTE(review): presumably tells BioPerl to take the sequence
        # as-is, without validation -- confirm against Bio::PrimarySeq.
        -direct => 1,
    );

    return Bio::PrimarySeq->new(@constructor_args);
}
# guess_alphabet()
#
# Guesses the alphabet of the sequence returned by $self->seq from its
# letter composition.
#
# Returns:
#   'protein' - the sequence contains a letter that is unambiguously an
#               amino acid code, or 70% or less of its letters are ACGTUN
#   'rna'     - more than 70% of the letters are ACGTUN and a U is present
#   'dna'     - more than 70% of the letters are ACGTUN and no U is present
#   ''        - the sequence is undefined, empty, or consists only of the
#               gap/placeholder symbols - . ? * (nothing to guess from)
sub guess_alphabet {
    my ($self) = @_;

    my $str = $self->seq();
    $str = '' unless defined $str;

    # Gap and placeholder symbols carry no alphabet information.
    $str =~ tr/-.?*//d;

    # Check for sequences without valid letters. Without this guard the
    # composition ratio below divides by zero.
    my $total = CORE::length($str);
    return '' if $total == 0;

    # Start with a safe method to find proteins.
    # Unambiguous IUPAC letters for proteins are: E,F,I,J,L,O,P,Q,X,Z
    return 'protein' if $str =~ m/[EFIJLOPQXZ]/i;

    # Alphabet is unsure, could still be DNA, RNA or protein.
    # DNA and RNA contain mostly A, T, U, G, C and N, but the other letters
    # they use are also among the 15 valid letters that a protein sequence
    # can contain at this stage. Make our best guess based on sequence
    # composition. If it contains over 70% of ACGTUN, it is likely nucleic.
    my $nucleic_count = ( $str =~ tr/ATUGCNatugcn// );
    return 'protein' unless $nucleic_count / $total > 0.7;

    return $str =~ m/U/i ? 'rna' : 'dna';
}
# revcom()
#
# Replaces the stored sequence, in place, with the result of
# _revcom_from_string() applied to the current sequence and its guessed
# alphabet.
#
# Returns: the invocant itself.
sub revcom {
    my $self = shift;

    # Read the sequence first, then guess its alphabet; both calls stay in
    # list context.
    my @helper_args = ( $self->seq, $self->guess_alphabet );

    $self->seq( _revcom_from_string(@helper_args) );

    return $self;
}
sub subseq {
my ( $self, $range, $c ) = @_;
my ( $seq, $corrected_range ) = $self->subseq_as_string( $range, $c );
my ( $b, $e, $strand, @rest ) = @$corrected_range;
lib/Bio/Gonzales/Seq.pm view on Meta::CPAN
my $seq = substr( $self->{seq}, $b, $e - $b );
if ( $strand && $strand < 0 ) {
if ( $c->{relaxed_revcom} ) {
$seq =~ y/AGCTNagctn/N/c;
} else {
confess "cannot create reverse complement, sequence contains non-AGCTN characters"
if ( $seq =~ /[^AGCTN]/i );
}
$seq = _revcom_from_string($seq, $self->_guess_alphabet);
}
return wantarray ? ( $seq, [ $b, $e, $strand, @rest ] ) : $seq;
}
sub _revcom_from_string {
my ($string, $alphabet) = @_;
# Check that reverse-complementing makes sense
if( $alphabet eq 'protein' ) {
( run in 1.190 second using v1.01-cache-2.11-cpan-748bfb374f4 )