Bio-Gonzales

 view release on metacpan or  search on metacpan

lib/Bio/Gonzales/Seq.pm  view on Meta::CPAN


# Alias for all_formatted(): every argument is forwarded unchanged and the
# caller's context is passed through to all_formatted().
sub pretty {
  my $self = shift;

  return $self->all_formatted(@_);
}

# Build a Bio::PrimarySeq object from this sequence.
#
# The sequence string, id and description are taken from the accessors of the
# same name; the alphabet comes from guess_alphabet().
# Returns whatever Bio::PrimarySeq->new() returns.
sub as_primaryseq {
  my $self = shift;

  # The accessors are evaluated in list context, in this order, exactly as
  # they would be inside a literal constructor argument list.
  my @ctor_args = (
    '-seq'      => $self->seq,
    '-id'       => $self->id,
    '-desc'     => $self->desc,
    '-alphabet' => $self->guess_alphabet,
    # NOTE(review): presumably tells Bio::PrimarySeq to accept the sequence
    # without validating it -- confirm against the BioPerl documentation.
    '-direct'   => 1,
  );

  return Bio::PrimarySeq->new(@ctor_args);
}

# Guess the alphabet of the sequence from its letter composition.
#
# The symbols '-', '.', '?' and '*' are ignored. Matching is case-insensitive.
#
# Returns:
#   'protein' - the sequence contains a letter that is unambiguous for
#               proteins (E,F,I,J,L,O,P,Q,X,Z), or at most 70% of its letters
#               are A, C, G, T, U or N
#   'rna'     - more than 70% of the letters are ACGTUN and a U is present
#   'dna'     - more than 70% of the letters are ACGTUN and no U is present
#   ''        - nothing is left after removing the ignored symbols (empty,
#               undefined or gap-only sequence), so no guess is possible
sub guess_alphabet {
  my ($self) = @_;

  my $str = $self->seq();
  $str = '' unless defined $str;

  # Drop gap, stop and unknown symbols; they carry no alphabet information.
  $str =~ tr/.?*-//d;

  # Check for sequences without valid letters. Without this guard the
  # composition ratio below would divide by zero.
  # CORE::length is spelled out so the builtin is used regardless of any
  # sub named "length" in this package.
  my $total = CORE::length($str);
  return '' if $total == 0;

  # Start with a safe method to find proteins.
  # Unambiguous IUPAC letters for proteins are: E,F,I,J,L,O,P,Q,X,Z
  return 'protein' if $str =~ m/[EFIJLOPQXZ]/i;

  # Alphabet is unsure, could still be DNA, RNA or protein.
  # DNA and RNA contain mostly A, T, U, G, C and N, but the other letters
  # they use are also among the 15 valid letters that a protein sequence
  # can contain at this stage. Make our best guess based on sequence
  # composition. If it contains over 70% of ACGTUN, it is likely nucleic.
  my $nucleic_count = ( $str =~ tr/ATUGCNatugcn// );
  return 'protein' unless $nucleic_count / $total > 0.7;

  return $str =~ m/U/i ? 'rna' : 'dna';
}

# Replace the stored sequence with the result of _revcom_from_string(),
# called with the current sequence and the alphabet from guess_alphabet().
# Modifies the object in place and returns it.
sub revcom {
  my $self = shift;

  # Both accessors are evaluated in list context, sequence first.
  my @revcom_args = ( $self->seq, $self->guess_alphabet );
  my @new_seq     = _revcom_from_string(@revcom_args);
  $self->seq(@new_seq);

  return $self;
}

sub subseq {
  my ( $self, $range, $c ) = @_;

  my ( $seq, $corrected_range ) = $self->subseq_as_string( $range, $c );
  my ( $b, $e, $strand, @rest ) = @$corrected_range;

lib/Bio/Gonzales/Seq.pm  view on Meta::CPAN

  my $seq = substr( $self->{seq}, $b, $e - $b );

  if ( $strand && $strand < 0 ) {
    if ( $c->{relaxed_revcom} ) {
      $seq =~ y/AGCTNagctn/N/c;
    } else {
      confess "cannot create reverse complement, sequence contains non-AGCTN characters"
        if ( $seq =~ /[^AGCTN]/i );
    }

    $seq = _revcom_from_string($seq, $self->_guess_alphabet);
  }

  return wantarray ? ( $seq, [ $b, $e, $strand, @rest ] ) : $seq;
}

sub _revcom_from_string {
   my ($string, $alphabet) = @_;

   # Check that reverse-complementing makes sense
   if( $alphabet eq 'protein' ) {



( run in 1.190 second using v1.01-cache-2.11-cpan-748bfb374f4 )