Bio-GeneDesign

 view release on metacpan or  search on metacpan

lib/Bio/GeneDesign.pm  view on Meta::CPAN


  my ($seq) = $self->_rearrange([qw(sequence)], @args);

  $self->throw('No sequence provided for the rcomplement function')
    unless $seq;

  my $str = $self->_stripdown($seq, q{}, 1);

  return _complement($str, 1);
}

=head2 transcribe

    $my_seq = "AATTCG";

    my $RNA_seq = $GD->transcribe($my_seq);
    $RNA_seq eq "AAUUCG" || die;

The -sequence argument is required.

Transcribes an RNA sequence from a DNA sequence.

You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
object to be processed.

=cut

sub transcribe
{
  my $self = shift;
  my @args = @_;

  # Pull the value registered under the "sequence" key out of the
  # caller's argument list.
  my ($input) = $self->_rearrange(['sequence'], @args);

  # A missing (or otherwise false) sequence is reported through throw().
  if (! $input)
  {
    $self->throw("No sequence provided for the transcribe function");
  }

  # Reduce the input with _stripdown (called with an empty string and 1 as
  # its extra arguments) and hand the result to _toRNA.
  my $dna = $self->_stripdown($input, q{}, 1);

  return _toRNA($dna);
}

=head2 count

    $my_seq = "AATTCG";
    my $count = $GD->count($my_seq);
    $count->{C} == 1 || die;
    $count->{G} == 1 || die;
    $count->{A} == 2 || die;
    $count->{GCp} == 33.3 || die;
    $count->{ATp} == 66.7 || die;

    #clean
    my $count = $GD->count(-sequence => $my_seq);

You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
object.

The count function counts the bases in a DNA sequence and returns a hash
reference in which each base (including the ambiguous bases) is a key and each
value is the number of times that base appears in the sequence. There are also
the special keys GCp and ATp for GC and AT percentage.

=cut

sub count
{
  my $self = shift;
  my @args = @_;

  # Pull the value registered under the "sequence" key out of the
  # caller's argument list.
  my ($input) = $self->_rearrange(['sequence'], @args);

  # A missing (or otherwise false) sequence is reported through throw().
  if (! $input)
  {
    $self->throw("No sequence provided for the count function");
  }

  # _stripdown is called with an empty string and 1 as its extra arguments;
  # its result is what _count tallies.
  my $bases = $self->_stripdown($input, q{}, 1);

  return _count($bases);
}


=head2 GC_windows

Takes a nucleotide sequence, a window size, and minimum and maximum values.
Returns lists of real coordinates of subsequences that violate minimum or
maximum GC percentages.

Values are returned inside an array reference such that the first value is an
array ref of minimum violators (as array refs of left/right coordinates), and
the second value is an array ref of maximum violators.

$return_value = [
  [[left, right], [left, right]], #minimum violators
  [[left, right], [left, right]]  #maximum violators
];

=cut

sub GC_windows
{
  my $self = shift;
  my @args = @_;

  # Argument order for positional calls: sequence, window, minimum, maximum.
  my @wanted = qw(sequence window minimum maximum);
  my ($input, $window, $floor, $ceiling) = $self->_rearrange(\@wanted, @args);

  # Only the sequence is checked here; window and the two limits are passed
  # to _gcwindows exactly as received.
  if (! $input)
  {
    $self->throw("No sequence provided for the GC_windows function");
  }

  my $bases = $self->_stripdown($input, q{}, 1);

  return _gcwindows($bases, $window, $floor, $ceiling);
}

=head2 regex_nt

    my $my_seq = "ABC";
    my $regex = $GD->regex_nt(-sequence => $my_seq);
    # $regex is qr/A[CGT]C/;

    my $regarr = $GD->regex_nt(-sequence => $my_seq, -reverse_complement => 1);
    # $regarr is [qr/A[CGT]C/, qr/G[ACG]T/]


You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
object to be processed with the -sequence flag.

regex_nt creates a compiled regular expression or a set of them that can be used
to query large nucleotide sequences for possibly ambiguous subsequences.


If you want to get regular expressions for both the forward and reverse senses
of the DNA, use the -reverse_complement flag and expect a reference to an array
of compiled regexes.

=cut

sub regex_nt
{
  my ($self, @args) = @_;

  my ($seq, $arrswit)
    = $self->_rearrange([qw(
        sequence
        reverse_complement)], @args
  );

lib/Bio/GeneDesign.pm  view on Meta::CPAN

NO TEST

=cut

sub import_seq_from_string
{
  my $self   = shift;
  my $string = shift;

  # The helper is called in list context; exactly its first three results
  # (iterator, filename, suffix) are passed back to the caller.
  my @imported = _import_sequences_from_string($string);

  return @imported[0 .. 2];
}

=head2 export_formats

Export formats that have been tried and tested to work well.

=cut

sub export_formats
{
  # Thin public wrapper: any arguments (including the invocant) are ignored
  # and the result of Bio::GeneDesign::IO::_export_formats() is returned
  # directly, so the helper is evaluated in the caller's context.
  return Bio::GeneDesign::IO::_export_formats();
}

=head2 export_seqs

NO TEST

=cut

sub export_seqs
{
  my $self = shift;
  my @args = @_;

  my ($outpath, $outformat, $seqarr)
    = $self->_rearrange([qw(filepath format sequences)], @args);

  # Fall back to genbank when no (or a false) format was requested.
  $outformat ||= 'genbank';

  # Refuse any format that _isa_BP_format does not accept.
  unless (_isa_BP_format($outformat))
  {
    $self->throw("$outformat is not a format recognized by BioPerl");
  }

  # Long attributes that originate from a genbank file can come through
  # corrupted; _long_att_fix is run on the sequences before a genbank export
  # to repair them.
  if ($outformat eq 'genbank')
  {
    _long_att_fix($seqarr);
  }

  return _export_sequences($outpath, $outformat, $seqarr);
}

=head2 random_dna

=cut

sub random_dna
{
  my ($self, @args) = @_;

  # Arguments: -length, -gc_percentage (defaults to 50), -no_stops.
  my ($rlen, $rgc, $rstop)
    = $self->_rearrange([qw(
        length
        gc_percentage
        no_stops)], @args);

  # Excluding stops is only possible when a codon table is loaded.
  $self->throw("no codon table has been defined")
    if ($rstop && ! $self->{codontable});

  # Apply the default only when gc_percentage was left unset (undef or empty
  # string). The previous "$rgc || 50" also replaced an explicit 0 with 50,
  # even though 0 is inside the accepted 0..100 range.
  $rgc = 50 unless (defined $rgc && length $rgc);

  $self->throw("gc_percentage must be between 0 and 100")
    if ($rgc < 0 || $rgc > 100);

  # No length, or a length below 1, yields an empty string.
  return q{} if (! $rlen || $rlen < 1);

  # A single base is always drawn with the GC weighting: gc_percentage is
  # guaranteed to be set by this point, so the old unweighted fallback could
  # never be reached.
  # NOTE(review): confirm _randombase_weighted copes with 0 and 100.
  return _randombase_weighted($rgc) if ($rlen == 1);

  return _randomDNA($rlen, $rgc, $rstop, $self->{codontable});
}

=head2 replace_ambiguous_bases

=cut

sub replace_ambiguous_bases
{
  my ($self, @args) = @_;

  # Go through _rearrange like the other sequence methods of this class so
  # that both the positional form and -sequence => $seq are understood.
  # Previously a named call would have treated the literal "-sequence" as
  # the sequence itself.
  my ($seq) = $self->_rearrange([qw(sequence)], @args);

  $self->throw("no sequence provided ")
    unless ($seq);

  my $str = $self->_stripdown($seq, q{}, 1);

  my $newstr = _replace_ambiguous_bases($str);

  # Plain strings in, plain strings out.
  return $newstr unless (ref $seq);

  # For objects, leave the caller's object untouched: work on a clone, swap
  # in the new sequence and append "deambiguated" to any existing
  # description.
  my $newobj  = $seq->clone();
  my $olddesc = $newobj->desc;
  my $desc    = ($olddesc ? $olddesc . q{ } : q{}) . "deambiguated";
  $newobj->seq($newstr);
  $newobj->desc($desc);
  return $newobj;
}

=head1 PLEASANTRIES

=head2 pad

    my $name = 5;
    my $nice = $GD->pad($name, 3);
    $nice eq "005" || die;

    $name = "oligo";
    $nice = $GD->pad($name, 7, "_");
    $nice eq "__oligo" || die;

Pads an integer with leading zeroes (by default) or any provided set of
characters. This is useful both to make reports pretty and to standardize the
length of designations.

=cut



( run in 2.122 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )