Bio-GeneDesign

 view release on metacpan or  search on metacpan

lib/Bio/GeneDesign.pm  view on Meta::CPAN


    #clean
    my $count = $GD->count(-sequence => $my_seq);

You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
object.

The count function counts the bases in a DNA sequence and returns a hash
reference whose keys are the bases (including the ambiguous bases) and whose
values are the number of times each base appears in the sequence. There are
also the special keys GCp and ATp for GC and AT percentage.

=cut

sub count
{
  my ($self, @args) = @_;

  my ($seq) = $self->_rearrange([qw(sequence)], @args);

  $self->throw("No sequence provided for the count function")

lib/Bio/GeneDesign.pm  view on Meta::CPAN

  my $str = $self->_stripdown($seq, q{}, 1);

  return _count($str);
}


=head2 GC_windows

Takes a nucleotide sequence, a window size, and minimum and maximum values.
Returns lists of real coordinates of subsequences that violate minimum or
maximum GC percentages.

Values are returned inside an array reference such that the first value is an
array ref of minimum violators (as array refs of left/right coordinates), and
the second value is an array ref of maximum violators.

$return_value = [
  [[left, right], [left, right]], #minimum violators
  [[left, right], [left, right]]  #maximum violators
];

lib/Bio/GeneDesign.pm  view on Meta::CPAN


=cut

sub random_dna
{
  my ($self, @args) = @_;

  my ($rlen, $rgc, $rstop)
    = $self->_rearrange([qw(
        length
        gc_percentage
        no_stops)], @args);

  $self->throw("no codon table has been defined")
    if ($rstop && ! $self->{codontable});

  $rgc = $rgc || 50;
  $self->throw("gc_percentage must be between 0 and 100")
    if ($rgc && ($rgc < 0 || $rgc > 100));

  if (! $rlen || $rlen < 1)
  {
    return q{};
  }
  elsif ($rlen == 1)
  {
    return $rgc ? _randombase_weighted($rgc)  : _randombase;
  }

lib/Bio/GeneDesign/Basic.pm  view on Meta::CPAN

  }
  my @baddies = keys %noncount;
  return ($newstrand, @baddies);
}

=head2 _count()

Takes a nucleotide sequence and returns a base count.  Looks for total length,
purines, pyrimidines, and degenerate bases. If degenerate bases are present,
it assumes that their substitution for non-degenerate bases is totally random
for percentage estimation.

  in: nucleotide sequence (string),
  out: base count (hash reference)

=cut

sub _count
{
  my ($strand) = @_;
  my $len = length $strand;

lib/Bio/GeneDesign/Basic.pm  view on Meta::CPAN

  $C{$_}++ foreach @arr;

  $C{d} += $C{$_} foreach (qw(A T C G));
  $C{n} += $C{$_} foreach (qw(B D H K M N R S V W Y));
  $C{'?'} = ($C{d} + $C{n}) - $len;

  #Estimate how many of each degenerate base would be a G or C
  my $split = .5*$C{R}  + .5*$C{Y}  + .5*$C{K}  + .5*$C{M}  + .5*$C{N};
  my $trip  = (2*$C{B} / 3) + (2*$C{V} / 3) + ($C{D} / 3) + ($C{H} / 3);

  #Calculate GC/AT percentage
  my $gcc = $C{S} + $C{G} + $C{C} + $split + $trip;
  my $gcp = sprintf "%.1f", ($gcc / $len) * 100;
  $C{GCp} = $gcp;
  $C{ATp} = 100 - $gcp;
  $C{len} = $len;

  return \%C;
}

=head2 _gcwindows()

Takes a nucleotide sequence, a window size, and minimum and maximum values.
Returns lists of real coordinates of subsequences that violate minimum or
maximum GC percentages.

Values are returned inside an array reference such that the first value is an
array ref of minimum violators (as array refs of left/right coordinates), and
the second value is an array ref of maximum violators.

$return_value = [
  [[left, right], [left, right]], #minimum violators
  [[left, right], [left, right]]  #maximum violators
];

lib/Bio/GeneDesign/Basic.pm  view on Meta::CPAN

      $botbit = $botbit =~ $REGHSH{R}  ?  1  :  0;
      $tsit++ if ($topbit == $botbit);
      $tver++ if ($topbit != $botbit);
    }
  }
  my %A;
  $A{D} = $tsit + $tver;               #changes
  $A{I} = $len - $A{D};                #identities
  $A{T} = $tsit;                       #transitions
  $A{V} = $tver;                       #transversions
  $A{P} = sprintf "%.1f", 100 - (($A{D} / $len) * 100);  #percent identity
  return \%A;
}

=head2 _amb_transcription

=cut

sub _amb_transcription
{
  my ($ntseq) = @_;

lib/Bio/GeneDesign/Graph.pm  view on Meta::CPAN

  my $data = [];
  my @legend;
  my $first = 0;
  my %AAfams = map {$_ => scalar(@{$revcodon_t->{$codon_t->{$_}}})}
               keys %$codon_t;
  my %perc_t = map {$_ => $rscu_t->{$_} / $AAfams{$_}}
               keys %$codon_t;

  foreach my $seqobj (@$arrref)
  {
    my ($x, $y)  = index_codon_percentages($seqobj->seq, $window, \%perc_t);
    push @$data, $x if ($first == 0);
    push @$data, $y;
    $first++;
    push @legend, $seqobj->id;
  }
  $graph->set_legend(@legend);
  my $format = $graph->export_format;
  return ($graph->plot($data)->$format(), $format);
}

lib/Bio/GeneDesign/Graph.pm  view on Meta::CPAN

  }
  return $BitMap->png;

  #open   (my $IMG, '>', $outfile) or croak $!;
  #binmode $IMG;
  #print   $IMG $BitMap->png;
  #close   $IMG;
  #return;
}

=head2 index_codon_percentages()

Generates two arrays for x and y values of a graph of codon percentage values.

  in: dna sequence (string),
      window size (integer),
      codon percentage table (hash reference)
  out: x values (array reference), y values (array reference)

=cut

sub index_codon_percentages
{
  my ($ntseq, $window, $perc_t) = @_;
  my @xvalues; my @yvalues;
  my $index; my $sum;
  for (my $x = int($window * (3 / 2)) - 3;
          $x < (length($ntseq) - 3 * (int($window * (3 / 2)) - 3));
          $x += 3)
  {
    $sum = 0;
    for(my $y = $x; $y < 3*$window + $x; $y += 3)

lib/Bio/GeneDesign/Random.pm  view on Meta::CPAN

  _replace_ambiguous_bases
  _weighted_rand
  _random_index
);
our %EXPORT_TAGS =  (GD => \@EXPORT_OK);

=head1 Functions

=head2 _randomDNA()

  Takes a target length and a GC percentage and generates a random nucleotide
  sequence, with or without stops in the first frame.
  in: nucleotide sequence length (scalar),
      GC percentage (0 <= scalar <= 100),
      stop codon prevention (0: stops allowed, else no stops),
      codon table (hash reference)
  out: nucleotide sequence (string)

=cut

sub _randomDNA
{
  my ($len, $GCperc, $stopswit, $codon_t) = @_;

t/65-random.t  view on Meta::CPAN

  plan tests => 2;

  my $rhshref =
  {
    A => num(0.20, 0.021), C => num(0.30, 0.021),
    G => num(0.30, 0.021), T => num(0.20, 0.021)
  };
  my $thshref = {};
  for my $x (1..$reps)
  {
    my $tbase = $GD->random_dna(-length => 1, -gc_percentage => 60);
    $thshref->{$tbase}++;
  }
  foreach my $key (keys %$thshref)
  {
    my $ratio = $thshref->{$key} / $reps || 0;
    $thshref->{$key} = sprintf("%.2f", $ratio);
  }
  cmp_deeply($thshref, $rhshref, "random, weighted base 1");

  $rhshref =
  {
    C => num(0.50, 0.021), G => num(0.50, 0.021),
  };
  $thshref = {};
  for my $x (1..$reps)
  {
    my $tbase = $GD->random_dna(-length => 1, -gc_percentage => 100);
    $thshref->{$tbase}++;
  }
  foreach my $key (keys %$thshref)
  {
    my $ratio = $thshref->{$key} / $reps || 0;
    $thshref->{$key} = sprintf("%.2f", $ratio);
  }
  cmp_deeply($thshref, $rhshref, "random, weighted base 2");
};



( run in 0.343 second using v1.01-cache-2.11-cpan-709fd43a63f )