Bio-GeneDesign
view release on metacpan or search on metacpan
lib/Bio/GeneDesign.pm view on Meta::CPAN
#clean
my $count = $GD->count(-sequence => $my_seq);
You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
object.
The count function counts the bases in a DNA sequence and returns a hash
reference in which each base (including the ambiguous bases) is a key and each
value is the number of times that base appears in the sequence. There are also
the special values GCp and ATp for GC and AT percentage.
=cut
sub count
{
my ($self, @args) = @_;
my ($seq) = $self->_rearrange([qw(sequence)], @args);
$self->throw("No sequence provided for the count function")
lib/Bio/GeneDesign.pm view on Meta::CPAN
my $str = $self->_stripdown($seq, q{}, 1);
return _count($str);
}
=head2 GC_windows
Takes a nucleotide sequence, a window size, and minimum and maximum values.
Returns lists of real coordinates of subsequences that violate minimum or
maximum GC percentages.
Values are returned inside an array reference such that the first value is an
array ref of minimum violators (as array refs of left/right coordinates), and
the second value is an array ref of maximum violators.
$return_value = [
[[left, right], [left, right]], #minimum violators
[[left, right], [left, right]] #maximum violators
];
lib/Bio/GeneDesign.pm view on Meta::CPAN
=cut
sub random_dna
{
my ($self, @args) = @_;
my ($rlen, $rgc, $rstop)
= $self->_rearrange([qw(
length
gc_percentage
no_stops)], @args);
$self->throw("no codon table has been defined")
if ($rstop && ! $self->{codontable});
$rgc = $rgc || 50;
$self->throw("gc_percentage must be between 0 and 100")
if ($rgc && ($rgc < 0 || $rgc > 100));
if (! $rlen || $rlen < 1)
{
return q{};
}
elsif ($rlen == 1)
{
return $rgc ? _randombase_weighted($rgc) : _randombase;
}
lib/Bio/GeneDesign/Basic.pm view on Meta::CPAN
}
my @baddies = keys %noncount;
return ($newstrand, @baddies);
}
=head2 _count()
Takes a nucleotide sequence and returns a base count. Looks for total length,
purines, pyrimidines, and degenerate bases. If degenerate bases are present,
assumes their substitution for non-degenerate bases is totally random for
percentage estimation.
in: nucleotide sequence (string),
out: base count (hash reference)
=cut
sub _count
{
my ($strand) = @_;
my $len = length $strand;
lib/Bio/GeneDesign/Basic.pm view on Meta::CPAN
$C{$_}++ foreach @arr;
$C{d} += $C{$_} foreach (qw(A T C G));
$C{n} += $C{$_} foreach (qw(B D H K M N R S V W Y));
$C{'?'} = ($C{d} + $C{n}) - $len;
#Estimate how many of each degenerate base would be a G or C
my $split = .5*$C{R} + .5*$C{Y} + .5*$C{K} + .5*$C{M} + .5*$C{N};
my $trip = (2*$C{B} / 3) + (2*$C{V} / 3) + ($C{D} / 3) + ($C{H} / 3);
#Calculate GC/AT percentage
my $gcc = $C{S} + $C{G} + $C{C} + $split + $trip;
my $gcp = sprintf "%.1f", ($gcc / $len) * 100;
$C{GCp} = $gcp;
$C{ATp} = 100 - $gcp;
$C{len} = $len;
return \%C;
}
=head2 _gcwindows()
Takes a nucleotide sequence, a window size, and minimum and maximum values.
Returns lists of real coordinates of subsequences that violate minimum or
maximum GC percentages.
Values are returned inside an array reference such that the first value is an
array ref of minimum violators (as array refs of left/right coordinates), and
the second value is an array ref of maximum violators.
$return_value = [
[[left, right], [left, right]], #minimum violators
[[left, right], [left, right]] #maximum violators
];
lib/Bio/GeneDesign/Basic.pm view on Meta::CPAN
$botbit = $botbit =~ $REGHSH{R} ? 1 : 0;
$tsit++ if ($topbit == $botbit);
$tver++ if ($topbit != $botbit);
}
}
my %A;
$A{D} = $tsit + $tver; #changes
$A{I} = $len - $A{D}; #identities
$A{T} = $tsit; #transitions
$A{V} = $tver; #transversions
$A{P} = sprintf "%.1f", 100 - (($A{D} / $len) * 100); #percent identity
return \%A;
}
=head2 _amb_transcription
=cut
sub _amb_transcription
{
my ($ntseq) = @_;
lib/Bio/GeneDesign/Graph.pm view on Meta::CPAN
my $data = [];
my @legend;
my $first = 0;
my %AAfams = map {$_ => scalar(@{$revcodon_t->{$codon_t->{$_}}})}
keys %$codon_t;
my %perc_t = map {$_ => $rscu_t->{$_} / $AAfams{$_}}
keys %$codon_t;
foreach my $seqobj (@$arrref)
{
my ($x, $y) = index_codon_percentages($seqobj->seq, $window, \%perc_t);
push @$data, $x if ($first == 0);
push @$data, $y;
$first++;
push @legend, $seqobj->id;
}
$graph->set_legend(@legend);
my $format = $graph->export_format;
return ($graph->plot($data)->$format(), $format);
}
lib/Bio/GeneDesign/Graph.pm view on Meta::CPAN
}
return $BitMap->png;
#open (my $IMG, '>', $outfile) or croak $!;
#binmode $IMG;
#print $IMG $BitMap->png;
#close $IMG;
#return;
}
=head2 index_codon_percentages()
Generates two arrays for x and y values of a graph of codon percentage values.
in: dna sequence (string),
window size (integer),
codon percentage table (hash reference)
out: x values (array reference), y values (array reference)
=cut
sub index_codon_percentages
{
my ($ntseq, $window, $perc_t) = @_;
my @xvalues; my @yvalues;
my $index; my $sum;
for (my $x = int($window * (3 / 2)) - 3;
$x < (length($ntseq) - 3 * (int($window * (3 / 2)) - 3));
$x += 3)
{
$sum = 0;
for(my $y = $x; $y < 3*$window + $x; $y += 3)
lib/Bio/GeneDesign/Random.pm view on Meta::CPAN
_replace_ambiguous_bases
_weighted_rand
_random_index
);
our %EXPORT_TAGS = (GD => \@EXPORT_OK);
=head1 Functions
=head2 _randomDNA()
takes a target length and a GC percentage and generates a random nucleotide
sequence, with or without stops in the first frame
in: nucleotide sequence length (scalar),
GC percentage (0 <= scalar <= 100),
stop codon prevention (0 = stops allowed, otherwise no stops),
codon table (hash reference)
out: nucleotide sequence (string)
=cut
sub _randomDNA
{
my ($len, $GCperc, $stopswit, $codon_t) = @_;
t/65-random.t view on Meta::CPAN
plan tests => 2;
my $rhshref =
{
A => num(0.20, 0.021), C => num(0.30, 0.021),
G => num(0.30, 0.021), T => num(0.20, 0.021)
};
my $thshref = {};
for my $x (1..$reps)
{
my $tbase = $GD->random_dna(-length => 1, -gc_percentage => 60);
$thshref->{$tbase}++;
}
foreach my $key (keys %$thshref)
{
my $ratio = $thshref->{$key} / $reps || 0;
$thshref->{$key} = sprintf("%.2f", $ratio);
}
cmp_deeply($thshref, $rhshref, "random, weighted base 1");
$rhshref =
{
C => num(0.50, 0.021), G => num(0.50, 0.021),
};
$thshref = {};
for my $x (1..$reps)
{
my $tbase = $GD->random_dna(-length => 1, -gc_percentage => 100);
$thshref->{$tbase}++;
}
foreach my $key (keys %$thshref)
{
my $ratio = $thshref->{$key} / $reps || 0;
$thshref->{$key} = sprintf("%.2f", $ratio);
}
cmp_deeply($thshref, $rhshref, "random, weighted base 2");
};
( run in 0.343 second using v1.01-cache-2.11-cpan-709fd43a63f )