Bio-BioStudio
view release on metacpan or search on metacpan
lib/Bio/BioStudio/PCRTagging.pm view on Meta::CPAN
#Exclude oligos that begin or end in unswappable codons
my $peptide = _translate($woligo, $dir, $GD->{codontable});
my $firstres = substr $peptide, 0, 1;
my $lastres = substr $peptide, -1, 1;
if (exists $BADAAS{$firstres} || exists $BADAAS{$lastres})
{
($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
next;
}
#Exclude oligos that don't meet melting standard
my $currTm = _ntherm($oligo);
if ($currTm < $MINTAGMELT || $currTm > $MAXTAGMELT)
{
if ($currTm < $MINTAGMELT)
{
$rstart = $rstart + 3;
$count = $MAXTAGLEN;
}
elsif ($currTm > $MAXTAGMELT)
{
($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
}
next;
}
#Exclude oligos that aren't entirely in exons or lapped by other genes
my @ofeats = $mask->feature_objects_in_range($rstart, $count);
my @ifeats = grep { $_->primary_tag eq 'intron' } @ofeats;
if (scalar @ifeats || scalar @ofeats > 1)
{
($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
next;
}
#Recode oligos
$woligo = _complement($woligo, 1) if ($dir == -1);
my $wmdoligo = $GD->codon_juggle(
-sequence => $woligo,
-algorithm => 'most_different_sequence'
);
#Ensure that the first two bases are the same
if ((substr $wmdoligo, 0, 2) ne (substr $woligo, 0, 2))
{
my $codon = substr $woligo, 0, 3;
my $aa = $GD->{codontable}->{$codon};
my $di = substr $codon, 0, 2;
my $possibles = $GD->{reversecodontable}->{$aa};
my @choices = grep {(substr $_, 0, 2) eq $di} @{$possibles};
@choices = grep {(substr $_, -1) ne (substr $codon, -1)} @choices;
substr $wmdoligo, 0, 3, $choices[0];
}
$woligo = _complement($woligo, 1) if ($dir == -1);
$wmdoligo = _complement($wmdoligo, 1) if ($dir == -1);
my $mdoligo = $dir == 1
? substr($wmdoligo, 2, $wolen - 2)
: substr($wmdoligo, 0, $wolen - 2);
#Exclude oligos whose recodes don't meet percent difference standard
my $comps = _compare_sequences($oligo, $mdoligo);
if ( $comps->{P} < $MINPERDIFF)
{
($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
next;
}
#Exclude oligos whose recodes don't meet melting standard
my $MDTm = _ntherm($mdoligo);
if ($MDTm < $MINTAGMELT || $MDTm > $MAXTAGMELT)
{
($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
next;
}
my $offset = $tstart - $gstart + 1;
my $tagid = $gid . q{_} . $offset;
my $tag = Bio::BioStudio::SeqFeature::Tag->new(
-start => $offset,
-end => $offset + $olen - 1,
-display_name => $tagid,
-ingene => $gid,
-wtseq => $oligo,
-newseq => $mdoligo,
-wtpos => $tstart,
-difference => $comps->{P},
-translation => $peptide,
);
push @pretags, $tag;
($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
}
return \@pretags;
}
=head2 BLAST_tags
=cut
sub BLAST_tags
{
my ($tagarr, $factory) = @_;
my @tags = @{$tagarr};
#BLAST all tags against the latest version of the genome
my @tagobjs = ();
foreach my $tag (@tags)
{
my $id = $tag->display_name;
my $wtobj = Bio::Seq->new(-seq => $tag->wtseq(), id => 'wt_' . $id);
my $newobj = Bio::Seq->new(-seq => $tag->newseq(), id => 'md_' . $id);
push @tagobjs, $wtobj, $newobj;
}
$factory->run(
-method => 'blastn',
-query => \@tagobjs,
-method_args => [
-word_size => 17,
-perc_identity => 70
lib/Bio/BioStudio/PCRTagging.pm view on Meta::CPAN
return \@chosen;
}
=head2 BLAST_pairs
=cut
sub BLAST_pairs
{
my ($utag, $dntags, $factory) = @_;
#BLAST the pair to check amplification uniqueness.
#
my %phits = ();
my %objs = ();
# The utag wt and new sequences, forwards
my $uid = $utag->display_name;
my $uwtid = 'rcwt_' . $uid;
$phits{$uwtid} = [];
$objs{$uwtid} = Bio::Seq->new(-seq => $utag->wtseq(), id => $uwtid);
my $unewid = 'rcmd_' . $uid;
$phits{$unewid} = [];
$objs{$unewid} = Bio::Seq->new(-seq => $utag->newseq(), id => $unewid);
# The dtags wt and new sequences, reverses
my @dtagids = ();
my @downbuds = @{$dntags};
my %refs;
foreach my $dtag (@downbuds)
{
my $did = $dtag->display_name;
$refs{$did} = $dtag;
push @dtagids, $did;
my $dwtid = 'rcwt_' . $did;
$phits{$dwtid} = [];
$objs{$dwtid} = Bio::Seq->new(-seq => $dtag->wtseq(), id => $dwtid);
my $dnewid = 'rcmd_' . $did;
$phits{$dnewid} = [];
$objs{$dnewid} = Bio::Seq->new(-seq => $dtag->newseq(), id => $dnewid);
}
my @pairobjs = values %objs;
$factory->run(
-method => 'blastn',
-query => \@pairobjs,
-method_args => [
-word_size => 4,
-gapextend => 2,
-gapopen => 1,
-penalty => -1,
-perc_identity => 70,
-best_hit_overhang => .25
]);
$factory->rewind_results;
while (my $result = $factory->next_result)
{
my $name = $result->query_name();
my $qlen = length $objs{$name}->seq;
while( my $hit = $result->next_hit())
{
while( my $hsp = $hit->next_hsp())
{
next if $hsp->percent_identity < 85;
next if ($hsp->length < .7 * $qlen);
push @{$phits{$name}}, [$hit, $hsp];
}
}
}
my $partner = undef;
my @Fwts = @{$phits{'rcwt_' . $uid}};
my @Fmds = @{$phits{'rcmd_' . $uid}};
foreach my $dtagid (@dtagids)
{
my ($wtnogo, $mdnogo) = (0, 0);
my @Rwts = @{$phits{'rcwt_' . $dtagid}};
foreach my $Rwpair (@Rwts)
{
my ($Rwhit, $Rwhsp) = @{$Rwpair};
foreach my $Fwpair (@Fwts)
{
my ($Fwhit, $Fwhsp) = @{$Fwpair};
#Pass if hits are on different chromosomes
next if ($Rwhit->name ne $Fwhit->name);
#Pass if hits are on the same strand
next if ($Rwhsp->strand eq $Fwhsp->strand);
$wtnogo++;
}
}
my @Rmds = @{$phits{'rcmd_' . $dtagid}};
foreach my $Rmpair (@Rmds)
{
my ($Rmhit, $Rmhsp) = @{$Rmpair};
foreach my $Fmpair (@Fmds)
{
my ($Fmhit, $Fmhsp) = @{$Fmpair};
#Pass if hits are on different chromosomes
next if ($Rmhit->name ne $Fmhit->name);
#Pass if hits are on the same strand
next if ($Rmhsp->strand eq $Fmhsp->strand);
$mdnogo++;
}
}
if ($mdnogo + $wtnogo == 0)
{
$partner = $refs{$dtagid};
last;
}
}
$factory->cleanup;
return $partner;
}
=head2 counterset
=cut
sub counterset
{
my ($count, $start, $minlen, $maxlen) = @_;
$start = $start + 3 if ($count < $minlen + 2);
$count = $count >= $minlen + 2 ? $count - 3 : $maxlen;
return ($count, $start);
}
( run in 0.558 second using v1.01-cache-2.11-cpan-39bf76dae61 )