view release on metacpan or search on metacpan
bin/BS_CodonJuggler.pl view on Meta::CPAN
if ($newpep ne $oldpep)
{
$state->{$gname}->[2] .= ' Change in amino acid sequence;';
}
}
#Do reporting
print "\nReport:\n";
foreach my $gname (sort keys %{$state})
{
my $gid = $state->{$gname}->[3];
my @results = @{$state->{$gname}};
next unless($results[0] || $results[1] || $results[2]);
print $gname, q{ : };
if ($results[0])
{
my $plural = $results[0] > 1 ? 's' : q{};
print "$results[0] $p{FROM} codon$plural changed; ";
}
if ($results[1])
{
bin/BS_PCRTagger.pl view on Meta::CPAN
-types => 'gene',
-start => $p{STARTPOS},
-end => $p{STOPPOS},
-range_type => 'contains'
);
foreach my $gene (@genes)
{
my $gstart = $gene->start;
my $gend = $gene->end;
my $glen = $gend - $gstart + 1;
my $gid = $gene->Tag_load_id;
my $newgeneseq = substr $newseq, $gstart - 1, $glen;
my $oldgeneseq = substr $chrseq, $gstart - 1, $glen;
if ($newgeneseq eq $oldgeneseq)
{
$report{$gid} .= ' No change in sequence;';
}
my $cdna = $chr->make_cDNA($gene);
my $oldpep = $GD->translate(-sequence => $cdna);
my $newcdna = $newchr->make_cDNA($gene);
my $newpep = $GD->translate(-sequence => $newcdna);
if ($newpep ne $oldpep)
{
$report{$gid} .= ' Change in amino acid sequence;';
}
}
print "\n\n";
foreach my $gid (sort keys %report)
{
print "$gid : $report{$gid}\n";
}
#Tell chromosome to write itself
$newchr->add_reason($p{EDITOR}, $p{MEMO});
$newchr->write_chromosome();
exit;
__END__
lib/Bio/BioStudio/CodonJuggling.pm view on Meta::CPAN
my @JOBLIST;
my @OUTS;
my @CLEANUP = ($parampath);
my $exec = 'BS_auto_codon_juggle.pl';
my $hookups = {};
my $memo = {};
foreach my $gene (@pgenes)
{
my $gname = $gene->display_name;
my $cmd = $exec . q{ -chr } . $chrname . q{ -key } . $key;
$cmd .= q{ -gid } . $gname;
my $outpath = $tmp_path . $key . q{_} . $gname . '.out';
push @JOBLIST, $cmd . q{:} . $outpath . q{:0};
push @OUTS, $outpath;
$hookups->{$outpath} = $gname;
}
my $jobs = join "\n", @JOBLIST;
my $morefiles = taskfarm($jobs, $chrname, $key);
foreach my $out (@OUTS)
{
if (! -e $out)
{
my $gid = $hookups->{$out};
$state->{$gid}->[2] .= ' Codon juggling failed! ';
next;
}
my $result = retrieve($out);
foreach my $gname (keys %{$result})
{
if (exists $state->{$gname})
{
$state->{$gname}->[0] += $result->{$gname}->[0];
$state->{$gname}->[1] += $result->{$gname}->[1];
$state->{$gname}->[2] .= $result->{$gname}->[2];
lib/Bio/BioStudio/CodonJuggling.pm view on Meta::CPAN
}
return $state;
}
=head2 juggle_gene
=cut
sub juggle_gene
{
my ($chr, $mask, $gid, $p) = @_;
my $gene = $chr->fetch_features(-name => $gid);
my $GD = $chr->GD();
my $chrseq = $chr->sequence();
my $return = {};
my $offset = $gene->start() - 1;
my $gmask = Bio::BioStudio::Mask->new(-sequence => $gene, -offset => $offset);
my @subfeats = $chr->flatten_subfeats($gene);
$gmask->add_to_mask(\@subfeats);
my %subs = map {$_->primary_id => 1} @subfeats;
$subs{$gid}++;
my $orient = $gene->strand();
my $gstart = $gene->start();
my $gend = $gene->end();
my $glen = $gend - $gstart + 1;
#number of codons, number of alts, notes, $gid, codons
$return->{$gid} = [0, 0, q{}, []];
my $x = $gstart;
while ($x <= $gend - 2)
{
##Adjust for introns
my %posa = $gmask->what_objects_overlap($x);
my @introns = grep {$_->primary_tag eq 'intron'} values %posa;
$x = $introns[0]->end + 1 if (scalar @introns);
my $basea = $x;
$x++;
lib/Bio/BioStudio/CodonJuggling.pm view on Meta::CPAN
my $cod = (substr $chrseq, $basea - 1, 1)
. (substr $chrseq, $baseb - 1, 1)
. (substr $chrseq, $basec - 1, 1);
next if ($orient < 0 && $cod ne $p->{MORF});
next if ($orient >= 0 && $cod ne $p->{FROM});
##Annotate a potential codon change
my $newcod = $orient > 0 ? $p->{TO} : $p->{OT};
my $oldcod = $orient > 0 ? $p->{FROM} : $p->{MORF};
my $oset = $basea - $gstart + 1;
my $codname = $gid . q{_} . $p->{OLDAA} . $oset . $p->{NEWAA};
my $codon = Bio::BioStudio::SeqFeature::Codon->new(
-start => $basea,
-end => $basec,
-primary_tag => $p->{SWAPTYPE},
-display_name => $codname,
-wtseq => $oldcod,
-newseq => $newcod,
-parent => $gid
);
## Check for overlaps
my $maskbit = $mask->count_features_in_range($basea, 3);
## No overlap involved - make the change and move on
#
if ($maskbit == 1)
{
$return->{$gid}->[0]++;
push @{$return->{$gid}->[3]}, $codon;
next;
}
## Overlap involved:
#
my @featlaps = $mask->feature_objects_in_range($basea, 3);
my @theseolaps = grep {! exists $subs{$_->primary_id}} @featlaps;
my $lapcount = scalar @theseolaps;
# If there are more than one overlapping features, no change can be made
if ($lapcount > 1)
{
$return->{$gid}->[2] .= "cannot change codon at $basea (too occluded) ";
next;
}
# If there are zero, we have run into an internal error
elsif ($lapcount < 1)
{
$return->{$gid}->[2] .= "cannot change codon at $basea (INTERNAL ERROR) ";
next;
}
# If the only overlapping feature is an intron, no change can be made
if ($theseolaps[0]->primary_tag eq 'intron')
{
$return->{$gid}->[2] .= "cannot change codon at $basea (overlaps an intron)";
next;
}
# Now the overlap must be a CDS; determine the frame
my $lapfeat = $theseolaps[0];
my $lapname = $lapfeat->display_name;
my $lapstart = $lapfeat->start;
my $laporient = $lapfeat->strand;
my $laplen = $lapfeat->end - $lapstart + 1;
my $CDSseq = substr $chrseq, $lapstart - 1, $laplen;
lib/Bio/BioStudio/CodonJuggling.pm view on Meta::CPAN
# If the change doesn't preserve overlapping sequence and no exceptions were
# provided, skip
my $lapstatus = $lapfeat->Tag_orf_classification;
my $genstatus = $gene->Tag_orf_classification;
my $dubwhack = $lapstatus eq 'Dubious' && $genstatus ne 'Dubious' ? 1 : 0;
my $verwhack = $genstatus ne 'Dubious' ? 1 : 0;
if ($allowflag == 0 && (! $p->{ALLWHACK}
|| ! ($dubwhack && $p->{DUBWHACK}) || ! ($verwhack && $p->{VERWHACK})))
{
$return->{$gid}->[2] .= "cannot change codon at $basea (overlaps $lapname)";
next;
}
# Add the codon for this gene
$return->{$gid}->[0]++;
push @{$return->{$gid}->[3]}, $codon;
# Add the codon for the overlapping gene
my $newCDSseq = substr $chrseq, $lapstart - 1, $laplen;
my $newframe = substr $newCDSseq, $syncodstart, $length;
for (my $offset = 0; $offset <= $length - 3; $offset += 3)
{
my $oldsyncod = substr $inframe, $offset, 3;
my $newsyncod = substr $newframe, $offset, 3;
my $pos = $syncodstart + $offset + $lapstart + 1;
lib/Bio/BioStudio/CodonJuggling.pm view on Meta::CPAN
my $oldLaa = $GD->codontable->{$oldLcod};
my $swaptype = $GD->codon_change_type(-from => $oldLcod, -to => $newLcod);
$newLcod = $newsyncod if ($laporient == -1);
$oldLcod = $oldsyncod if ($laporient == -1);
my $Loffset = $pos - $lapstart + 1;
my $parent = $lapfeat->has_tag('parent_id') ? $lapfeat->Tag_parent_id : $lapname;
$return->{$parent} = [0, 0, q{}, []] if (! exists $return->{$parent});
my $codLname = $parent . q{_} . $oldLaa . $Loffset . $newLaa;
my $modnote = " $codLname modified to accommodate $p->{FROM} to $p->{TO} change in $gid";
my $Lcodon = Bio::BioStudio::SeqFeature::Codon->new(
-start => $pos - 1,
-end => $pos+1,
-primary_tag => $swaptype,
-display_name => $codLname,
-wtseq => $oldLcod,
-newseq => $newLcod,
-parent => $parent,
-Note => $modnote,
lib/Bio/BioStudio/PCRTagging.pm view on Meta::CPAN
@genes = sort {$b->end - $b->start <=> $a->end - $a->start} @genes;
#Find tags, pick tags, implement tags, check for errors
my @JOBLIST;
my @OUTS;
my @CLEANUP = ($parampath);
my $exec = 'BS_auto_tag_genes.pl';
my $hookup = {};
foreach my $gene (@genes)
{
my $gid = $gene->Tag_load_id;
my $cmd = $exec . q{ -chr } . $chrname . q{ -key } . $key;
$cmd .= q{ -gid } . $gid;
my $outpath = $tmp_path . $key . q{_} . $gid . '.out';
push @JOBLIST, $cmd . q{:} . $outpath . q{:0};
push @OUTS, $outpath;
$hookup->{$gid} = $outpath;
}
my $jobs = join "\n", @JOBLIST;
my $morefiles = taskfarm($jobs, $chrname, $key);
@genes = sort {$a->start <=> $b->start} @genes;
foreach my $gene (@genes)
{
my $gid = $gene->Tag_load_id;
my $outpath = $hookup->{$gid};
if (! -e $outpath)
{
$report{$gid} = 'TAGGING FAILED!';
next;
}
my $result = retrieve($outpath);
my @tags = @{$result->[1]};
my $comment = $result->[0];
if (scalar @tags)
{
add_tags($newchr, $gene, $result->[1]);
}
$report{$gid} = $comment;
}
push @CLEANUP, @OUTS, @{$morefiles};
cleanup(\@CLEANUP);
$factory->cleanup();
return \%report;
}
=head2 serial_tagging
=cut
lib/Bio/BioStudio/PCRTagging.pm view on Meta::CPAN
-seq_id => $newchr->seq_id,
-types => 'gene',
-start => $p->{STARTPOS},
-end => $p->{STOPPOS},
-range_type => 'contains'
);
#Find tags, pick tags, implement tags, check for errors
foreach my $gene (@genes)
{
my $gid = $gene->Tag_load_id;
my $result = tag_gene($newchr, $mask, $factory, $gid, $p);
my @tags = @{$result->[1]};
my $comment = $result->[0];
if (scalar @tags)
{
add_tags($newchr, $gene, $result->[1]);
}
$report{$gid} .= $comment;
}
$factory->cleanup();
return \%report;
}
=head2 tag_gene
=cut
sub tag_gene
{
my ($chr, $mask, $factory, $gid, $p) = @_;
my $gene = $chr->fetch_features(-name => $gid);
my $cDNA = $chr->make_cDNA($gene);
my $dir = $gene->strand;
my $return = undef;
if (length $cDNA < $p->{MINORFLEN})
{
return ['too short', []];
}
#Extract all possible tags from the gene
lib/Bio/BioStudio/PCRTagging.pm view on Meta::CPAN
my ($gene, $chr, $mask, $p) = @_;
my $MINTAGLEN = $p->{MINTAGLEN},
my $MAXTAGLEN = $p->{MAXTAGLEN},
my $MINTAGMELT = $p->{MINTAGMELT},
my $MAXTAGMELT = $p->{MAXTAGMELT},
my %BADAAS = %{$p->{BADAAS}},
my $MINPERDIFF = $p->{MINPERDIFF},
my $THREEPRIMEBUFFER = $p->{THREEPRIMEBUFFER},
my $FIVEPRIMEBUFFER = $p->{FIVEPRIMEBUFFER},
my $gid = $gene->Tag_load_id;
my $cDNA = $chr->make_cDNA($gene);
my $GD = $chr->GD();
my $gstart = $gene->start();
my ($rstart, $rstop) = ($gstart, $gene->end);
my $dir = $gene->strand;
if ($dir != 1)
{
$rstart += $THREEPRIMEBUFFER;
$rstop -= $FIVEPRIMEBUFFER;
}
lib/Bio/BioStudio/PCRTagging.pm view on Meta::CPAN
#Exclude oligos whose recodes don't meet melting standard
my $MDTm = _ntherm($mdoligo);
if ($MDTm < $MINTAGMELT || $MDTm > $MAXTAGMELT)
{
($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
next;
}
my $offset = $tstart - $gstart + 1;
my $tagid = $gid . q{_} . $offset;
my $tag = Bio::BioStudio::SeqFeature::Tag->new(
-start => $offset,
-end => $offset + $olen - 1,
-display_name => $tagid,
-ingene => $gid,
-wtseq => $oligo,
-newseq => $mdoligo,
-wtpos => $tstart,
-difference => $comps->{P},
-translation => $peptide,
);
push @pretags, $tag;
($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
}
lib/Bio/BioStudio/PCRTagging.pm view on Meta::CPAN
my %objs = ();
# The utag wt and new sequences, forwards
my $uid = $utag->display_name;
my $uwtid = 'rcwt_' . $uid;
$phits{$uwtid} = [];
$objs{$uwtid} = Bio::Seq->new(-seq => $utag->wtseq(), id => $uwtid);
my $unewid = 'rcmd_' . $uid;
$phits{$unewid} = [];
$objs{$unewid} = Bio::Seq->new(-seq => $utag->newseq(), id => $unewid);
# The dtags wt and new sequences, reverses
my @dtagids = ();
my @downbuds = @{$dntags};
my %refs;
foreach my $dtag (@downbuds)
{
my $did = $dtag->display_name;
$refs{$did} = $dtag;
push @dtagids, $did;
my $dwtid = 'rcwt_' . $did;
$phits{$dwtid} = [];
$objs{$dwtid} = Bio::Seq->new(-seq => $dtag->wtseq(), id => $dwtid);
my $dnewid = 'rcmd_' . $did;
$phits{$dnewid} = [];
$objs{$dnewid} = Bio::Seq->new(-seq => $dtag->newseq(), id => $dnewid);
}
my @pairobjs = values %objs;
$factory->run(
lib/Bio/BioStudio/PCRTagging.pm view on Meta::CPAN
{
next if $hsp->percent_identity < 85;
next if ($hsp->length < .7 * $qlen);
push @{$phits{$name}}, [$hit, $hsp];
}
}
}
my $partner = undef;
my @Fwts = @{$phits{'rcwt_' . $uid}};
my @Fmds = @{$phits{'rcmd_' . $uid}};
foreach my $dtagid (@dtagids)
{
my ($wtnogo, $mdnogo) = (0, 0);
my @Rwts = @{$phits{'rcwt_' . $dtagid}};
foreach my $Rwpair (@Rwts)
{
my ($Rwhit, $Rwhsp) = @{$Rwpair};
foreach my $Fwpair (@Fwts)
{
my ($Fwhit, $Fwhsp) = @{$Fwpair};
#Pass if hits are on different chromosomes
next if ($Rwhit->name ne $Fwhit->name);
#Pass if hits are on the same strand
next if ($Rwhsp->strand eq $Fwhsp->strand);
$wtnogo++;
}
}
my @Rmds = @{$phits{'rcmd_' . $dtagid}};
foreach my $Rmpair (@Rmds)
{
my ($Rmhit, $Rmhsp) = @{$Rmpair};
foreach my $Fmpair (@Fmds)
{
my ($Fmhit, $Fmhsp) = @{$Fmpair};
#Pass if hits are on different chromosomes
next if ($Rmhit->name ne $Fmhit->name);
#Pass if hits are on the same strand
next if ($Rmhsp->strand eq $Fmhsp->strand);
$mdnogo++;
}
}
if ($mdnogo + $wtnogo == 0)
{
$partner = $refs{$dtagid};
last;
}
}
$factory->cleanup;
return $partner;
}
=head2 counterset
=cut
lib/Bio/BioStudio/PCRTagging.pm view on Meta::CPAN
=cut
sub add_tags
{
my ($chr, $gene, $tagarr, $report) = @_;
my @chosen = @{$tagarr};
my $tagcount = scalar @chosen;
my $gstart = $gene->start;
my $gend = $gene->end;
my $gid = $gene->Tag_load_id;
my $tcount = 1;
foreach my $tagpair (@chosen)
{
my ($gutag, $gdtag) = @{$tagpair};
my $adjust = $gstart - 1;
$gutag->start($gutag->start() + $adjust);
$gdtag->start($gdtag->start() + $adjust);
$gutag->end($gutag->end() + $adjust);
$gdtag->end($gdtag->end() + $adjust);
my $utagid = $gutag->display_name;
my $dtagid = $gdtag->display_name;
my $aid = $gid . '_amp' . $tcount;
my $comment = "PCR_product $aid annotated ";
$comment .= "(tags $utagid and $dtagid added)";
my $amp = Bio::BioStudio::SeqFeature::Amplicon->new(
-start => $gutag->start,
-end => $gdtag->end,
-ingene => $gid,
-uptag => $utagid,
-dntag => $dtagid,
-display_name => $aid,
);
$chr->add_feature(-feature => $gutag);
$chr->add_feature(-feature => $gdtag);
$chr->add_feature(-feature => $amp, -comments => [$comment]);
$tcount++;
}
return;
}