Bio-BioStudio

 view release on metacpan or  search on metacpan

bin/BS_CodonJuggler.pl  view on Meta::CPAN

  if ($newpep ne $oldpep)
  {
    $state->{$gname}->[2] .= ' Change in amino acid sequence;';
  }
}

#Do reporting
print "\nReport:\n";
foreach my $gname (sort keys %{$state})
{
  my $gid = $state->{$gname}->[3];
  my @results = @{$state->{$gname}};
  next unless($results[0] || $results[1] || $results[2]);
  print $gname, q{ : };
  if ($results[0])
  {
    my $plural = $results[0] > 1  ? 's' : q{};
    print "$results[0] $p{FROM} codon$plural changed; ";
  }
  if ($results[1])
  {

bin/BS_PCRTagger.pl  view on Meta::CPAN

  -types      => 'gene',
  -start      => $p{STARTPOS},
  -end        => $p{STOPPOS},
  -range_type => 'contains'
);
foreach my $gene (@genes)
{
  my $gstart = $gene->start;
  my $gend = $gene->end;
  my $glen = $gend - $gstart + 1;
  my $gid = $gene->Tag_load_id;
  my $newgeneseq = substr $newseq, $gstart - 1, $glen;
  my $oldgeneseq = substr $chrseq, $gstart - 1, $glen;
  if ($newgeneseq eq $oldgeneseq)
  {
    $report{$gid} .= ' No change in sequence;';
  }
  my $cdna = $chr->make_cDNA($gene);
  my $oldpep = $GD->translate(-sequence => $cdna);
  my $newcdna = $newchr->make_cDNA($gene);
  my $newpep = $GD->translate(-sequence => $newcdna);
  if ($newpep ne $oldpep)
  {
    $report{$gid} .= ' Change in amino acid sequence;';
  }
}
print "\n\n";

foreach my $gid (sort keys %report)
{
  print "$gid : $report{$gid}\n";
}

#Tell chromosome to write itself
$newchr->add_reason($p{EDITOR}, $p{MEMO});
$newchr->write_chromosome();

exit;

__END__

lib/Bio/BioStudio/CodonJuggling.pm  view on Meta::CPAN

  my @JOBLIST;
  my @OUTS;
  my @CLEANUP = ($parampath);
  my $exec = 'BS_auto_codon_juggle.pl';
  my $hookups = {};
  my $memo = {};
  foreach my $gene (@pgenes)
  {
    my $gname  = $gene->display_name;
    my $cmd = $exec . q{ -chr } . $chrname . q{ -key } . $key;
    $cmd .= q{ -gid } . $gname;
    my $outpath = $tmp_path . $key . q{_} . $gname . '.out';
    push @JOBLIST, $cmd . q{:} . $outpath . q{:0};
    push @OUTS, $outpath;
    $hookups->{$outpath} = $gname;
  }
  my $jobs = join "\n", @JOBLIST;
  my $morefiles = taskfarm($jobs, $chrname, $key);
  foreach my $out (@OUTS)
  {
    if (! -e $out)
    {
      my $gid = $hookups->{$out};
      $state->{$gid}->[2] .= ' Codon juggling failed! ';
      next;
    }
    my $result = retrieve($out);
    foreach my $gname (keys %{$result})
    {
      if (exists $state->{$gname})
      {
        $state->{$gname}->[0] += $result->{$gname}->[0];
        $state->{$gname}->[1] += $result->{$gname}->[1];
        $state->{$gname}->[2] .= $result->{$gname}->[2];

lib/Bio/BioStudio/CodonJuggling.pm  view on Meta::CPAN

  }
  return $state;
}

=head2 juggle_gene

=cut

sub juggle_gene
{
  my ($chr, $mask, $gid, $p) = @_;
  my $gene = $chr->fetch_features(-name => $gid);
  my $GD = $chr->GD();
  my $chrseq = $chr->sequence();
  my $return = {};

  my $offset = $gene->start() - 1;
  my $gmask = Bio::BioStudio::Mask->new(-sequence => $gene, -offset => $offset);
  my @subfeats = $chr->flatten_subfeats($gene);
  $gmask->add_to_mask(\@subfeats);
  my %subs = map {$_->primary_id => 1} @subfeats;
  $subs{$gid}++;
  
  my $orient = $gene->strand();
  my $gstart = $gene->start();
  my $gend = $gene->end();
  my $glen = $gend - $gstart + 1;
  #number of codons, number of alts, notes, $gid, codons
  $return->{$gid} = [0, 0, q{}, []];
  
  my $x = $gstart;
  while ($x <= $gend - 2)
  {
    ##Adjust for introns
    my %posa = $gmask->what_objects_overlap($x);
    my @introns = grep {$_->primary_tag eq 'intron'} values %posa;
    $x = $introns[0]->end + 1 if (scalar @introns);
    my $basea = $x;
    $x++;

lib/Bio/BioStudio/CodonJuggling.pm  view on Meta::CPAN

    my $cod = (substr $chrseq, $basea - 1, 1)
            . (substr $chrseq, $baseb - 1, 1)
            . (substr $chrseq, $basec - 1, 1);
    next if ($orient <  0 && $cod ne $p->{MORF});
    next if ($orient >= 0 && $cod ne $p->{FROM});
    
    ##Annotate a potential codon change
    my $newcod = $orient > 0  ? $p->{TO}  : $p->{OT};
    my $oldcod = $orient > 0 ? $p->{FROM} : $p->{MORF};
    my $oset = $basea - $gstart + 1;
    my $codname = $gid . q{_} . $p->{OLDAA} . $oset . $p->{NEWAA};

    my $codon = Bio::BioStudio::SeqFeature::Codon->new(
      -start        => $basea,
      -end          => $basec,
      -primary_tag  => $p->{SWAPTYPE},
      -display_name => $codname,
      -wtseq        => $oldcod,
      -newseq       => $newcod,
      -parent       => $gid
    );

    ## Check for overlaps
    my $maskbit = $mask->count_features_in_range($basea, 3);
    
    ## No overlap involved - make the change and move on
    #
    if ($maskbit == 1)
    {
      $return->{$gid}->[0]++;
      push @{$return->{$gid}->[3]}, $codon;
      next;
    }
    
    ## Overlap involved:
    #
    my @featlaps = $mask->feature_objects_in_range($basea, 3);
    my @theseolaps = grep {! exists $subs{$_->primary_id}} @featlaps;
    my $lapcount = scalar @theseolaps;
    # If there are more than one overlapping features, no change can be made
    if ($lapcount > 1)
    {
      $return->{$gid}->[2] .= "cannot change codon at $basea (too occluded) ";
      next;
    }
    # If there are zero, we have run into an internal error
    elsif ($lapcount < 1)
    {
      $return->{$gid}->[2] .= "cannot change codon at $basea (INTERNAL ERROR) ";
      next;
    }
    
    # If the only overlapping feature is an intron, no change can be made
    if ($theseolaps[0]->primary_tag eq 'intron')
    {
      $return->{$gid}->[2] .= "cannot change codon at $basea (overlaps an intron)";
      next;
    }
    
    # Now the overlap must be a CDS; determine the frame
    my $lapfeat = $theseolaps[0];
    my $lapname = $lapfeat->display_name;
    my $lapstart = $lapfeat->start;
    my $laporient = $lapfeat->strand;
    my $laplen = $lapfeat->end - $lapstart + 1;
    my $CDSseq = substr $chrseq, $lapstart - 1, $laplen;

lib/Bio/BioStudio/CodonJuggling.pm  view on Meta::CPAN

    # If the change doesn't preserve overlapping sequence and no exceptions were
    # provided, skip
    my $lapstatus = $lapfeat->Tag_orf_classification;
    my $genstatus = $gene->Tag_orf_classification;
    my $dubwhack = $lapstatus eq 'Dubious' && $genstatus ne 'Dubious' ? 1 : 0;
    my $verwhack = $genstatus ne 'Dubious'  ? 1 : 0;
    
    if ($allowflag == 0 && (! $p->{ALLWHACK}
      || ! ($dubwhack && $p->{DUBWHACK}) || ! ($verwhack && $p->{VERWHACK})))
    {
      $return->{$gid}->[2] .= "cannot change codon at $basea (overlaps $lapname)";
      next;
    }

    # Add the codon for this gene
    $return->{$gid}->[0]++;
    push @{$return->{$gid}->[3]}, $codon;

    # Add the codon for the overlapping gene
    my $newCDSseq = substr $chrseq, $lapstart - 1, $laplen;
    my $newframe = substr $newCDSseq, $syncodstart, $length;
    
    for (my $offset = 0; $offset <= $length - 3; $offset += 3)
    {
      my $oldsyncod = substr $inframe, $offset, 3;
      my $newsyncod = substr $newframe, $offset, 3;
      my $pos = $syncodstart + $offset + $lapstart + 1;

lib/Bio/BioStudio/CodonJuggling.pm  view on Meta::CPAN

        my $oldLaa = $GD->codontable->{$oldLcod};

        my $swaptype = $GD->codon_change_type(-from => $oldLcod, -to => $newLcod);
        $newLcod = $newsyncod if ($laporient == -1);
        $oldLcod = $oldsyncod if ($laporient == -1);
        
        my $Loffset = $pos - $lapstart + 1;
        my $parent = $lapfeat->has_tag('parent_id') ? $lapfeat->Tag_parent_id : $lapname;
        $return->{$parent} = [0, 0, q{}, []] if (! exists $return->{$parent});
        my $codLname = $parent . q{_} . $oldLaa . $Loffset . $newLaa;
        my $modnote = " $codLname modified to accommodate $p->{FROM} to $p->{TO} change in $gid";

        my $Lcodon = Bio::BioStudio::SeqFeature::Codon->new(
          -start        => $pos - 1,
          -end          => $pos+1,
          -primary_tag  => $swaptype,
          -display_name => $codLname,
          -wtseq        => $oldLcod,
          -newseq       => $newLcod,
          -parent       => $parent,
          -Note         => $modnote,

lib/Bio/BioStudio/PCRTagging.pm  view on Meta::CPAN

  @genes = sort {$b->end - $b->start <=> $a->end - $a->start} @genes;

  #Find tags, pick tags, implement tags, check for errors
  my @JOBLIST;
  my @OUTS;
  my @CLEANUP = ($parampath);
  my $exec = 'BS_auto_tag_genes.pl';
  my $hookup = {};
  foreach my $gene (@genes)
  {
    my $gid  = $gene->Tag_load_id;
    my $cmd = $exec . q{ -chr } . $chrname . q{ -key } . $key;
    $cmd .= q{ -gid } . $gid;
    my $outpath = $tmp_path . $key . q{_} . $gid . '.out';
    push @JOBLIST, $cmd . q{:} . $outpath . q{:0};
    push @OUTS, $outpath;
    $hookup->{$gid} = $outpath;
  }
  my $jobs = join "\n", @JOBLIST;
  my $morefiles = taskfarm($jobs, $chrname, $key);

  @genes = sort {$a->start <=> $b->start} @genes;
  foreach my $gene (@genes)
  {
    my $gid = $gene->Tag_load_id;
    my $outpath = $hookup->{$gid};
    if (! -e $outpath)
    {
      $report{$gid} = 'TAGGING FAILED!';
      next;
    }
    my $result = retrieve($outpath);
    my @tags = @{$result->[1]};
    my $comment = $result->[0];
    if (scalar @tags)
    {
      add_tags($newchr, $gene, $result->[1]);
    }
    $report{$gid} = $comment;
  }
  push @CLEANUP, @OUTS, @{$morefiles};
  cleanup(\@CLEANUP);
  $factory->cleanup();
  return \%report;
}

=head2 serial_tagging

=cut

lib/Bio/BioStudio/PCRTagging.pm  view on Meta::CPAN

    -seq_id     => $newchr->seq_id,
    -types      => 'gene',
    -start      => $p->{STARTPOS},
    -end        => $p->{STOPPOS},
    -range_type => 'contains'
  );

  #Find tags, pick tags, implement tags, check for errors
  foreach my $gene (@genes)
  {
    my $gid  = $gene->Tag_load_id;
    my $result = tag_gene($newchr, $mask, $factory, $gid, $p);
    my @tags = @{$result->[1]};
    my $comment = $result->[0];
    if (scalar @tags)
    {
      add_tags($newchr, $gene, $result->[1]);
    }
    $report{$gid} .= $comment;
  }
  $factory->cleanup();
  return \%report;
}

=head2 tag_gene

=cut

sub tag_gene
{
  my ($chr, $mask, $factory, $gid, $p) = @_;
  my $gene = $chr->fetch_features(-name => $gid);
  my $cDNA = $chr->make_cDNA($gene);
  my $dir = $gene->strand;
  my $return = undef;

  if (length $cDNA < $p->{MINORFLEN})
  {
    return ['too short', []];
  }
  
  #Extract all possible tags from the gene

lib/Bio/BioStudio/PCRTagging.pm  view on Meta::CPAN

  my ($gene, $chr, $mask, $p) = @_;
  my $MINTAGLEN        = $p->{MINTAGLEN},
  my $MAXTAGLEN        = $p->{MAXTAGLEN},
  my $MINTAGMELT       = $p->{MINTAGMELT},
  my $MAXTAGMELT       = $p->{MAXTAGMELT},
  my %BADAAS           = %{$p->{BADAAS}},
  my $MINPERDIFF       = $p->{MINPERDIFF},
  my $THREEPRIMEBUFFER = $p->{THREEPRIMEBUFFER},
  my $FIVEPRIMEBUFFER  = $p->{FIVEPRIMEBUFFER},

  my $gid = $gene->Tag_load_id;
  my $cDNA = $chr->make_cDNA($gene);
  my $GD = $chr->GD();
  my $gstart = $gene->start();
  my ($rstart, $rstop) = ($gstart, $gene->end);
  my $dir = $gene->strand;
  if ($dir != 1)
  {
    $rstart += $THREEPRIMEBUFFER;
    $rstop  -= $FIVEPRIMEBUFFER;
  }

lib/Bio/BioStudio/PCRTagging.pm  view on Meta::CPAN

    
    #Exclude oligos whose recodes don't meet melting standard
    my $MDTm = _ntherm($mdoligo);
    if ($MDTm  < $MINTAGMELT || $MDTm > $MAXTAGMELT)
    {
      ($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
      next;
    }
    
    my $offset = $tstart - $gstart + 1;
    my $tagid = $gid . q{_} . $offset;
    my $tag = Bio::BioStudio::SeqFeature::Tag->new(
      -start          => $offset,
      -end            => $offset + $olen - 1,
      -display_name   => $tagid,
      -ingene         => $gid,
      -wtseq          => $oligo,
      -newseq         => $mdoligo,
      -wtpos          => $tstart,
      -difference     => $comps->{P},
      -translation    => $peptide,
    );
    push @pretags, $tag;
    
    ($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
  }

lib/Bio/BioStudio/PCRTagging.pm  view on Meta::CPAN

  my %objs = ();
  # The utag wt and new sequences, forwards
  my $uid = $utag->display_name;
  my $uwtid = 'rcwt_' . $uid;
  $phits{$uwtid} = [];
  $objs{$uwtid} = Bio::Seq->new(-seq => $utag->wtseq(),  id => $uwtid);
  my $unewid = 'rcmd_' . $uid;
  $phits{$unewid} = [];
  $objs{$unewid} = Bio::Seq->new(-seq => $utag->newseq(), id => $unewid);
  # The dtags wt and new sequences, reverses
  my @dtagids = ();
  my @downbuds = @{$dntags};
  my %refs;
  foreach my $dtag (@downbuds)
  {
    my $did = $dtag->display_name;
    $refs{$did} = $dtag;
    push @dtagids, $did;
    my $dwtid = 'rcwt_' . $did;
    $phits{$dwtid} = [];
    $objs{$dwtid} = Bio::Seq->new(-seq => $dtag->wtseq(),  id => $dwtid);
    my $dnewid = 'rcmd_' . $did;
    $phits{$dnewid} = [];
    $objs{$dnewid} = Bio::Seq->new(-seq => $dtag->newseq(), id => $dnewid);
  }
  
  my @pairobjs = values %objs;
  $factory->run(

lib/Bio/BioStudio/PCRTagging.pm  view on Meta::CPAN

      {
        next if $hsp->percent_identity < 85;
        next if ($hsp->length < .7 * $qlen);
        push @{$phits{$name}}, [$hit, $hsp];
      }
    }
  }
  my $partner = undef;
  my @Fwts = @{$phits{'rcwt_' . $uid}};
  my @Fmds = @{$phits{'rcmd_' . $uid}};
  foreach my $dtagid (@dtagids)
  {
    my ($wtnogo, $mdnogo) = (0, 0);
    my @Rwts = @{$phits{'rcwt_' . $dtagid}};
    foreach my $Rwpair (@Rwts)
    {
      my ($Rwhit, $Rwhsp) = @{$Rwpair};
      foreach my $Fwpair (@Fwts)
      {
        my ($Fwhit, $Fwhsp) = @{$Fwpair};
        #Pass if hits are on different chromosomes
        next if ($Rwhit->name ne $Fwhit->name);
        #Pass if hits are on the same strand
        next if ($Rwhsp->strand eq $Fwhsp->strand);
        $wtnogo++;
      }
    }
    my @Rmds = @{$phits{'rcmd_' . $dtagid}};
    foreach my $Rmpair (@Rmds)
    {
      my ($Rmhit, $Rmhsp) = @{$Rmpair};
      foreach my $Fmpair (@Fmds)
      {
        my ($Fmhit, $Fmhsp) = @{$Fmpair};
        #Pass if hits are on different chromosomes
        next if ($Rmhit->name ne $Fmhit->name);
        #Pass if hits are on the same strand
        next if ($Rmhsp->strand eq $Fmhsp->strand);
        $mdnogo++;
      }
    }
    if ($mdnogo + $wtnogo == 0)
    {
      $partner = $refs{$dtagid};
      last;
    }
  }
  $factory->cleanup;
  return $partner;
}

=head2 counterset

=cut

lib/Bio/BioStudio/PCRTagging.pm  view on Meta::CPAN


=cut

sub add_tags
{
  my ($chr, $gene, $tagarr, $report) = @_;
  my @chosen = @{$tagarr};
  my $tagcount = scalar @chosen;
  my $gstart = $gene->start;
  my $gend = $gene->end;
  my $gid = $gene->Tag_load_id;
  my $tcount = 1;
  foreach my $tagpair (@chosen)
  {
    my ($gutag, $gdtag) = @{$tagpair};
    my $adjust = $gstart - 1;
    $gutag->start($gutag->start() + $adjust);
    $gdtag->start($gdtag->start() + $adjust);
    $gutag->end($gutag->end() + $adjust);
    $gdtag->end($gdtag->end() + $adjust);
    my $utagid = $gutag->display_name;
    my $dtagid = $gdtag->display_name;
    my $aid = $gid . '_amp' . $tcount;
    my $comment  = "PCR_product $aid annotated ";
    $comment .= "(tags $utagid and $dtagid added)";
  
    my $amp = Bio::BioStudio::SeqFeature::Amplicon->new(
      -start          => $gutag->start,
      -end            => $gdtag->end,
      -ingene         => $gid,
      -uptag          => $utagid,
      -dntag          => $dtagid,
      -display_name   => $aid,
    );
    $chr->add_feature(-feature => $gutag);
    $chr->add_feature(-feature => $gdtag);
    $chr->add_feature(-feature => $amp, -comments => [$comment]);
    $tcount++;
  }
  return;
}



( run in 1.776 second using v1.01-cache-2.11-cpan-5735350b133 )