Bio-BioStudio

 view release on metacpan or  search on metacpan

lib/Bio/BioStudio/PCRTagging.pm  view on Meta::CPAN

    
    #Exclude oligos that begin or end in unswappable codons
    my $peptide = _translate($woligo, $dir, $GD->{codontable});
    my $firstres = substr $peptide,  0, 1;
    my $lastres  = substr $peptide, -1, 1;
    if (exists $BADAAS{$firstres} || exists $BADAAS{$lastres})
    {
      ($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
      next;
    }
    
    #Exclude oligos that don't meet melting standard
    my $currTm = _ntherm($oligo);
    if ($currTm  < $MINTAGMELT || $currTm > $MAXTAGMELT)
    {
      if ($currTm < $MINTAGMELT)
      {
        $rstart = $rstart + 3;
        $count = $MAXTAGLEN;
      }
      elsif ($currTm > $MAXTAGMELT)
      {
        ($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
      }
      next;
    }
        
    #Exclude oligos that aren't entirely in exons or lapped by other genes
    my @ofeats = $mask->feature_objects_in_range($rstart, $count);
    my @ifeats = grep { $_->primary_tag eq 'intron' } @ofeats;
    if (scalar @ifeats || scalar @ofeats > 1)
    {
      ($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
      next;
    }

    #Recode oligos
    $woligo = _complement($woligo, 1) if ($dir == -1);
    my $wmdoligo = $GD->codon_juggle(
      -sequence => $woligo,
      -algorithm => 'most_different_sequence'
    );
    
    #Ensure that the first two bases are the same
    if ((substr $wmdoligo, 0, 2) ne (substr $woligo, 0, 2))
    {
      my $codon = substr $woligo, 0, 3;
      my $aa = $GD->{codontable}->{$codon};
      my $di = substr $codon, 0, 2;
      my $possibles = $GD->{reversecodontable}->{$aa};
      my @choices = grep {(substr $_, 0, 2) eq $di} @{$possibles};
      @choices = grep {(substr $_, -1) ne (substr $codon, -1)} @choices;
      substr $wmdoligo, 0, 3, $choices[0];
    }
    $woligo = _complement($woligo, 1) if ($dir == -1);
    $wmdoligo = _complement($wmdoligo, 1) if ($dir == -1);
    my $mdoligo = $dir == 1
      ? substr($wmdoligo, 2, $wolen - 2)
      : substr($wmdoligo, 0, $wolen - 2);

    #Exclude oligos whose recodes don't meet percent difference standard
    my $comps = _compare_sequences($oligo, $mdoligo);
    if ( $comps->{P} < $MINPERDIFF)
    {
      ($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
      next;
    }
    
    #Exclude oligos whose recodes don't meet melting standard
    my $MDTm = _ntherm($mdoligo);
    if ($MDTm  < $MINTAGMELT || $MDTm > $MAXTAGMELT)
    {
      ($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
      next;
    }
    
    my $offset = $tstart - $gstart + 1;
    my $tagid = $gid . q{_} . $offset;
    my $tag = Bio::BioStudio::SeqFeature::Tag->new(
      -start          => $offset,
      -end            => $offset + $olen - 1,
      -display_name   => $tagid,
      -ingene         => $gid,
      -wtseq          => $oligo,
      -newseq         => $mdoligo,
      -wtpos          => $tstart,
      -difference     => $comps->{P},
      -translation    => $peptide,
    );
    push @pretags, $tag;
    
    ($count, $rstart) = counterset($count, $rstart, $MINTAGLEN, $MAXTAGLEN);
  }
  return \@pretags;
}

=head2 BLAST_tags

=cut

sub BLAST_tags
{
  my ($tagarr, $factory) = @_;
  my @tags = @{$tagarr};
  
  #BLAST all tags against the latest version of the genome
  my @tagobjs = ();
  foreach my $tag (@tags)
  {
    my $id = $tag->display_name;
    my $wtobj  = Bio::Seq->new(-seq => $tag->wtseq(),  id => 'wt_' . $id);
    my $newobj = Bio::Seq->new(-seq => $tag->newseq(), id => 'md_' . $id);
    push @tagobjs, $wtobj, $newobj;
  }
  
  $factory->run(
    -method           => 'blastn',
    -query            => \@tagobjs,
    -method_args => [
      -word_size      => 17,
      -perc_identity  => 70

lib/Bio/BioStudio/PCRTagging.pm  view on Meta::CPAN

  return \@chosen;
}

=head2 BLAST_pairs

=cut

sub BLAST_pairs
{
  my ($utag, $dntags, $factory) = @_;
  #BLAST the pair to check amplification uniqueness.
  #
  my %phits = ();
  my %objs = ();
  # The utag wt and new sequences, forwards
  my $uid = $utag->display_name;
  my $uwtid = 'rcwt_' . $uid;
  $phits{$uwtid} = [];
  $objs{$uwtid} = Bio::Seq->new(-seq => $utag->wtseq(),  id => $uwtid);
  my $unewid = 'rcmd_' . $uid;
  $phits{$unewid} = [];
  $objs{$unewid} = Bio::Seq->new(-seq => $utag->newseq(), id => $unewid);
  # The dtags wt and new sequences, reverses
  my @dtagids = ();
  my @downbuds = @{$dntags};
  my %refs;
  foreach my $dtag (@downbuds)
  {
    my $did = $dtag->display_name;
    $refs{$did} = $dtag;
    push @dtagids, $did;
    my $dwtid = 'rcwt_' . $did;
    $phits{$dwtid} = [];
    $objs{$dwtid} = Bio::Seq->new(-seq => $dtag->wtseq(),  id => $dwtid);
    my $dnewid = 'rcmd_' . $did;
    $phits{$dnewid} = [];
    $objs{$dnewid} = Bio::Seq->new(-seq => $dtag->newseq(), id => $dnewid);
  }
  
  my @pairobjs = values %objs;
  $factory->run(
    -method               => 'blastn',
    -query                => \@pairobjs,
    -method_args => [
      -word_size          => 4,
      -gapextend          => 2,
      -gapopen            => 1,
      -penalty            => -1,
      -perc_identity      => 70,
      -best_hit_overhang  => .25
  ]);
  $factory->rewind_results;
  while (my $result = $factory->next_result)
  {
    my $name = $result->query_name();
    my $qlen = length $objs{$name}->seq;
    while( my $hit = $result->next_hit())
    {
      while( my $hsp = $hit->next_hsp())
      {
        next if $hsp->percent_identity < 85;
        next if ($hsp->length < .7 * $qlen);
        push @{$phits{$name}}, [$hit, $hsp];
      }
    }
  }
  my $partner = undef;
  my @Fwts = @{$phits{'rcwt_' . $uid}};
  my @Fmds = @{$phits{'rcmd_' . $uid}};
  foreach my $dtagid (@dtagids)
  {
    my ($wtnogo, $mdnogo) = (0, 0);
    my @Rwts = @{$phits{'rcwt_' . $dtagid}};
    foreach my $Rwpair (@Rwts)
    {
      my ($Rwhit, $Rwhsp) = @{$Rwpair};
      foreach my $Fwpair (@Fwts)
      {
        my ($Fwhit, $Fwhsp) = @{$Fwpair};
        #Pass if hits are on different chromosomes
        next if ($Rwhit->name ne $Fwhit->name);
        #Pass if hits are on the same strand
        next if ($Rwhsp->strand eq $Fwhsp->strand);
        $wtnogo++;
      }
    }
    my @Rmds = @{$phits{'rcmd_' . $dtagid}};
    foreach my $Rmpair (@Rmds)
    {
      my ($Rmhit, $Rmhsp) = @{$Rmpair};
      foreach my $Fmpair (@Fmds)
      {
        my ($Fmhit, $Fmhsp) = @{$Fmpair};
        #Pass if hits are on different chromosomes
        next if ($Rmhit->name ne $Fmhit->name);
        #Pass if hits are on the same strand
        next if ($Rmhsp->strand eq $Fmhsp->strand);
        $mdnogo++;
      }
    }
    if ($mdnogo + $wtnogo == 0)
    {
      $partner = $refs{$dtagid};
      last;
    }
  }
  $factory->cleanup;
  return $partner;
}

=head2 counterset

=cut

sub counterset
{
  my ($count, $start, $minlen, $maxlen) = @_;
  $start = $start + 3 if ($count < $minlen + 2);
  $count = $count >= $minlen + 2 ? $count - 3 : $maxlen;
  return ($count, $start);
}



( run in 0.523 second using v1.01-cache-2.11-cpan-39bf76dae61 )