Bio-BioStudio

 view release on metacpan or  search on metacpan

lib/Bio/BioStudio/RestrictionEnzyme/Seek.pm  view on Meta::CPAN


=head1 Functions

=head2 farm_search

=cut

sub farm_search
{
  my ($chr, $featlist, $p) = @_;
  my $chrname = $chr->name();
  my $redbname = $chrname . '_RED';
  my $GD = $chr->GD();
  my $RES = $GD->set_restriction_enzymes(-enzyme_set => $p->{ENZYME_SET});
  my @regs = grep {$_->class ne 'IIB'} values %{$RES};
  
  my $key = Digest::MD5::md5_hex(Digest::MD5::md5_hex(time().{}.rand().$$));
  my $tmp_path = Bio::BioStudio::ConfigData->config('tmp_path');
  
  my $tree = $GD->build_prefix_tree(-input => \@regs, -peptide => 1);
  my $treefilename = $tmp_path . $key . '.ptree';
  store $tree, $treefilename;

  my $parampath = $tmp_path . $key . '.param';
  store $p, $parampath;
    
  my @JOBLIST;
  my @OUTS;
  my @CLEANUP = ($treefilename);
  my $exec = 'BS_auto_gather_enzymes.pl';
  # Take each feature and create a taskfarmer work order for it.
  #
  my @feats = sort {$b->end - $b->start <=> $a->end - $a->start} @{$featlist};
  foreach my $feat (@feats)
  {
    my $name = $feat->display_name;
    my $outpath = $tmp_path . $key . q{_} . $name . '.out';
    my $cmd = $exec . q{ -chr } . $chrname . q{ -key } . $key;
    $cmd .= q{ -feature } . $name;
    push @JOBLIST, $cmd . q{:} . $outpath . q{:0};
    push @OUTS, $outpath;
  }
  my $jobs = join "\n", @JOBLIST;
  my $morefiles = taskfarm($jobs, $chrname, $key, 32);
  
  my @reals = ();
  foreach my $out (@OUTS)
  {
    if (-e $out)
    {
      push @reals, $out;
    }
    else
    {
      print "Failed on $out\n";
    }
  }
  my $loadpath = $tmp_path . $key . q{_} . $redbname . '.load';
  my $catcmd = "cat @reals > $loadpath";
  safeopen($catcmd);
  chmod 0777, $loadpath;
  my $REDB = Bio::BioStudio::RestrictionEnzyme::Store->new
  (
    -name => $redbname,
    -enzyme_definitions => $RES,
    -create => 1,
    -file => $loadpath
  );
  $REDB->dumpfile($loadpath);
  $REDB->load();
  push @CLEANUP, @OUTS, @{$morefiles};
  cleanup(\@CLEANUP);
  return $REDB;
}

=head2 serial_search

=cut

sub serial_search
{
  my ($chr, $featlist, $p) = @_;
  my $chrname = $chr->name();
  my $chrlen = $chr->len();
  my $redbname = $chrname . '_RED';
  my $RES = $chr->GD->set_restriction_enzymes(-enzyme_set => $p->{ENZYME_SET});
  my @regs = grep {$_->class ne 'IIB'} values %{$RES};
  my @bees = grep {$_->class eq 'IIB'} values %{$RES};

  ## Create and populate the prefix tree of restriction enzyme recognition sites
  my $tree = $chr->GD->build_prefix_tree(-input => \@regs, -peptide => 1);

  my $tmp_path = Bio::BioStudio::ConfigData->config('tmp_path');
  my $filename = $tmp_path . $redbname . '.out';
  #my @feats = sort {$b->end - $b->start <=> $a->end - $a->start} @{$featlist};
  my @feats = sort {$a->end - $a->start <=> $b->end - $b->start} @{$featlist};
  chmod 0777, $filename;
  open my $OUT, '>', $filename;
  foreach my $feat (@feats)
  {
    my $id = $feat->display_name;
    #print "Checking $id\n";
    my $results;
    my $besults;
    if ($feat->primary_tag eq 'CDS')
    {
      $besults = find_IIBs_in_CDS($chr, \@bees, $feat);
      $results = find_enzymes_in_CDS($chr, $tree, $feat);
    }
    else
    {
      $results = find_enzymes_in_igen($chr, $feat->start, $feat->end);
    }
    foreach my $enz (@{$results}, @{$besults})
    {
      print $OUT $enz->line_report(q{.}, "\n");
    }
    #print "\tGot ", scalar @{$results}, " plus ", scalar @{$besults}, " from $id\n";
  }
  close $OUT;
  my $REDB = Bio::BioStudio::RestrictionEnzyme::Store->new
  (
    -name => $redbname,
    -enzyme_definitions => $RES,
    -create => 1,
    -file => $filename
  );
  $REDB->dumpfile($filename);
  $REDB->load();
  return $REDB;
}

=head2 find_enzymes_in_CDS

=cut

sub find_enzymes_in_CDS
{
  my ($chr, $tree, $feat) = @_;
  my $GD = $chr->GD();
  my $phase = $feat->phase || 0;
  my $nucseq = $feat->seq->seq;
  my $aaseq = $GD->translate(-sequence => $nucseq, -frame => 1 + $phase);
  my $hits = $GD->search_prefix_tree(-tree => $tree, -sequence => $aaseq);
  my $results = [];
  my $RES = $GD->enzyme_set;
  my $fstart = $feat->start;
  my $fend = $feat->end;
  my $forient = $feat->strand;
  foreach my $rog (@{$hits})
  {
    my $enz       = $rog->[0];
    my $enzyme    = $RES->{$enz};
    my $class     = $enzyme->class();
    my $type      = $enzyme->type();
    my $nucstart  = $rog->[1] * 3;
    my $realpos   = $fstart + $nucstart;



( run in 2.150 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )