Bio-BioStudio
view release on metacpan or search on metacpan
lib/Bio/BioStudio/RestrictionEnzyme/Seek.pm view on Meta::CPAN
=head1 Functions
=head2 farm_search
=cut
sub farm_search
{
my ($chr, $featlist, $p) = @_;
my $chrname = $chr->name();
my $redbname = $chrname . '_RED';
my $GD = $chr->GD();
my $RES = $GD->set_restriction_enzymes(-enzyme_set => $p->{ENZYME_SET});
my @regs = grep {$_->class ne 'IIB'} values %{$RES};
my $key = Digest::MD5::md5_hex(Digest::MD5::md5_hex(time().{}.rand().$$));
my $tmp_path = Bio::BioStudio::ConfigData->config('tmp_path');
my $tree = $GD->build_prefix_tree(-input => \@regs, -peptide => 1);
my $treefilename = $tmp_path . $key . '.ptree';
store $tree, $treefilename;
my $parampath = $tmp_path . $key . '.param';
store $p, $parampath;
my @JOBLIST;
my @OUTS;
my @CLEANUP = ($treefilename);
my $exec = 'BS_auto_gather_enzymes.pl';
# Take each feature and create a taskfarmer work order for it.
#
my @feats = sort {$b->end - $b->start <=> $a->end - $a->start} @{$featlist};
foreach my $feat (@feats)
{
my $name = $feat->display_name;
my $outpath = $tmp_path . $key . q{_} . $name . '.out';
my $cmd = $exec . q{ -chr } . $chrname . q{ -key } . $key;
$cmd .= q{ -feature } . $name;
push @JOBLIST, $cmd . q{:} . $outpath . q{:0};
push @OUTS, $outpath;
}
my $jobs = join "\n", @JOBLIST;
my $morefiles = taskfarm($jobs, $chrname, $key, 32);
my @reals = ();
foreach my $out (@OUTS)
{
if (-e $out)
{
push @reals, $out;
}
else
{
print "Failed on $out\n";
}
}
my $loadpath = $tmp_path . $key . q{_} . $redbname . '.load';
my $catcmd = "cat @reals > $loadpath";
safeopen($catcmd);
chmod 0777, $loadpath;
my $REDB = Bio::BioStudio::RestrictionEnzyme::Store->new
(
-name => $redbname,
-enzyme_definitions => $RES,
-create => 1,
-file => $loadpath
);
$REDB->dumpfile($loadpath);
$REDB->load();
push @CLEANUP, @OUTS, @{$morefiles};
cleanup(\@CLEANUP);
return $REDB;
}
=head2 serial_search
=cut
sub serial_search
{
my ($chr, $featlist, $p) = @_;
my $chrname = $chr->name();
my $chrlen = $chr->len();
my $redbname = $chrname . '_RED';
my $RES = $chr->GD->set_restriction_enzymes(-enzyme_set => $p->{ENZYME_SET});
my @regs = grep {$_->class ne 'IIB'} values %{$RES};
my @bees = grep {$_->class eq 'IIB'} values %{$RES};
## Create and populate the prefix tree of restriction enzyme recognition sites
my $tree = $chr->GD->build_prefix_tree(-input => \@regs, -peptide => 1);
my $tmp_path = Bio::BioStudio::ConfigData->config('tmp_path');
my $filename = $tmp_path . $redbname . '.out';
#my @feats = sort {$b->end - $b->start <=> $a->end - $a->start} @{$featlist};
my @feats = sort {$a->end - $a->start <=> $b->end - $b->start} @{$featlist};
chmod 0777, $filename;
open my $OUT, '>', $filename;
foreach my $feat (@feats)
{
my $id = $feat->display_name;
#print "Checking $id\n";
my $results;
my $besults;
if ($feat->primary_tag eq 'CDS')
{
$besults = find_IIBs_in_CDS($chr, \@bees, $feat);
$results = find_enzymes_in_CDS($chr, $tree, $feat);
}
else
{
$results = find_enzymes_in_igen($chr, $feat->start, $feat->end);
}
foreach my $enz (@{$results}, @{$besults})
{
print $OUT $enz->line_report(q{.}, "\n");
}
#print "\tGot ", scalar @{$results}, " plus ", scalar @{$besults}, " from $id\n";
}
close $OUT;
my $REDB = Bio::BioStudio::RestrictionEnzyme::Store->new
(
-name => $redbname,
-enzyme_definitions => $RES,
-create => 1,
-file => $filename
);
$REDB->dumpfile($filename);
$REDB->load();
return $REDB;
}
=head2 find_enzymes_in_CDS
=cut
sub find_enzymes_in_CDS
{
my ($chr, $tree, $feat) = @_;
my $GD = $chr->GD();
my $phase = $feat->phase || 0;
my $nucseq = $feat->seq->seq;
my $aaseq = $GD->translate(-sequence => $nucseq, -frame => 1 + $phase);
my $hits = $GD->search_prefix_tree(-tree => $tree, -sequence => $aaseq);
my $results = [];
my $RES = $GD->enzyme_set;
my $fstart = $feat->start;
my $fend = $feat->end;
my $forient = $feat->strand;
foreach my $rog (@{$hits})
{
my $enz = $rog->[0];
my $enzyme = $RES->{$enz};
my $class = $enzyme->class();
my $type = $enzyme->type();
my $nucstart = $rog->[1] * 3;
my $realpos = $fstart + $nucstart;
( run in 2.150 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )