Bio-MUST-Drivers
view release on metacpan or search on metacpan
lib/Bio/MUST/Drivers/Exonerate.pm view on Meta::CPAN
use aliased 'Bio::MUST::Drivers::Exonerate::Sugar';
has 'dna_seq' => (
is => 'ro',
isa => 'Bio::MUST::Core::Seq',
required => 1,
);
has 'pep_seq' => (
is => 'ro',
isa => 'Bio::MUST::Core::Seq',
required => 1,
);
has 'genetic_code' => (
is => 'ro',
isa => 'Bio::MUST::Core::GeneticCode',
required => 1,
);
has '_ali' => (
is => 'ro',
isa => 'Bio::MUST::Core::Ali',
init_arg => undef,
writer => '_set_ali',
handles => {
all_cds => 'all_seqs',
count_cds => 'count_seqs',
},
);
has '_sugars' => (
traits => ['Array'],
is => 'ro',
isa => 'ArrayRef[Bio::MUST::Drivers::Exonerate::Sugar]',
default => sub { [] },
handles => {
add_sugar => 'push',
get_sugar => 'get',
},
);
const my @attrs => qw(
query_id query_start query_end query_strand
target_id target_start target_end target_strand
score
);
sub BUILD {
my $self = shift;
# provision executable
my $app = use_module('Bio::MUST::Provision::Exonerate')->new;
$app->meet();
# build temp Ali file for input DNA seq
my $dna = Ali->new(
seqs => [ $self->dna_seq ],
guessing => 0,
);
my $dnafile = $dna->temp_fasta;
# build temp Ali file for input PEP seq
my $pep = Ali->new(
seqs => [ $self->pep_seq ],
guessing => 0,
);
my $pepfile = $pep->temp_fasta;
# setup output file
my $outfile = $dnafile . '.exonerate.out';
# TODO: make outfile name more robust using File::Temp
# create exonerate command
my $pgm = 'exonerate';
my $code = $self->genetic_code->ncbi_id;
my $cmd = qq{$pgm --ryo ">%S\\n%tcs" --showvulgar no}
. " --showalignment no --verbose 0 --geneticcode $code"
. " --model protein2genome --query $pepfile --target $dnafile"
. " > $outfile 2> /dev/null"
;
# try to robustly execute exonerate
my $ret_code = system( [ 0, 127 ], $cmd);
if ($ret_code == 127) {
carp "[BMD] Warning: Cannot execute $pgm command; returning nothing!";
return;
}
# TODO: try to bypass shell (need for absolute path to executable then)
# read output file (FASTA format with special defline)
my $ali = Ali->load($outfile);
$self->_set_ali($ali);
# parse deflines and store them as Sugar objects
# TODO: fix coordinates for consistency with Aligned? (beware of rev_comp)
for my $id ($ali->all_seq_ids) {
my @fields = split /\s+/xms, $id->full_id;
$fields[0] = $self->pep_seq->seq_id->full_id;
$fields[4] = $self->dna_seq->seq_id->full_id;
@fields[3,7] = map {
$_ eq '-' ? -1 : # reverse
$_ eq '+' ? 1 : # forward
$_ eq '.' ? 1 : # unknown
$_
} @fields[3,7];
$self->add_sugar( Sugar->new( { mesh @attrs, @fields } ) );
}
# TODO: check value of strands / gene orientation
# unlink temp files
file($_)->remove for ($dnafile, $pepfile, $outfile);
return;
}
sub cds_order {
my $self = shift;
# return exon indices according to start pos in protein coordinates
my @order = sort {
$self->get_sugar($a)->query_start <=> $self->get_sugar($b)->query_start
} 0..$self->count_cds-1;
return @order;
( run in 2.650 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )