Bio-MUST-Drivers
view release on metacpan or search on metacpan
bin/annotate-ali.pl view on Meta::CPAN
#!/usr/bin/env perl
# PODNAME: annotate-ali.pl
# ABSTRACT: Annotate sequences by homology search using BLAST
# CONTRIBUTOR: Valerian LUPO <valerian.lupo@uliege.be>
use Modern::Perl '2011';
use autodie;
use Getopt::Euclid qw(:vars);
use Smart::Comments '###';
use Tie::IxHash;
use Bio::MUST::Core;
use Bio::MUST::Core::Utils qw(secure_outfile);
use Bio::MUST::Drivers;
# TODO: add support for prebuilt reference database (e.g. nr)
# convert fractional identity threshold to percentage (see Euclid)
$ARGV_identity *= 100.0 if 0 < $ARGV_identity && $ARGV_identity <= 1;
### Building database: $ARGV_ref_file
my $blastdb = Bio::MUST::Drivers::Blast::Database::Temporary->new(
seqs => $ARGV_ref_file
);
for my $infile (@ARGV_infiles) {
### Processing: $infile
my $queries = Bio::MUST::Core::Ali::Temporary->new( seqs => $infile );
# let driver decide BLAST flavor except if nucl:nucl and --tblastx option
my $method = $queries->type eq 'nucl' && $blastdb->type eq 'nucl'
&& $ARGV_tblastx ? 'tblastx' : 'blast';
#### Performing BLAST...
my $parser = $blastdb->$method($queries, {
-outfmt => 6,
-evalue => $ARGV_evalue,
-max_target_seqs => $ARGV_max_hits
} );
#### Parsing BLAST report...
tie my %ann_for, 'Tie::IxHash';
tie my %hit_id_for, 'Tie::IxHash';
my $curr_id = q{};
HIT:
while ( my $hit = $parser->next_hit ) {
my ($qid, $hid, $identity)
= map { $hit->$_ } qw(query_id hit_id percent_identity);
next HIT if $identity < $ARGV_identity; # skip weak-identity hits
unless ($ARGV_hit_list) { # optionally
next HIT if $qid eq $curr_id; # skip non-first hits
$curr_id = $qid;
}
# capture annotation bit in ref seq_id using regex
my ($annotation) = $blastdb->long_id_for($hid) =~ $ARGV_ref_regex;
my $full_qid = $queries->long_id_for($qid);
$ann_for{$full_qid} //= $annotation; # use only first hit
( run in 0.457 second using v1.01-cache-2.11-cpan-5837b0d9d2c )