Bio-MUST-Drivers

 view release on metacpan or  search on metacpan

bin/annotate-ali.pl  view on Meta::CPAN

#!/usr/bin/env perl
# PODNAME: annotate-ali.pl
# ABSTRACT: Annotate sequences by homology search using BLAST
# CONTRIBUTOR: Valerian LUPO <valerian.lupo@uliege.be>

use Modern::Perl '2011';
use autodie;

use Getopt::Euclid qw(:vars);
use Smart::Comments '###';

use Tie::IxHash;

use Bio::MUST::Core;
use Bio::MUST::Core::Utils qw(secure_outfile);
use Bio::MUST::Drivers;


# TODO: add support for prebuilt reference database (e.g. nr)

# convert fractional identity threshold to percentage (see Euclid)
$ARGV_identity *= 100.0 if 0 < $ARGV_identity && $ARGV_identity <= 1;

### Building database: $ARGV_ref_file
my $blastdb = Bio::MUST::Drivers::Blast::Database::Temporary->new(
    seqs => $ARGV_ref_file
);

for my $infile (@ARGV_infiles) {

    ### Processing: $infile
    my $queries = Bio::MUST::Core::Ali::Temporary->new( seqs => $infile );

    # let driver decide BLAST flavor except if nucl:nucl and --tblastx option
    my $method = $queries->type eq 'nucl' && $blastdb->type eq 'nucl'
        && $ARGV_tblastx ? 'tblastx' : 'blast';

    #### Performing BLAST...
    my $parser = $blastdb->$method($queries, {
        -outfmt => 6,
        -evalue => $ARGV_evalue,
        -max_target_seqs => $ARGV_max_hits
    } );

    #### Parsing BLAST report...
    tie my %ann_for,    'Tie::IxHash';
    tie my %hit_id_for, 'Tie::IxHash';
    my $curr_id = q{};

    HIT:
    while ( my $hit = $parser->next_hit ) {
        my ($qid, $hid, $identity)
            = map { $hit->$_ } qw(query_id hit_id percent_identity);

        next HIT if $identity < $ARGV_identity;     # skip weak-identity hits

        unless ($ARGV_hit_list) {                   # optionally
            next HIT if $qid eq $curr_id;           # skip non-first hits
            $curr_id = $qid;
        }

        # capture annotation bit in ref seq_id using regex
        my ($annotation) = $blastdb->long_id_for($hid) =~ $ARGV_ref_regex;
        my $full_qid = $queries->long_id_for($qid);
        $ann_for{$full_qid} //= $annotation;        # use only first hit



( run in 0.457 second using v1.01-cache-2.11-cpan-5837b0d9d2c )