Bio-MLST-Check
view release on metacpan or search on metacpan
lib/Bio/MLST/Blast/BlastN.pm view on Meta::CPAN
package Bio::MLST::Blast::BlastN;
# ABSTRACT: Wrapper around NCBI BlastN
$Bio::MLST::Blast::BlastN::VERSION = '2.1.1706216';
use Moose;
use Bio::MLST::Types;
use List::Util qw(reduce max min);
# input variables
# All attributes are read-only ('ro'); the first three must be supplied to the
# constructor.

# Required string naming the BLAST database.
# NOTE(review): presumably a path/prefix of a formatted nucleotide database - confirm against the caller.
has 'blast_database' => ( is => 'ro', isa => 'Str', required => 1 );
# Required string naming the query file.
has 'query_file' => ( is => 'ro', isa => 'Str', required => 1 );
# Required hash reference.
# NOTE(review): appears to map allele name => minimum alignment length, matching
# the $word_sizes argument of _filter_by_alignment_length - confirm.
has 'word_sizes' => ( is => 'ro', isa => 'HashRef', required => 1 );
# Executable to run; defaults to 'blastn'. The 'Bio::MLST::Executable' type is
# not defined in this file (presumably it comes from Bio::MLST::Types).
has 'exec' => ( is => 'ro', isa => 'Bio::MLST::Executable', default => 'blastn' );
# Integer identity threshold, defaulting to 0.
# NOTE(review): presumably passed through to blastn's -perc_identity option - confirm.
has 'perc_identity' => ( is => 'ro', isa => 'Int', default => 0 );
# Generated
# Lazily computed on first access by the _build_top_hit builder; the value is a
# hash reference or undef (Maybe[HashRef]).
has 'top_hit' => ( is => 'ro', isa => 'Maybe[HashRef]', lazy => 1, builder => '_build_top_hit' );
sub _build_hit
{
    ###
    # Turns one tab-separated line of BLAST output into a hit hash reference.
    # Columns are read by position: 0 allele name, 1 source name, 2 percentage
    # identity, 3 alignment length, 8 and 9 the two source coordinates and
    # 12 the number of matches.
    # The coordinates are always returned in ascending order; 'reverse' is 1
    # when they had to be swapped and 0 otherwise.
    ###
    my ($self, $line) = @_;
    chomp $line;
    my @fields = split(/\t/, $line);

    my ($low, $high) = @fields[8, 9];
    my $is_reverse = 0;
    unless ($low <= $high)
    {
        # The coordinates came in descending order: normalise and flag it.
        ($low, $high) = ($high, $low);
        $is_reverse = 1;
    }

    my %hit = (
        'allele_name'             => $fields[0],
        'source_name'             => $fields[1],
        'percentage_identity'     => $fields[2],
        'sample_alignment_length' => $fields[3],
        'matches'                 => $fields[12],
        'source_start'            => $low,
        'source_end'              => $high,
        'reverse'                 => $is_reverse,
    );
    return \%hit;
}
sub _build_hits
{
    ###
    # Reads every remaining line from the given filehandle and converts each
    # one into a hit with _build_hit.
    # Returns a reference to an array of hits, in the order the lines were
    # read (empty if the handle yields no lines).
    ###
    my ($self, $blast_output_fh) = @_;
    my @hits;
    # Read into a lexical: a bare while(<$fh>) assigns to the global $_ without
    # localising it, which overwrites the caller's $_ (and whatever it is
    # aliased to) when this is called from inside a for/map/grep.
    while (defined(my $line = <$blast_output_fh>))
    {
        push(@hits, $self->_build_hit($line));
    }
    return \@hits;
}
sub _filter_by_alignment_length
{
    ###
    # Keeps only the hits whose alignment is long enough to count as a match.
    # $word_sizes maps an allele name to the minimum alignment length required
    # for that allele; a hit is kept when its 'sample_alignment_length' is at
    # least the value stored under its 'allele_name'.
    # Returns a reference to a new array holding the surviving hits in their
    # original order.
    ###
    my ($self, $hits, $word_sizes) = @_;
    my @long_enough = grep {
        my $required_length = $word_sizes->{ $_->{'allele_name'} };
        $_->{'sample_alignment_length'} >= $required_length;
    } @$hits;
    return \@long_enough;
}
sub _filter_best_hits
{
    ###
    # Keeps the hits whose percentage identity is within a tolerance of the
    # best percentage identity seen among all the hits.
    # The optional third argument is that tolerance; it defaults to 2.0 when
    # it is not defined (an explicit 0 is honoured).
    # Returns a reference to a new array holding the surviving hits in their
    # original order.
    ###
    my ($self, $hits, $tolerance) = @_;
    $tolerance = 2.0 unless defined($tolerance);

    my $best_identity = max(map { $_->{'percentage_identity'} } @$hits);
    my @near_best = grep {
        $_->{'percentage_identity'} >= $best_identity - $tolerance
    } @$hits;
    return \@near_best;
}
sub _group_overlapping_hits
{
###
# Hits can overlap, this groups hits which overlap and returns a reference to
# an array of references to these groups.
###
my($self, $hits) = @_;
my @bins = ();
( run in 2.202 seconds using v1.01-cache-2.11-cpan-75ffa21a3d4 )