Bio-MLST-Check

 view release on metacpan or  search on metacpan

lib/Bio/MLST/Blast/BlastN.pm  view on Meta::CPAN

package Bio::MLST::Blast::BlastN;
# ABSTRACT: Wrapper around NCBI BlastN
$Bio::MLST::Blast::BlastN::VERSION = '2.1.1706216';

use Moose;
use Bio::MLST::Types;
use List::Util qw(reduce max min);

# input variables
# Every attribute below is read-only ('ro'): it can be set through the
# constructor but no writer method is generated.

# Required string identifying the BLAST database to search.
# NOTE(review): presumably handed to the blastn command line - the code that
# builds the command is not visible in this part of the file; confirm there.
has 'blast_database'     => ( is => 'ro', isa => 'Str', required => 1 ); 
# Required string naming the file that holds the query sequences.
has 'query_file'         => ( is => 'ro', isa => 'Str', required => 1 ); 
# Required hash reference.
# NOTE(review): _filter_by_alignment_length takes a hash keyed by allele name
# whose values are minimum alignment lengths; presumably this attribute is the
# hash that gets passed to it - confirm against the caller.
has 'word_sizes'         => ( is => 'ro', isa => 'HashRef', required => 1 ); 
# Executable to run; defaults to 'blastn'. The 'Bio::MLST::Executable' type is
# not declared in this file (presumably it comes from Bio::MLST::Types, which
# is loaded above - confirm).
has 'exec'               => ( is => 'ro', isa => 'Bio::MLST::Executable', default  => 'blastn' ); 
# Integer percentage identity setting; defaults to 0.
# NOTE(review): how this value is applied is not visible in this chunk.
has 'perc_identity'      => ( is => 'ro', isa => 'Int', default  => 0 );

# Generated
# Lazily computed on first access by _build_top_hit (defined outside the part
# of the file visible here). The Maybe[HashRef] type allows the value to be
# undef as well as a hash reference.
has 'top_hit'           => ( is => 'ro', isa => 'Maybe[HashRef]', lazy => 1,  builder => '_build_top_hit' ); 

sub _build_hit
{
  ###
  # Turn one tab-separated line of BLAST output into a hit hash reference.
  #
  # Columns used (0-based): 0 allele name, 1 source name, 2 percentage
  # identity, 3 alignment length, 8 and 9 the two alignment coordinates on
  # the source, 12 number of matches.
  # NOTE(review): column 12 implies a 13-column tabular format - confirm
  # against the blastn command that produces the output.
  #
  # The coordinates are returned in ascending order; 'reverse' is 1 when
  # they had to be swapped and 0 otherwise.
  ###
  my ($self, $line) = @_;
  chomp $line;

  my @fields = split /\t/, $line;
  my ($allele, $source, $identity, $length, $matches) = @fields[0, 1, 2, 3, 12];
  my ($from, $to) = @fields[8, 9];

  # A hit whose first coordinate is the larger one lies on the reverse strand.
  my $is_reverse = 0;
  unless ($from <= $to)
  {
    ($from, $to) = ($to, $from);
    $is_reverse = 1;
  }

  my %hit = (
    'allele_name'             => $allele,
    'source_name'             => $source,
    'percentage_identity'     => $identity,
    'sample_alignment_length' => $length,
    'matches'                 => $matches,
    'source_start'            => $from,
    'source_end'              => $to,
    'reverse'                 => $is_reverse,
  );
  return \%hit;
}

sub _build_hits
{
  ###
  # Read every line from an open BLAST output filehandle and convert each one
  # into a hit via _build_hit.
  #
  # Takes the filehandle to read from and returns a reference to an array of
  # hits, in the order the lines were read (empty if the handle yields no
  # lines).
  ###
  my ($self, $blast_output_fh) = @_;
  my @hits;
  # Read into a lexical instead of the global $_. A bare while(<$fh>) assigns
  # to $_ without localising it, which overwrites the caller's $_ - including
  # the list element it is aliased to when this method is called from inside
  # a for/map/grep - and leaves it undefined at end of file.
  while (defined(my $line = <$blast_output_fh>))
  {
    push(@hits, $self->_build_hit($line));
  }
  return \@hits;
}

sub _filter_by_alignment_length
{
  ###
  # Keep only the hits whose alignment is long enough for their allele.
  #
  # $word_sizes maps an allele name to the minimum alignment length required
  # for that allele; a hit is kept when its 'sample_alignment_length' is at
  # least that value. Returns a reference to a new array of the surviving
  # hits, in their original order.
  ###
  my ($self, $hits, $word_sizes) = @_;

  my @long_enough;
  foreach my $hit (@$hits)
  {
    my $required_length = $word_sizes->{ $hit->{'allele_name'} };
    next unless $hit->{'sample_alignment_length'} >= $required_length;
    push @long_enough, $hit;
  }

  return \@long_enough;
}

sub _filter_best_hits
{
  ###
  # Keep the hits whose percentage identity is within a tolerance of the best
  # percentage identity found among the hits given.
  #
  # The tolerance is optional and defaults to 2.0. Returns a reference to a
  # new array of the qualifying hits, in their original order.
  ###
  my ($self, $hits, $tolerance) = @_;
  $tolerance = 2.0 unless defined($tolerance);

  my $best_percentage = max map { $_->{'percentage_identity'} } @$hits;

  my @near_best;
  foreach my $hit (@$hits)
  {
    push(@near_best, $hit) if $hit->{'percentage_identity'} >= $best_percentage - $tolerance;
  }

  return \@near_best;
}

sub _group_overlapping_hits
{
  ###
  # Hits can overlap, this groups hits which overlap and returns a reference to
  # an array of references to these groups.
  ###
  my($self, $hits) = @_;
  my @bins = ();



( run in 2.202 seconds using v1.01-cache-2.11-cpan-75ffa21a3d4 )