Bio-MLST-Check

 view release on metacpan or  search on metacpan

lib/Bio/MLST/CompareAlleles.pm  view on Meta::CPAN

package Bio::MLST::CompareAlleles;
$Bio::MLST::CompareAlleles::VERSION = '2.1.1706216';
# ABSTRACT: Get a list of matching alleles between the sequence and database


use Moose;
use File::Basename;
use Bio::SeqIO;
use Bio::Perl;
use Bio::MLST::Blast::Database;
use Bio::MLST::Blast::BlastN;
use Bio::MLST::Types;
use Bio::MLST::SequenceType;

# --- Constructor inputs ---------------------------------------------------
# sequence_filename: required; read as FASTA by _build__sequence_handle and
# handed to Bio::MLST::Blast::Database as its fasta_file.
has 'sequence_filename'      => ( is => 'ro', isa => 'Bio::MLST::File',      required => 1 );
# allele_filenames: required array reference.
# NOTE(review): presumably one FASTA file of alleles per locus - confirm with callers.
has 'allele_filenames'       => ( is => 'ro', isa => 'ArrayRef', required => 1 );
# makeblastdb_exec: passed as 'exec' when the Bio::MLST::Blast::Database is built.
has 'makeblastdb_exec'       => ( is => 'ro', isa => 'Str',      default  => 'makeblastdb' );
# blastn_exec: NOTE(review): presumably handed to Bio::MLST::Blast::BlastN; its use is not visible here.
has 'blastn_exec'            => ( is => 'ro', isa => 'Str',      default  => 'blastn' );

# --- Lazily built internals -----------------------------------------------
# _sequence_handle: Bio::SeqIO FASTA reader opened on sequence_filename.
has '_sequence_handle'       => ( is => 'ro', isa => 'Bio::SeqIO::fasta',     lazy => 1,  builder => '_build__sequence_handle');
# _blast_db_location_obj: Bio::MLST::Blast::Database created from sequence_filename.
has '_blast_db_location_obj' => ( is => 'ro', isa => 'Bio::MLST::Blast::Database', lazy => 1,  builder => '_build__blast_db_location_obj');
# _blast_db_location: the value of location() on _blast_db_location_obj.
has '_blast_db_location'     => ( is => 'ro', isa => 'Str',                   lazy => 1,  builder => '_build__blast_db_location');

# --- Results ----------------------------------------------------------------
# matching_sequences: lazily built hash; its sorted keys are what
# found_sequence_names returns. The builder is defined outside this excerpt.
has 'matching_sequences'     => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_matching_sequences' );
# non_matching_sequences: writable hash, empty by default; its sorted keys are
# what found_non_matching_sequence_names returns.
has 'non_matching_sequences' => ( is => 'rw', isa => 'HashRef', default => sub {{}});
# contamination*, new_st: writable flags/details, all initially false or unset.
# NOTE(review): presumably populated while matching_sequences is built - confirm.
has 'contamination'          => ( is => 'rw', isa => 'Bool',    default => 0);
has 'contamination_alleles'  => ( is => 'rw', isa => 'Maybe[Str]' );
has 'contamination_sequence_names' => ( is => 'rw', isa => 'Maybe[ArrayRef]' );
has 'new_st'                 => ( is => 'rw', isa => 'Bool',    default => 0);
# _absent_loci: lazily built hash; the builder is defined outside this excerpt.
has '_absent_loci'           => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__absent_loci' );
# profiles_filename: required constructor input; its use is not visible in this excerpt.
has 'profiles_filename'     => ( is => 'ro', isa => 'Bio::MLST::File',        required => 1 ); 

# Moose builder for the lazy '_blast_db_location' attribute.
# Returns whatever location() reports on the BLAST database wrapper object.
sub _build__blast_db_location
{
  my $self = shift;

  my $database = $self->_blast_db_location_obj;
  return $database->location();
}

# Moose builder for the lazy '_blast_db_location_obj' attribute.
# Constructs a Bio::MLST::Blast::Database from the input sequence file,
# telling it which makeblastdb executable to use.
sub _build__blast_db_location_obj
{
  my $self = shift;

  # An array (not a hash) keeps the key/value pairs in their written order.
  my @constructor_args = (
    fasta_file => $self->sequence_filename,
    exec       => $self->makeblastdb_exec,
  );
  return Bio::MLST::Blast::Database->new(@constructor_args);
}


# Moose builder for the lazy '_sequence_handle' attribute.
# Opens the input sequence file with Bio::SeqIO, declaring its format as Fasta.
sub _build__sequence_handle
{
  my $self = shift;

  my @reader_args = (
    -file   => $self->sequence_filename,
    -format => 'Fasta',
  );
  return Bio::SeqIO->new(@reader_args);
}

# Returns the result of _get_base_filename() applied to the input sequence
# filename (that helper is defined elsewhere in this class).
sub sequence_filename_root
{
  my $self = shift;

  my $filename = $self->sequence_filename;
  return $self->_get_base_filename($filename);
}

# Returns a reference to a new array holding the keys of matching_sequences
# in default (string) sort order.
sub found_sequence_names
{
  my $self = shift;

  return [ sort keys %{ $self->matching_sequences } ];
}

# Returns a reference to a new array holding the keys of
# non_matching_sequences in default (string) sort order.
sub found_non_matching_sequence_names
{
  my $self = shift;

  return [ sort keys %{ $self->non_matching_sequences } ];
}


# Reads every record of the given file (declared as Fasta format) and returns
# a hash reference mapping each record's primary_id to its sequence length.
# An id that occurs more than once keeps the length of its last record.
sub _word_sizes_for_given_allele_file
{
  my ($self, $filename) = @_;

  my $allele_reader = Bio::SeqIO->new( -file => $filename , -format => 'Fasta');

  my %length_of;
  while ( my $allele = $allele_reader->next_seq() ) {
    my $allele_length = $allele->length;
    $length_of{ $allele->primary_id } = $allele_length;
  }

  return \%length_of;
}

sub _get_word_size_from_blast_hit {
  my ( $self, $word_sizes, $blast_hit, $allele_filename ) = @_;

  # return len of top blast hit allele, otherwise return len of first seq in allele file
  my ($word_size, $first_seq);
  if( defined $blast_hit->{allele_name} ){
    $word_size = $word_sizes->{$blast_hit->{allele_name}};



( run in 0.576 second using v1.01-cache-2.11-cpan-39bf76dae61 )