Bio-MLST-Check
view release on metacpan or search on metacpan
lib/Bio/MLST/CompareAlleles.pm view on Meta::CPAN
package Bio::MLST::CompareAlleles;
$Bio::MLST::CompareAlleles::VERSION = '2.1.1706216';
# ABSTRACT: Get a list of matching alleles between the sequence and database
use Moose;
use File::Basename;
use Bio::SeqIO;
use Bio::Perl;
use Bio::MLST::Blast::Database;
use Bio::MLST::Blast::BlastN;
use Bio::MLST::Types;
use Bio::MLST::SequenceType;
# --- Public inputs ---------------------------------------------------------
# Required, read-only sequence file. It is handed to the BLAST database
# builder as its fasta_file and is also opened by _build__sequence_handle.
has 'sequence_filename' => ( is => 'ro', isa => 'Bio::MLST::File', required => 1 );
# Required array reference.
# NOTE(review): presumably the paths of the allele FASTA files - confirm against the callers.
has 'allele_filenames' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
# Value passed as 'exec' to Bio::MLST::Blast::Database; defaults to 'makeblastdb'.
has 'makeblastdb_exec' => ( is => 'ro', isa => 'Str', default => 'makeblastdb' );
# Defaults to 'blastn'.
# NOTE(review): presumably the blastn executable name/path; its use is not visible in this part of the file.
has 'blastn_exec' => ( is => 'ro', isa => 'Str', default => 'blastn' );
# --- Lazily built internals ------------------------------------------------
# Bio::SeqIO reader over sequence_filename (see _build__sequence_handle).
has '_sequence_handle' => ( is => 'ro', isa => 'Bio::SeqIO::fasta', lazy => 1, builder => '_build__sequence_handle');
# Bio::MLST::Blast::Database object constructed from sequence_filename and makeblastdb_exec.
has '_blast_db_location_obj' => ( is => 'ro', isa => 'Bio::MLST::Blast::Database', lazy => 1, builder => '_build__blast_db_location_obj');
# String returned by location() on the database object above.
has '_blast_db_location' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__blast_db_location');
# --- Results ---------------------------------------------------------------
# Hash reference built on first access by _build_matching_sequences;
# found_sequence_names() returns its keys sorted.
has 'matching_sequences' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_matching_sequences' );
# Read-write hash reference that starts out empty;
# found_non_matching_sequence_names() returns its keys sorted.
has 'non_matching_sequences' => ( is => 'rw', isa => 'HashRef', default => sub {{}});
# Read-write boolean flag, false by default.
has 'contamination' => ( is => 'rw', isa => 'Bool', default => 0);
# Optional string (may be undef); no default is set.
has 'contamination_alleles' => ( is => 'rw', isa => 'Maybe[Str]' );
# Optional array reference (may be undef); no default is set.
has 'contamination_sequence_names' => ( is => 'rw', isa => 'Maybe[ArrayRef]' );
# Read-write boolean flag, false by default.
has 'new_st' => ( is => 'rw', isa => 'Bool', default => 0);
# Hash reference built on first access by _build__absent_loci.
has '_absent_loci' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__absent_loci' );
# Required, read-only profiles file.
has 'profiles_filename' => ( is => 'ro', isa => 'Bio::MLST::File', required => 1 );
# Builder for the lazy '_blast_db_location' attribute.
# Returns whatever location() reports on the BLAST database object held in
# '_blast_db_location_obj' (which is itself built on first access).
sub _build__blast_db_location
{
  my ($self) = @_;

  my $database = $self->_blast_db_location_obj;
  return $database->location();
}
# Builder for the lazy '_blast_db_location_obj' attribute.
# Constructs a Bio::MLST::Blast::Database, giving it the sequence file as
# 'fasta_file' and the configured makeblastdb command as 'exec'.
sub _build__blast_db_location_obj
{
  my ($self) = @_;

  my @constructor_args = (
    fasta_file => $self->sequence_filename,
    exec       => $self->makeblastdb_exec,
  );
  return Bio::MLST::Blast::Database->new(@constructor_args);
}
# Builder for the lazy '_sequence_handle' attribute.
# Opens the file named by 'sequence_filename' with Bio::SeqIO, requesting
# the 'Fasta' format, and returns the resulting reader object.
sub _build__sequence_handle
{
  my ($self) = @_;

  my @reader_args = (
    -file   => $self->sequence_filename,
    -format => 'Fasta',
  );
  return Bio::SeqIO->new(@reader_args);
}
# Returns the result of passing 'sequence_filename' through this object's
# _get_base_filename() method (defined elsewhere in the class).
sub sequence_filename_root
{
  my ($self) = @_;

  my $sequence_path = $self->sequence_filename;
  return $self->_get_base_filename($sequence_path);
}
# Returns a reference to a new array holding the keys of the
# 'matching_sequences' hash, in default (string) sort order.
sub found_sequence_names
{
  my ($self) = @_;

  my $matches = $self->matching_sequences;
  return [ sort keys %{$matches} ];
}
# Returns a reference to a new array holding the keys of the
# 'non_matching_sequences' hash, in default (string) sort order.
sub found_non_matching_sequence_names
{
  my ($self) = @_;

  my $non_matches = $self->non_matching_sequences;
  return [ sort keys %{$non_matches} ];
}
# Reads every record of the given FASTA file and returns a hash reference
# mapping each record's primary_id to its sequence length.
# Despite the method name, the stored values are the full sequence lengths.
#
#   $filename - path of the FASTA file to read
sub _word_sizes_for_given_allele_file
{
  my ($self, $filename) = @_;

  my %length_of;
  my $reader = Bio::SeqIO->new( -file => $filename, -format => 'Fasta' );
  while ( my $record = $reader->next_seq() ) {
    my $sequence_length = $record->length;
    $length_of{ $record->primary_id } = $sequence_length;
  }

  return \%length_of;
}
sub _get_word_size_from_blast_hit {
my ( $self, $word_sizes, $blast_hit, $allele_filename ) = @_;
# return len of top blast hit allele, otherwise return len of first seq in allele file
my ($word_size, $first_seq);
if( defined $blast_hit->{allele_name} ){
$word_size = $word_sizes->{$blast_hit->{allele_name}};
( run in 0.576 second using v1.01-cache-2.11-cpan-39bf76dae61 )