Bio-MLST-Check

 view release on metacpan or  search on metacpan

lib/Bio/MLST/Blast/BlastN.pm  view on Meta::CPAN

# ABSTRACT: Wrapper around NCBI BlastN
$Bio::MLST::Blast::BlastN::VERSION = '2.1.1706216';

use Moose;
use Bio::MLST::Types;
use List::Util qw(reduce max min);

# Input variables: read-only values that must be passed to the constructor.
has [qw(blast_database query_file)] => ( is => 'ro', isa => 'Str', required => 1 );
has word_sizes => ( is => 'ro', isa => 'HashRef', required => 1 );

# Optional read-only inputs with their defaults.
has 'exec' => (
  is      => 'ro',
  isa     => 'Bio::MLST::Executable',
  default => 'blastn',
);
has perc_identity => ( is => 'ro', isa => 'Int', default => 0 );

# Generated: computed on first access by _build_top_hit; the type allows undef.
has top_hit => (
  is      => 'ro',
  isa     => 'Maybe[HashRef]',
  lazy    => 1,
  builder => '_build_top_hit',
);

sub _build_hit
{
  my($self, $line) = @_;
  chomp($line);
  my @row = split(/\t/,$line);
  my ($start, $end) = ($row[8], $row[9]);
  ($start, $end, my $reverse) = $start <= $end ? ($start, $end, 0) : ($end, $start, 1);
  return {
    'allele_name' => $row[0],

lib/Bio/MLST/Check.pm  view on Meta::CPAN

use Bio::MLST::ProcessFasta;
use Bio::MLST::Spreadsheet::File;
use Bio::MLST::NormaliseFasta;
use Bio::AlignIO;
use Bio::SimpleAlign;
use File::Temp;
use Cwd;

# Required, read-only constructor arguments.
has [qw(species base_directory)] => ( is => 'ro', isa => 'Str', required => 1 );
has raw_input_fasta_files => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has [qw(makeblastdb_exec blastn_exec output_directory)] => ( is => 'ro', isa => 'Str', required => 1 );

# Optional, read-only settings and their defaults.
has output_fasta_files   => ( is => 'ro', isa => 'Bool', default => 0 );
has spreadsheet_basename => ( is => 'ro', isa => 'Str',  default => 'mlst_results' );
has output_phylip_files  => ( is => 'ro', isa => 'Bool', default => 0 );
has show_contamination_instead_of_alt_matches => ( is => 'ro', isa => 'Bool', default => 1 );
has report_lowest_st     => ( is => 'ro', isa => 'Bool', default => 0 );
has parallel_processes   => ( is => 'ro', isa => 'Int',  default => 1 );

# Internal read-write arrays; the default sub gives each instance its own empty array.
has [qw(_spreadsheet_header _spreadsheet_allele_numbers_rows _spreadsheet_genomic_rows)] => (
  is      => 'rw',
  isa     => 'ArrayRef',
  default => sub { [] },
);

# Built on first access by _build__input_fasta_files.
has _input_fasta_files => (
  is      => 'ro',
  isa     => 'ArrayRef',
  lazy    => 1,
  builder => '_build__input_fasta_files',
);

has [qw(_concat_names _concat_sequences)] => (
  is      => 'rw',
  isa     => 'ArrayRef',
  default => sub { [] },
);

# File::Temp directory object created under the current working directory,
# requested with CLEANUP => 1.
has _working_directory => (
  is      => 'ro',
  isa     => 'File::Temp::Dir',
  default => sub { File::Temp->newdir( DIR => getcwd(), CLEANUP => 1 ); },
);

sub _generate_spreadsheet_rows
{
  my($self) = @_;

  my $pm = new Parallel::ForkManager($self->parallel_processes); 

lib/Bio/MLST/CheckMultipleSpecies.pm  view on Meta::CPAN



use Moose;
use Bio::MLST::Check;
use Bio::MLST::Databases;
use Parallel::ForkManager;
use File::Temp;
use Cwd;
use Text::CSV;

# empty array searches against all databases
has species        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has base_directory => ( is => 'ro', isa => 'Str',      required => 1 );

# max parallel processes
has parallel_processes => ( is => 'ro', isa => 'Int', default => 1 );

# Read-write boolean switches, all off by default.
has [
  'verbose',               # output search progress and number of matches
  'report_all_mlst_db',    # report all mlst databases searched
  'report_lowest_st',
] => ( is => 'rw', isa => 'Bool', default => 0 );

# Required, read-only constructor arguments.
has raw_input_fasta_files => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has [qw(makeblastdb_exec blastn_exec output_directory)] => ( is => 'ro', isa => 'Str', required => 1 );

has spreadsheet_basename => ( is => 'ro', isa => 'Str', default => 'mlst_results' );

has [
  'output_fasta_files',     # output of fasta not supported
  'output_phylip_files',    # output of phylip not supported
] => ( is => 'ro', isa => 'Bool', default => 0 );

has show_contamination_instead_of_alt_matches => ( is => 'ro', isa => 'Bool', default => 1 );

# lazy_build => 1: the value comes from _build__species_list on first access.
has _species_list => ( is => 'ro', isa => 'ArrayRef', lazy_build => 1 );

# File::Temp directory object created under the current working directory,
# requested with CLEANUP => 1.
has _working_directory => (
  is      => 'ro',
  isa     => 'File::Temp::Dir',
  default => sub { File::Temp->newdir( DIR => getcwd(), CLEANUP => 1 ); },
);

sub _build__species_list
{
    my($self) = @_;
    my @species_list = @{$self->species};

    # if no species supplied then run vs all species
    unless(@species_list)
    {

lib/Bio/MLST/CompareAlleles.pm  view on Meta::CPAN

use Moose;
use File::Basename;
use Bio::SeqIO;
use Bio::Perl;
use Bio::MLST::Blast::Database;
use Bio::MLST::Blast::BlastN;
use Bio::MLST::Types;
use Bio::MLST::SequenceType;

# Public inputs: two required values, plus executable names with defaults.
has sequence_filename => ( is => 'ro', isa => 'Bio::MLST::File', required => 1 );
has allele_filenames  => ( is => 'ro', isa => 'ArrayRef',        required => 1 );
has makeblastdb_exec  => ( is => 'ro', isa => 'Str', default => 'makeblastdb' );
has blastn_exec       => ( is => 'ro', isa => 'Str', default => 'blastn' );

# Private lazy attributes; each is filled in by its named builder on first access.
has _sequence_handle => (
  is      => 'ro',
  isa     => 'Bio::SeqIO::fasta',
  lazy    => 1,
  builder => '_build__sequence_handle',
);
has _blast_db_location_obj => (
  is      => 'ro',
  isa     => 'Bio::MLST::Blast::Database',
  lazy    => 1,
  builder => '_build__blast_db_location_obj',
);
has _blast_db_location => (
  is      => 'ro',
  isa     => 'Str',
  lazy    => 1,
  builder => '_build__blast_db_location',
);

# Result attributes. The 'rw' ones can be set after construction.
has matching_sequences => (
  is      => 'ro',
  isa     => 'HashRef',
  lazy    => 1,
  builder => '_build_matching_sequences',
);
has non_matching_sequences       => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
has contamination                => ( is => 'rw', isa => 'Bool',    default => 0 );
has contamination_alleles        => ( is => 'rw', isa => 'Maybe[Str]' );
has contamination_sequence_names => ( is => 'rw', isa => 'Maybe[ArrayRef]' );
has new_st                       => ( is => 'rw', isa => 'Bool',    default => 0 );
has _absent_loci => (
  is      => 'ro',
  isa     => 'HashRef',
  lazy    => 1,
  builder => '_build__absent_loci',
);
has profiles_filename => ( is => 'ro', isa => 'Bio::MLST::File', required => 1 );

# Builder for the lazy '_blast_db_location' attribute.
# Returns whatever location() reports on the object held in _blast_db_location_obj.
sub _build__blast_db_location
{
  my $self = shift;
  my $database = $self->_blast_db_location_obj;
  return $database->location();
}

sub _build__blast_db_location_obj
{

lib/Bio/MLST/Databases.pm  view on Meta::CPAN

package Bio::MLST::Databases;
# ABSTRACT: List available MLST databases
$Bio::MLST::Databases::VERSION = '2.1.1706216';

use Moose;

# Required, read-only constructor argument.
has base_directory => ( is => 'ro', isa => 'Str', required => 1 );

# Built on first access by _builder_database_names.
has database_names => (
  is      => 'ro',
  isa     => 'ArrayRef',
  lazy    => 1,
  builder => '_builder_database_names',
);

sub _builder_database_names
{
  my($self) = @_;
  my @only_directories;
  opendir(my $dh,$self->base_directory);
  my @database_names = grep { /^[^\.]/ } readdir($dh);

  for my $file_or_dir_name (sort(@database_names))
  {

lib/Bio/MLST/Download/Database.pm  view on Meta::CPAN

package Bio::MLST::Download::Database;
# ABSTRACT: Represents a single genus-species database on a single species
$Bio::MLST::Download::Database::VERSION = '2.1.1706216';


use Moose;

with 'Bio::MLST::Download::Downloadable';

# Required, read-only constructor arguments.
has database_attributes => ( is => 'ro', isa => 'HashRef', required => 1 );
has [qw(base_directory species)] => ( is => 'ro', isa => 'Str', required => 1 );

# Built on first access by _build_destination_directory.
has destination_directory => (
  is      => 'ro',
  isa     => 'Str',
  lazy    => 1,
  builder => '_build_destination_directory',
);


sub update
{
  my ($self) = @_;

lib/Bio/MLST/Download/Databases.pm  view on Meta::CPAN


use Moose;
use Bio::MLST::Download::Database;
use Parallel::ForkManager;

use Try::Tiny;
use File::Copy qw(move);
use File::Path qw(make_path rmtree);
use POSIX qw(strftime);

# Required, read-only constructor arguments.
has databases_attributes => ( is => 'ro', isa => 'HashRef', required => 1 );
has base_directory       => ( is => 'ro', isa => 'Str',     required => 1 );

# Optional read-only settings and their defaults.
has parallel_processes  => ( is => 'ro', isa => 'Int', default => 0 );
has _species_to_exclude => ( is => 'ro', isa => 'Str', default => 'Pediococcus' );

sub update {
   my($self) = @_;
   my $paths_to_database_updates = $self->databases_attributes;
   my $species_to_exclude = $self->_species_to_exclude;

lib/Bio/MLST/OutputFasta.pm  view on Meta::CPAN

$Bio::MLST::OutputFasta::VERSION = '2.1.1706216';


use Moose;
use File::Basename;
use File::Path qw(make_path);
use Bio::PrimarySeq;
use Bio::SeqIO;
use Bio::MLST::Types;

# Required, read-only constructor arguments. The two sequence hashes must be
# passed, but their type (Maybe[HashRef]) allows undef.
has [qw(matching_sequences non_matching_sequences)] => (
  is       => 'ro',
  isa      => 'Maybe[HashRef]',
  required => 1,
);
has output_directory => ( is => 'ro', isa => 'Str',             required => 1 );
has input_fasta_file => ( is => 'ro', isa => 'Bio::MLST::File', required => 1 );

# Built on first access by _build__fasta_filename.
has _fasta_filename => (
  is      => 'ro',
  isa     => 'Str',
  lazy    => 1,
  builder => '_build__fasta_filename',
);

# Read-write; no default, and undef is an allowed value.
has concat_sequence => ( is => 'rw', isa => 'Maybe[Str]' );


sub _build__fasta_filename
{
  my($self) = @_;

lib/Bio/MLST/SearchForFiles.pm  view on Meta::CPAN

# ABSTRACT: Take in a species name and get the allele and profile files.
$Bio::MLST::SearchForFiles::VERSION = '2.1.1706216';

use Moose;
use Bio::MLST::Types;

# Required, read-only constructor arguments.
has [qw(species_name base_directory)] => ( is => 'ro', isa => 'Str', required => 1 );

# Lazy read-only attributes; each is filled in by its named builder on first access.
has profiles_filename => (
  is      => 'ro',
  isa     => 'Bio::MLST::File',
  lazy    => 1,
  builder => '_build_profiles_filename',
);
has allele_filenames => (
  is      => 'ro',
  isa     => 'ArrayRef',
  lazy    => 1,
  builder => '_build_allele_filenames',
);
# The builder name carries a double underscore although the attribute name has none.
has search_base_directory => (
  is      => 'ro',
  isa     => 'Str',
  lazy    => 1,
  builder => '_build__search_base_directory',
);
has list_species => (
  is      => 'ro',
  isa     => 'ArrayRef',
  lazy    => 1,
  builder => '_build_list_species',
);

sub _build_list_species
{
  my($self) = @_;
  opendir(my $dh,$self->base_directory);
  my $species_name = $self->species_name;
  my $species_name_with_underscores = $self->species_name;
  $species_name =~ s!\W!.+!gi;
  $species_name_with_underscores =~ s!\W!_!gi;
  

lib/Bio/MLST/SequenceType.pm  view on Meta::CPAN


use Data::Dumper;
use Text::CSV;
use List::Util qw(min reduce);

use Moose;
use Bio::MLST::Types;
use Bio::MLST::FilterAlleles qw(is_metadata);

# Required, read-only constructor arguments.
has profiles_filename => ( is => 'ro', isa => 'Bio::MLST::File', required => 1 );
has [qw(matching_names non_matching_names)] => ( is => 'ro', isa => 'ArrayRef', required => 1 );

# Lazy read-only attributes; each is filled in by its named builder on first access.
has allele_to_number => (
  is      => 'ro',
  isa     => 'HashRef',
  lazy    => 1,
  builder => '_build_allele_to_number',
);
has _profiles => (
  is      => 'ro',
  isa     => 'ArrayRef',
  lazy    => 1,
  builder => '_build__profiles',
);
has sequence_type => (
  is      => 'ro',
  isa     => 'Maybe[Str]',
  lazy    => 1,
  builder => '_build_sequence_type',
);

# Read-write; no default, and undef is an allowed value.
has nearest_sequence_type => ( is => 'rw', isa => 'Maybe[Str]' );

# Read-only flag, off by default.
has report_lowest_st => ( is => 'ro', isa => 'Bool', default => 0 );

sub sequence_type_or_nearest
{
  my($self) = @_;
  return $self->sequence_type if(defined($self->sequence_type));
  # If there isn't a perfect match, add a tilde to the sequence type

lib/Bio/MLST/Spreadsheet/File.pm  view on Meta::CPAN

package Bio::MLST::Spreadsheet::File;
# ABSTRACT: Create a file representation of the ST results for multiple fasta files.
$Bio::MLST::Spreadsheet::File::VERSION = '2.1.1706216';

use Moose;
use Text::CSV;
use Bio::MLST::Spreadsheet::Row;

# Every attribute here is read-only and must be passed to the constructor.
has [qw(spreadsheet_allele_numbers_rows spreadsheet_genomic_rows)] => (
  is       => 'ro',
  isa      => 'ArrayRef',
  required => 1,
);
has [qw(output_directory spreadsheet_basename)] => (
  is       => 'ro',
  isa      => 'Str',
  required => 1,
);

has header => ( is => 'ro', isa => 'ArrayRef', required => 1 );

sub create
{
  my($self) = @_;
  my $base_spreadsheet_name = join('/',($self->output_directory, $self->spreadsheet_basename));
  
  open(my $allele_fh,'+>', $base_spreadsheet_name.".allele.csv");
  open(my $genomic_fh,'+>', $base_spreadsheet_name.".genomic.csv");
  
  my $allele_csv = Text::CSV->new({sep_char=>"\t", always_quote=>1, eol=>"\r\n"});

lib/Bio/MLST/Spreadsheet/Row.pm  view on Meta::CPAN

use Text::CSV;

use Bio::MLST::FilterAlleles qw(only_keep_alleles);

use Moose;

# Required, read-only collaborating objects.
has sequence_type_obj => ( is => 'ro', isa => 'Bio::MLST::SequenceType',   required => 1 );
has compare_alleles   => ( is => 'ro', isa => 'Bio::MLST::CompareAlleles', required => 1 );

# Read-only flag, on by default.
has show_contamination_instead_of_alt_matches => ( is => 'ro', isa => 'Bool', default => 1 );

# Lazy read-only array attributes; each is filled in by its named builder on first access.
has allele_numbers_row => (
  is      => 'ro',
  isa     => 'ArrayRef',
  lazy    => 1,
  builder => '_build_allele_numbers_row',
);
has genomic_row => (
  is      => 'ro',
  isa     => 'ArrayRef',
  lazy    => 1,
  builder => '_build_genomic_row',
);
has header_row => (
  is      => 'ro',
  isa     => 'ArrayRef',
  lazy    => 1,
  builder => '_build_header_row',
);
has _common_cells => (
  is      => 'ro',
  isa     => 'ArrayRef',
  lazy    => 1,
  builder => '_build__common_cells',
);
has _allele_order => (
  is      => 'ro',
  isa     => 'ArrayRef',
  lazy    => 1,
  builder => '_build__allele_order',
);

sub _build__common_cells
{
  my($self) = @_;
  
  #cause the variable to be built.
  $self->sequence_type_obj->sequence_type;
  my $new_st_cell = '';
  if($self->compare_alleles->new_st )
  {



( run in 0.457 second using v1.01-cache-2.11-cpan-5f2e87ce722 )