view release on metacpan or search on metacpan
lib/Bio/MLST/Blast/BlastN.pm view on Meta::CPAN
# ABSTRACT: Wrapper around NCBI BlastN
$Bio::MLST::Blast::BlastN::VERSION = '2.1.1706216';
use Moose;
use Bio::MLST::Types;
use List::Util qw(reduce max min);
# Input variables: read-only values supplied by the caller. The first three
# are mandatory; 'exec' and 'perc_identity' fall back to defaults.
has 'blast_database' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'query_file' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'word_sizes' => (
    is       => 'ro',
    isa      => 'HashRef',
    required => 1,
);
has 'exec' => (
    is      => 'ro',
    isa     => 'Bio::MLST::Executable',
    default => 'blastn',
);
has 'perc_identity' => (
    is      => 'ro',
    isa     => 'Int',
    default => 0,
);

# Generated: computed on first access by the _build_top_hit builder.
has 'top_hit' => (
    is      => 'ro',
    isa     => 'Maybe[HashRef]',
    lazy    => 1,
    builder => '_build_top_hit',
);
sub _build_hit
{
my($self, $line) = @_;
chomp($line);
my @row = split(/\t/,$line);
my ($start, $end) = ($row[8], $row[9]);
($start, $end, my $reverse) = $start <= $end ? ($start, $end, 0) : ($end, $start, 1);
return {
'allele_name' => $row[0],
lib/Bio/MLST/Check.pm view on Meta::CPAN
use Bio::MLST::ProcessFasta;
use Bio::MLST::Spreadsheet::File;
use Bio::MLST::NormaliseFasta;
use Bio::AlignIO;
use Bio::SimpleAlign;
use File::Temp;
use Cwd;
# Mandatory constructor arguments.
has 'species' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'base_directory' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'raw_input_fasta_files' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);
has 'makeblastdb_exec' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'blastn_exec' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'output_directory' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Optional settings; each one carries a default.
has 'output_fasta_files' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);
has 'spreadsheet_basename' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'mlst_results',
);
has 'output_phylip_files' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);
has 'show_contamination_instead_of_alt_matches' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 1,
);
has 'report_lowest_st' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);
has 'parallel_processes' => (
    is      => 'ro',
    isa     => 'Int',
    default => 1,
);

# Private attributes. The read-write ones start out as empty array
# references; '_input_fasta_files' is built lazily by its builder.
has '_spreadsheet_header' => (
    is      => 'rw',
    isa     => 'ArrayRef',
    default => sub { [] },
);
has '_spreadsheet_allele_numbers_rows' => (
    is      => 'rw',
    isa     => 'ArrayRef',
    default => sub { [] },
);
has '_spreadsheet_genomic_rows' => (
    is      => 'rw',
    isa     => 'ArrayRef',
    default => sub { [] },
);
has '_input_fasta_files' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build__input_fasta_files',
);
has '_concat_names' => (
    is      => 'rw',
    isa     => 'ArrayRef',
    default => sub { [] },
);
has '_concat_sequences' => (
    is      => 'rw',
    isa     => 'ArrayRef',
    default => sub { [] },
);

# Temporary directory created under the current working directory, with
# CLEANUP switched on.
has '_working_directory' => (
    is      => 'ro',
    isa     => 'File::Temp::Dir',
    default => sub { File::Temp->newdir(DIR => getcwd, CLEANUP => 1); },
);
sub _generate_spreadsheet_rows
{
my($self) = @_;
my $pm = new Parallel::ForkManager($self->parallel_processes);
lib/Bio/MLST/CheckMultipleSpecies.pm view on Meta::CPAN
use Moose;
use Bio::MLST::Check;
use Bio::MLST::Databases;
use Parallel::ForkManager;
use File::Temp;
use Cwd;
use Text::CSV;
# empty array searches against all databases
has 'species' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);
has 'base_directory' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# max parallel processes
has 'parallel_processes' => (
    is      => 'ro',
    isa     => 'Int',
    default => 1,
);

# output search progress and number of matches
has 'verbose' => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);

# report all mlst databases searched
has 'report_all_mlst_db' => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);
has 'report_lowest_st' => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);

# Mandatory arguments with the same names and types as those declared by
# Bio::MLST::Check.
has 'raw_input_fasta_files' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);
has 'makeblastdb_exec' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'blastn_exec' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'output_directory' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'spreadsheet_basename' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'mlst_results',
);

# output of fasta not supported
has 'output_fasta_files' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);

# output of phylip not supported
has 'output_phylip_files' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);
has 'show_contamination_instead_of_alt_matches' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 1,
);

# Private attributes: a lazily built species list, and a temporary
# directory created under the current working directory with CLEANUP on.
has '_species_list' => (
    is         => 'ro',
    isa        => 'ArrayRef',
    lazy_build => 1,
);
has '_working_directory' => (
    is      => 'ro',
    isa     => 'File::Temp::Dir',
    default => sub { File::Temp->newdir(DIR => getcwd, CLEANUP => 1); },
);
sub _build__species_list
{
my($self) = @_;
my @species_list = @{$self->species};
# if no species supplied then run vs all species
unless(@species_list)
{
lib/Bio/MLST/CompareAlleles.pm view on Meta::CPAN
use Moose;
use File::Basename;
use Bio::SeqIO;
use Bio::Perl;
use Bio::MLST::Blast::Database;
use Bio::MLST::Blast::BlastN;
use Bio::MLST::Types;
use Bio::MLST::SequenceType;
# Mandatory inputs.
has 'sequence_filename' => (
    is       => 'ro',
    isa      => 'Bio::MLST::File',
    required => 1,
);
has 'allele_filenames' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);

# Executable names, defaulting to the bare command names.
has 'makeblastdb_exec' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'makeblastdb',
);
has 'blastn_exec' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'blastn',
);

# Lazily built values; each one names its own builder method.
has '_sequence_handle' => (
    is      => 'ro',
    isa     => 'Bio::SeqIO::fasta',
    lazy    => 1,
    builder => '_build__sequence_handle',
);
has '_blast_db_location_obj' => (
    is      => 'ro',
    isa     => 'Bio::MLST::Blast::Database',
    lazy    => 1,
    builder => '_build__blast_db_location_obj',
);
has '_blast_db_location' => (
    is      => 'ro',
    isa     => 'Str',
    lazy    => 1,
    builder => '_build__blast_db_location',
);
has 'matching_sequences' => (
    is      => 'ro',
    isa     => 'HashRef',
    lazy    => 1,
    builder => '_build_matching_sequences',
);

# Read-write attributes.
has 'non_matching_sequences' => (
    is      => 'rw',
    isa     => 'HashRef',
    default => sub { {} },
);
has 'contamination' => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);
has 'contamination_alleles' => (
    is  => 'rw',
    isa => 'Maybe[Str]',
);
has 'contamination_sequence_names' => (
    is  => 'rw',
    isa => 'Maybe[ArrayRef]',
);
has 'new_st' => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);

has '_absent_loci' => (
    is      => 'ro',
    isa     => 'HashRef',
    lazy    => 1,
    builder => '_build__absent_loci',
);
has 'profiles_filename' => (
    is       => 'ro',
    isa      => 'Bio::MLST::File',
    required => 1,
);
# Builder for the lazy '_blast_db_location' attribute: fetches the object held
# in '_blast_db_location_obj' and returns whatever its location() method gives.
sub _build__blast_db_location
{
    my $self = shift;
    my $database = $self->_blast_db_location_obj;
    return $database->location();
}
sub _build__blast_db_location_obj
{
lib/Bio/MLST/Databases.pm view on Meta::CPAN
package Bio::MLST::Databases;
# ABSTRACT: List available MLST databases
$Bio::MLST::Databases::VERSION = '2.1.1706216';
use Moose;
# Mandatory directory path supplied by the caller.
has 'base_directory' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Built on first access by _builder_database_names.
has 'database_names' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_builder_database_names',
);
sub _builder_database_names
{
my($self) = @_;
my @only_directories;
opendir(my $dh,$self->base_directory);
my @database_names = grep { /^[^\.]/ } readdir($dh);
for my $file_or_dir_name (sort(@database_names))
{
lib/Bio/MLST/Download/Database.pm view on Meta::CPAN
package Bio::MLST::Download::Database;
# ABSTRACT: Represents a single genus-species database on a single species
$Bio::MLST::Download::Database::VERSION = '2.1.1706216';
use Moose;
with 'Bio::MLST::Download::Downloadable';
# Mandatory constructor arguments.
has 'database_attributes' => (
    is       => 'ro',
    isa      => 'HashRef',
    required => 1,
);
has 'base_directory' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'species' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Built on first access by _build_destination_directory.
has 'destination_directory' => (
    is      => 'ro',
    isa     => 'Str',
    lazy    => 1,
    builder => '_build_destination_directory',
);
sub update
{
my ($self) = @_;
lib/Bio/MLST/Download/Databases.pm view on Meta::CPAN
use Moose;
use Bio::MLST::Download::Database;
use Parallel::ForkManager;
use Try::Tiny;
use File::Copy qw(move);
use File::Path qw(make_path rmtree);
use POSIX qw(strftime);
# Mandatory constructor arguments.
has 'databases_attributes' => (
    is       => 'ro',
    isa      => 'HashRef',
    required => 1,
);
has 'base_directory' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Defaults to 0 here.
has 'parallel_processes' => (
    is      => 'ro',
    isa     => 'Int',
    default => 0,
);

# Private string attribute with a fixed default value.
has '_species_to_exclude' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'Pediococcus',
);
sub update {
my($self) = @_;
my $paths_to_database_updates = $self->databases_attributes;
my $species_to_exclude = $self->_species_to_exclude;
lib/Bio/MLST/OutputFasta.pm view on Meta::CPAN
$Bio::MLST::OutputFasta::VERSION = '2.1.1706216';
use Moose;
use File::Basename;
use File::Path qw(make_path);
use Bio::PrimarySeq;
use Bio::SeqIO;
use Bio::MLST::Types;
# Mandatory constructor arguments. The two sequence hashes are typed
# Maybe[HashRef], so undef passes the type check for them.
has 'matching_sequences' => (
    is       => 'ro',
    isa      => 'Maybe[HashRef]',
    required => 1,
);
has 'non_matching_sequences' => (
    is       => 'ro',
    isa      => 'Maybe[HashRef]',
    required => 1,
);
has 'output_directory' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'input_fasta_file' => (
    is       => 'ro',
    isa      => 'Bio::MLST::File',
    required => 1,
);

# Built on first access by _build__fasta_filename.
has '_fasta_filename' => (
    is      => 'ro',
    isa     => 'Str',
    lazy    => 1,
    builder => '_build__fasta_filename',
);

# Read-write; no default is declared.
has 'concat_sequence' => (
    is  => 'rw',
    isa => 'Maybe[Str]',
);
sub _build__fasta_filename
{
my($self) = @_;
lib/Bio/MLST/SearchForFiles.pm view on Meta::CPAN
# ABSTRACT: Take in a species name and get the allele and profile files.
$Bio::MLST::SearchForFiles::VERSION = '2.1.1706216';
use Moose;
use Bio::MLST::Types;
# Mandatory constructor arguments.
has 'species_name' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'base_directory' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Lazily built values; each one names its own builder method. Note that the
# builder for 'search_base_directory' has a double underscore in its name.
has 'profiles_filename' => (
    is      => 'ro',
    isa     => 'Bio::MLST::File',
    lazy    => 1,
    builder => '_build_profiles_filename',
);
has 'allele_filenames' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build_allele_filenames',
);
has 'search_base_directory' => (
    is      => 'ro',
    isa     => 'Str',
    lazy    => 1,
    builder => '_build__search_base_directory',
);
has 'list_species' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build_list_species',
);
sub _build_list_species
{
my($self) = @_;
opendir(my $dh,$self->base_directory);
my $species_name = $self->species_name;
my $species_name_with_underscores = $self->species_name;
$species_name =~ s!\W!.+!gi;
$species_name_with_underscores =~ s!\W!_!gi;
lib/Bio/MLST/SequenceType.pm view on Meta::CPAN
use Data::Dumper;
use Text::CSV;
use List::Util qw(min reduce);
use Moose;
use Bio::MLST::Types;
use Bio::MLST::FilterAlleles qw(is_metadata);
# Mandatory constructor arguments.
has 'profiles_filename' => (
    is       => 'ro',
    isa      => 'Bio::MLST::File',
    required => 1,
);
has 'matching_names' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);
has 'non_matching_names' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);

# Lazily built values; each one names its own builder method.
has 'allele_to_number' => (
    is      => 'ro',
    isa     => 'HashRef',
    lazy    => 1,
    builder => '_build_allele_to_number',
);
has '_profiles' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build__profiles',
);
has 'sequence_type' => (
    is      => 'ro',
    isa     => 'Maybe[Str]',
    lazy    => 1,
    builder => '_build_sequence_type',
);

# Read-write; no default is declared.
has 'nearest_sequence_type' => (
    is  => 'rw',
    isa => 'Maybe[Str]',
);
has 'report_lowest_st' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);
sub sequence_type_or_nearest
{
my($self) = @_;
return $self->sequence_type if(defined($self->sequence_type));
# If there isn't a perfect match, add a tilde to the sequence type
lib/Bio/MLST/Spreadsheet/File.pm view on Meta::CPAN
package Bio::MLST::Spreadsheet::File;
# ABSTRACT: Create a file representation of the ST results for multiple fasta files.
$Bio::MLST::Spreadsheet::File::VERSION = '2.1.1706216';
use Moose;
use Text::CSV;
use Bio::MLST::Spreadsheet::Row;
# Every attribute of this class is read-only and mandatory.
has 'spreadsheet_allele_numbers_rows' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);
has 'spreadsheet_genomic_rows' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);
has 'output_directory' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'spreadsheet_basename' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'header' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);
sub create
{
my($self) = @_;
my $base_spreadsheet_name = join('/',($self->output_directory, $self->spreadsheet_basename));
open(my $allele_fh,'+>', $base_spreadsheet_name.".allele.csv");
open(my $genomic_fh,'+>', $base_spreadsheet_name.".genomic.csv");
my $allele_csv = Text::CSV->new({sep_char=>"\t", always_quote=>1, eol=>"\r\n"});
lib/Bio/MLST/Spreadsheet/Row.pm view on Meta::CPAN
use Text::CSV;
use Bio::MLST::FilterAlleles qw(only_keep_alleles);
use Moose;
# Mandatory collaborating objects.
has 'sequence_type_obj' => (
    is       => 'ro',
    isa      => 'Bio::MLST::SequenceType',
    required => 1,
);
has 'compare_alleles' => (
    is       => 'ro',
    isa      => 'Bio::MLST::CompareAlleles',
    required => 1,
);
has 'show_contamination_instead_of_alt_matches' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 1,
);

# Lazily built array references; each one names its own builder method.
has 'allele_numbers_row' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build_allele_numbers_row',
);
has 'genomic_row' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build_genomic_row',
);
has 'header_row' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build_header_row',
);
has '_common_cells' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build__common_cells',
);
has '_allele_order' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build__allele_order',
);
sub _build__common_cells
{
my($self) = @_;
#cause the variable to be built.
$self->sequence_type_obj->sequence_type;
my $new_st_cell = '';
if($self->compare_alleles->new_st )
{