view release on metacpan or search on metacpan
lib/Bio/Roary/AccessoryBinaryFasta.pm view on Meta::CPAN
use Moose;
use POSIX;
use Bio::Roary::AnnotateGroups;
use Bio::Roary::AnalyseGroups;
use Bio::Roary::Exceptions;
use Bio::SeqIO;
use File::Basename;
# Required constructor inputs: the list of input files plus the two
# already-constructed group objects this class reads from.
has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
has 'analyse_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
# Optional settings with their defaults. Note that both percentage bounds
# default to the same value (5).
has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'accessory_binary_genes.fa' );
has 'lower_bound_percentage' => ( is => 'ro', isa => 'Int', default => 5 );
has 'upper_bound_percentage' => ( is => 'ro', isa => 'Int', default => 5 );
has 'max_accessory_to_include' => ( is => 'ro', isa => 'Int', default => 4000 );
# Lazy attributes: each is computed on first access by the named builder.
has 'groups_to_files' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_files' );
# NOTE(review): presumably the percentage bounds converted to absolute
# counts - confirm against the builders, which are not shown here.
has '_lower_bound_value' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__lower_bound_value' );
has '_upper_bound_value' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__upper_bound_value' );
sub _build__groups_to_files {
my ($self) = @_;
my %groups_to_files;
for my $group ( @{ $self->annotate_groups_obj->_groups } ) {
my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group};
my %filenames;
for my $gene_name ( @{$genes} ) {
lib/Bio/Roary/AccessoryClustering.pm view on Meta::CPAN
use Moose;
use Bio::Roary::External::Cdhit;
with 'Bio::Roary::ClustersRole';
has 'input_file' => ( is => 'ro', isa => 'Str', required => 1 );
has 'identity' => ( is => 'ro', isa => 'Num', default => 0.9 );
has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 );
has '_output_cd_hit_filename' => ( is => 'ro', isa => 'Str', default => '_accessory_clusters' );
has 'clusters_to_samples' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_clusters_to_samples' );
has 'samples_to_clusters' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_samples_to_clusters' );
has 'sample_weights' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_sample_weights' );
has 'clusters_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_clusters_filename' );
has 'clusters' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__clusters' );
sub _build_sample_weights {
my ($self) = @_;
my %sample_weights;
for my $cluster_name ( keys %{ $self->clusters_to_samples } ) {
my $cluster_size = @{ $self->clusters_to_samples->{$cluster_name} };
for my $sample_name ( @{ $self->clusters_to_samples->{$cluster_name} } ) {
$sample_weights{$sample_name} = 1 / $cluster_size;
}
}
lib/Bio/Roary/AnalyseGroups.pm view on Meta::CPAN
package Bio::Roary::AnalyseGroups;
$Bio::Roary::AnalyseGroups::VERSION = '3.13.0';
# ABSTRACT: Take in a groups file and the original FASTA files and create plots and stats
use Moose;
use Bio::Roary::Exceptions;
# Required constructor inputs, plus an optional output name.
has 'fasta_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'groups_filename' => ( is => 'ro', isa => 'Str', required => 1 );
has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'summary_of_groups' );
# Lazy attribute, computed on first access by the named builder.
has '_number_of_isolates' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_builder__number_of_isolates' );
# Read-write with no builder or default of its own; BUILD notes that it is
# filled in as a side effect of building _files_to_genes.
has '_genes_to_file' => ( is => 'rw', isa => 'HashRef' );
has '_files_to_genes' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_builder__files_to_genes' );
has '_groups_to_genes' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_builder__groups_to_genes' );
# Read-write with no builder or default of its own; BUILD notes that it is
# filled in as a side effect of building _groups_to_genes.
has '_genes_to_groups' => ( is => 'rw', isa => 'HashRef' );
has '_groups' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_builder__groups' );
# Moose construction hook: eagerly forces the two lazy lookup attributes so
# that the attributes they populate as a side effect are ready as soon as the
# object exists.
sub BUILD {
    my $self = shift;

    # Building _groups_to_genes also populates _genes_to_groups.
    $self->_groups_to_genes();

    # Building _files_to_genes also populates _genes_to_file.
    return $self->_files_to_genes();
}
lib/Bio/Roary/AnnotateGroups.pm view on Meta::CPAN
# ABSTRACT: Take in a group file and associated GFF files for the isolates and update the group name to the gene name
use Moose;
use Bio::Roary::Exceptions;
use Bio::Roary::GeneNamesFromGFF;
use Array::Utils qw(array_minus);
use List::Util qw(max min sum);
use File::Grep qw(fgrep);
has 'gff_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'reannotated_groups_file' );
has 'groups_filename' => ( is => 'ro', isa => 'Str', required => 1 );
has '_ids_to_gene_names' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__ids_to_gene_names' );
has '_ids_to_product' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
has '_ids_to_gene_size' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
has 'group_nucleotide_lengths' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_group_nucleotide_lengths');
has '_groups_to_id_names' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_builder__groups_to_id_names' );
has '_output_fh' => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );
has '_groups_to_consensus_gene_names' =>
( is => 'rw', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_consensus_gene_names' );
has '_filtered_gff_files' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__filtered_gff_files' );
has '_number_of_files' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__number_of_files' );
has '_ids_to_groups' => ( is => 'rw', isa => 'HashRef', lazy => 1, builder => '_builder__ids_to_groups' );
has '_group_counter' => ( is => 'rw', isa => 'Int', lazy => 1, builder => '_builder__group_counter' );
has '_group_default_prefix' => ( is => 'rw', isa => 'Str', default => 'group_' );
has '_ids_to_verbose_stats' => ( is => 'rw', isa => 'HashRef', lazy_build => 1 );
# Moose construction hook: reads the lazy _ids_to_gene_names attribute once
# so that its builder runs at construction time rather than on first use.
sub BUILD {
    my $self = shift;

    return $self->_ids_to_gene_names();
}
lib/Bio/Roary/AssemblyStatistics.pm view on Meta::CPAN
has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'assembly_statistics.csv' );
has 'job_runner' => ( is => 'ro', isa => 'Str', default => 'Local' );
has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 );
has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 );
has '_cloud_percentage' => ( is => 'rw', isa => 'Num', default => 0.15 );
has '_shell_percentage' => ( is => 'rw', isa => 'Num', default => 0.95 );
has '_soft_core_percentage' => ( is => 'rw', isa => 'Num', default => 0.99 );
has 'verbose' => ( is => 'ro', isa => 'Bool', default => 0 );
has 'contiguous_window' => ( is => 'ro', isa => 'Int', default => 10 );
has 'ordered_genes' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_ordered_genes' );
has '_genes_to_rows' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__genes_to_rows' );
has 'all_sample_statistics' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_all_sample_statistics' );
has 'sample_names_to_column_index' => ( is => 'rw', isa => 'Maybe[HashRef]' );
has 'summary_output_filename'=> ( is => 'ro', isa => 'Str', default => 'summary_statistics.txt' );
has 'logger' => ( is => 'ro', lazy => 1, builder => '_build_logger');
has 'gene_category_count' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_gene_category_count' );
# Moose construction hook: forces the lazy _genes_to_rows attribute and then
# the lazy gene_category_count attribute, in that order, so both are computed
# at construction time.
sub BUILD {
    my $self = shift;

    $self->_genes_to_rows();
    return $self->gene_category_count();
}
sub _build_logger
{
my ($self) = @_;
lib/Bio/Roary/ChunkFastaFile.pm view on Meta::CPAN
use Moose;
use Bio::SeqIO;
use Bio::Roary::Exceptions;
use Cwd;
use File::Temp;
# Required input file and the chunk size to aim for.
# NOTE(review): the unit of target_chunk_size is not visible here - confirm.
has 'fasta_file' => ( is => 'ro', isa => 'Str', required => 1 );
has 'target_chunk_size' => ( is => 'ro', isa => 'Int', default => 200000 );
# Lazy attribute, computed on first access by the named builder.
has 'sequence_file_names' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_sequence_file_names' );
# The default creates a fresh temporary directory under the current working
# directory (DIR => getcwd) with CLEANUP => 1. The default is not lazy, so the
# directory is created when the object is constructed.
has '_working_directory' =>
( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
# Path of the temporary directory above, derived lazily via its dirname().
has '_working_directory_name' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__working_directory_name' );
has '_input_seqio' => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__input_seqio' );
# Builder for _working_directory_name: returns the path string of the
# temporary working directory object held in _working_directory.
sub _build__working_directory_name {
    my $self = shift;

    my $temp_dir = $self->_working_directory;
    return $temp_dir->dirname;
}
lib/Bio/Roary/CombinedProteome.pm view on Meta::CPAN
package Bio::Roary::CombinedProteome;
$Bio::Roary::CombinedProteome::VERSION = '3.13.0';
# ABSTRACT: Take in multiple FASTA sequences containing proteomes and concat them together and output a FASTA file, filtering out more than 5% X's
use Moose;
use Bio::Roary::Exceptions;
has 'proteome_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'combined_output.fa' );
# Moose construction hook: checks that every path in proteome_files exists on
# disk. Throws Bio::Roary::Exceptions::FileNotFound for the first path that
# does not exist; later paths are not examined.
sub BUILD {
    my $self = shift;

    foreach my $proteome_path ( @{ $self->proteome_files } ) {
        next if -e $proteome_path;
        Bio::Roary::Exceptions::FileNotFound->throw( error => 'Cant open file: ' . $proteome_path );
    }
}
lib/Bio/Roary/CommandLine/AssemblyStatistics.pm view on Meta::CPAN
package Bio::Roary::CommandLine::AssemblyStatistics;
$Bio::Roary::CommandLine::AssemblyStatistics::VERSION = '3.13.0';
# ABSTRACT: Given a spreadsheet of gene presence and absence calculate some statistics
use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::AssemblyStatistics;
extends 'Bio::Roary::CommandLine::Common';
has 'args' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 );
has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'spreadsheet' => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
has 'job_runner' => ( is => 'rw', isa => 'Str', default => 'Local' );
has 'cpus' => ( is => 'rw', isa => 'Int', default => 1 );
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'assembly_statistics.csv' );
has 'version' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 );
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
lib/Bio/Roary/CommandLine/ExtractProteomeFromGff.pm view on Meta::CPAN
$Bio::Roary::CommandLine::ExtractProteomeFromGff::VERSION = '3.13.0';
# ABSTRACT: Take in GFF files and output the proteome
use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::ExtractProteomeFromGFF;
use File::Basename;
extends 'Bio::Roary::CommandLine::Common';
has 'args' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 );
has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'gff_files' => ( is => 'rw', isa => 'ArrayRef' );
has 'output_suffix' => ( is => 'rw', isa => 'Str', default => 'proteome.faa' );
has '_error_message' => ( is => 'rw', isa => 'Str' );
has 'apply_unknowns_filter' => ( is => 'rw', isa => 'Bool', default => 1 );
has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 );
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'output_directory' => ( is => 'rw', isa => 'Str', default => '.' );
lib/Bio/Roary/CommandLine/GeneAlignmentFromNucleotides.pm view on Meta::CPAN
use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use File::Copy;
use Bio::Roary::AnnotateGroups;
use Bio::Roary::External::Prank;
use Bio::Roary::Output::GroupsMultifastaProtein;
use Bio::Roary::SortFasta;
use Bio::Roary::External::Mafft;
extends 'Bio::Roary::CommandLine::Common';
has 'args' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 );
has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'nucleotide_fasta_files' => ( is => 'rw', isa => 'ArrayRef' );
has '_error_message' => ( is => 'rw', isa => 'Str' );
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'mafft' => ( is => 'rw', isa => 'Bool', default => 0 );
has '_min_similarity' => ( is => 'rw', isa => 'Num', default => 0.98 );
sub BUILD {
lib/Bio/Roary/CommandLine/IterativeCdhit.pm view on Meta::CPAN
package Bio::Roary::CommandLine::IterativeCdhit;
$Bio::Roary::CommandLine::IterativeCdhit::VERSION = '3.13.0';
# ABSTRACT: Iteratively run cdhit
use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::IterativeCdhit;
extends 'Bio::Roary::CommandLine::Common';
has 'args' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 );
has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );
has '_error_message' => ( is => 'rw', isa => 'Str' );
has 'output_cd_hit_filename' => ( is => 'rw', isa => 'Str', default => '_clustered' );
has 'output_combined_filename' => ( is => 'rw', isa => 'Str', default => '_combined_files' );
has 'number_of_input_files' => ( is => 'rw', isa => 'Int', default => 1 );
has 'output_filtered_clustered_fasta' => ( is => 'rw', isa => 'Str', default => '_clustered_filtered.fa' );
has 'lower_bound_percentage' => ( is => 'rw', isa => 'Num', default => 0.98 );
lib/Bio/Roary/CommandLine/ParallelAllAgainstAllBlastp.pm view on Meta::CPAN
# ABSTRACT: Take in a FASTA file of proteins and blast against itself
use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::ParallelAllAgainstAllBlast;
use Bio::Roary::CombinedProteome;
use Bio::Roary::PrepareInputFiles;
extends 'Bio::Roary::CommandLine::Common';
has 'args' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 );
has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'fasta_files' => ( is => 'rw', isa => 'ArrayRef' );
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'blast_results' );
has 'job_runner' => ( is => 'rw', isa => 'Str', default => 'Local' );
has 'cpus' => ( is => 'rw', isa => 'Int', default => 1 );
has 'makeblastdb_exec' => ( is => 'rw', isa => 'Str', default => 'makeblastdb' );
has 'blastp_exec' => ( is => 'rw', isa => 'Str', default => 'blastp' );
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
lib/Bio/Roary/CommandLine/Roary.pm view on Meta::CPAN
use Bio::Roary::QC::Report;
use Bio::Roary::ReformatInputGFFs;
use Bio::Roary::External::CheckTools;
use File::Which;
use File::Path qw(make_path);
use Cwd qw(abs_path getcwd);
use File::Temp;
use File::Basename;
extends 'Bio::Roary::CommandLine::Common';
has 'args' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 );
has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'fasta_files' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } );
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' );
has 'output_directory' => ( is => 'rw', isa => 'Str', default => '.' );
has '_original_directory' => ( is => 'rw', isa => 'Str', default => '.' );
has 'job_runner' => ( is => 'rw', isa => 'Str', default => 'Local' );
has 'makeblastdb_exec' => ( is => 'rw', isa => 'Str', default => 'makeblastdb' );
has 'blastp_exec' => ( is => 'rw', isa => 'Str', default => 'blastp' );
lib/Bio/Roary/CommandLine/RoaryCoreAlignment.pm view on Meta::CPAN
use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Cwd 'abs_path';
use File::Path qw(remove_tree);
use Bio::Roary::ExtractCoreGenesFromSpreadsheet;
use Bio::Roary::LookupGeneFiles;
use Bio::Roary::MergeMultifastaAlignments;
extends 'Bio::Roary::CommandLine::Common';
has 'args' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 );
has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'multifasta_base_directory' => ( is => 'rw', isa => 'Str', default => 'pan_genome_sequences' );
has 'spreadsheet_filename' => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'core_gene_alignment.aln' );
has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 );
has 'dont_delete_files' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
has '_error_message' => ( is => 'rw', isa => 'Str' );
lib/Bio/Roary/CommandLine/RoaryPostAnalysis.pm view on Meta::CPAN
use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::PostAnalysis;
use File::Find::Rule;
use Bio::Roary::External::GeneAlignmentFromNucleotides;
use File::Path qw(remove_tree);
use Bio::Roary::External::Fasttree;
extends 'Bio::Roary::CommandLine::Common';
has 'args' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 );
has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );
has '_error_message' => ( is => 'rw', isa => 'Str' );
has 'fasta_files' => ( is => 'rw', isa => 'Str', default => '_fasta_files' );
has 'input_files' => ( is => 'rw', isa => 'Str', default => '_gff_files');
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' );
has 'output_pan_geneome_filename' => ( is => 'rw', isa => 'Str', default => 'pan_genome.fa' );
has 'output_statistics_filename' => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
has 'output_multifasta_files' => ( is => 'rw', isa => 'Bool', default => 0 );
lib/Bio/Roary/CommandLine/RoaryReorderSpreadsheet.pm view on Meta::CPAN
package Bio::Roary::CommandLine::RoaryReorderSpreadsheet;
$Bio::Roary::CommandLine::RoaryReorderSpreadsheet::VERSION = '3.13.0';
# ABSTRACT: Take in a tree and a spreadsheet and output a reordered spreadsheet
use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::ReorderSpreadsheet;
extends 'Bio::Roary::CommandLine::Common';
has 'args' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 );
has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'tree_file' => ( is => 'rw', isa => 'Str' );
has 'spreadsheet_filename' => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'reordered_spreadsheet.csv' );
has 'tree_format' => ( is => 'rw', isa => 'Str', default => 'newick' );
has 'search_strategy' => ( is => 'rw', isa => 'Str', default => 'depth' );
has 'sortby' => ( is => 'rw', isa => 'Str', default => 'height');
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
lib/Bio/Roary/CommandLine/TransferAnnotationToGroups.pm view on Meta::CPAN
$Bio::Roary::CommandLine::TransferAnnotationToGroups::VERSION = '3.13.0';
# ABSTRACT: Take in a groups file and a set of GFF files and transfer the consensus annotation
use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::AnnotateGroups;
extends 'Bio::Roary::CommandLine::Common';
has 'args' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 );
has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'gff_files' => ( is => 'rw', isa => 'ArrayRef' );
has 'groups_filename' => ( is => 'rw', isa => 'Str' );
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'reannotated_groups' );
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
has '_error_message' => ( is => 'rw', isa => 'Str' );
sub BUILD {
lib/Bio/Roary/CommandLine/UniqueGenesPerSample.pm view on Meta::CPAN
$Bio::Roary::CommandLine::UniqueGenesPerSample::VERSION = '3.13.0';
# ABSTRACT: Take in the clustered file and produce a sorted file with the frequency of each sample's unique genes
use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::UniqueGenesPerSample;
extends 'Bio::Roary::CommandLine::Common';
has 'args' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 );
has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'clustered_proteins' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' );
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'unique_genes_per_sample.tsv' );
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
has '_error_message' => ( is => 'rw', isa => 'Str' );
sub BUILD {
my ($self) = @_;
lib/Bio/Roary/ContigsToGeneIDsFromGFF.pm view on Meta::CPAN
$Bio::Roary::ContigsToGeneIDsFromGFF::VERSION = '3.13.0';
# ABSTRACT: Parse a GFF and efficiently extract ordered gene ids on each contig
use Moose;
use Bio::Tools::GFF;
with 'Bio::Roary::ParseGFFAnnotationRole';
has 'contig_to_ids' => ( is => 'rw', isa => 'HashRef', lazy => 1, builder => '_build_contig_to_ids');
has 'overlapping_hypothetical_protein_ids' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_overlapping_hypothetical_protein_ids');
has '_genes_annotation' => ( is => 'rw', isa => 'ArrayRef', default => sub{[]});
has '_min_nucleotide_overlap_percentage' => ( is => 'ro', isa => 'Int', default => 10);
# Manually parse the GFF file because the BioPerl module is too slow
sub _build_contig_to_ids
{
my ($self) = @_;
my %contigs_to_ids;
my @genes_annotation;
lib/Bio/Roary/External/GeneAlignmentFromNucleotides.pm view on Meta::CPAN
package Bio::Roary::External::GeneAlignmentFromNucleotides;
$Bio::Roary::External::GeneAlignmentFromNucleotides::VERSION = '3.13.0';
# ABSTRACT: Take in multi-FASTA files of nucleotides and align each file with PRANK or MAFFT
use Moose;
with 'Bio::Roary::JobRunner::Role';
has 'fasta_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'exec' => ( is => 'ro', isa => 'Str', default => 'protein_alignment_from_nucleotides' );
has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 );
has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1 );
has 'mafft' => ( is => 'ro', isa => 'Bool', default => 0 );
has 'dont_delete_files' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'num_input_files' => ( is => 'ro', isa => 'Int', required => 1);
# Overload Role
has 'memory_in_mb' => ( is => 'rw', isa => 'Int', lazy => 1, builder => '_build_memory_in_mb' );
lib/Bio/Roary/External/PostAnalysis.pm view on Meta::CPAN
package Bio::Roary::External::PostAnalysis;
$Bio::Roary::External::PostAnalysis::VERSION = '3.13.0';
# ABSTRACT: Perform the post analysis
use Moose;
use Cwd qw(getcwd);
with 'Bio::Roary::JobRunner::Role';
has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'exec' => ( is => 'ro', isa => 'Str', default => 'pan_genome_post_analysis' );
has 'fasta_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'output_filename' => ( is => 'ro', isa => 'Str', required => 1 );
has 'output_pan_geneome_filename' => ( is => 'ro', isa => 'Str', required => 1 );
has 'output_statistics_filename' => ( is => 'ro', isa => 'Str', required => 1 );
has 'clusters_filename' => ( is => 'ro', isa => 'Str', required => 1 );
has 'output_multifasta_files' => ( is => 'ro', isa => 'Bool', required => 1 );
has 'dont_delete_files' => ( is => 'ro', isa => 'Bool', default => 0 );
has 'dont_create_rplots' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'dont_split_groups' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'verbose_stats' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 );
lib/Bio/Roary/ExtractCoreGenesFromSpreadsheet.pm view on Meta::CPAN
use Moose;
use Text::CSV;
use Bio::Roary::GroupStatistics;
use POSIX;
has 'spreadsheet' => ( is => 'ro', isa => 'Str', required => 1 );
has '_csv_parser' => ( is => 'ro', isa => 'Text::CSV', lazy => 1, builder => '_build__csv_parser' );
has '_input_spreadsheet_fh' => ( is => 'ro', lazy => 1, builder => '_build__input_spreadsheet_fh' );
has 'ordered_core_genes' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_ordered_core_genes' );
has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1 );
has 'sample_names' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } );
has 'sample_names_to_genes' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
has '_number_of_isolates' => ( is => 'rw', isa => 'Int' );
has '_gene_column' => ( is => 'rw', isa => 'Int' );
has '_num_isolates_column' => ( is => 'rw', isa => 'Int' );
has '_avg_sequences_per_isolate_column' => ( is => 'rw', isa => 'Int' );
has '_genome_fragement_column' => ( is => 'rw', isa => 'Int' );
lib/Bio/Roary/ExtractProteomeFromGFFs.pm view on Meta::CPAN
use Moose;
use Bio::Roary::Exceptions;
use Bio::Roary::ExtractProteomeFromGFF;
use File::Basename;
use Cwd qw(getcwd);
use File::Temp;
with 'Bio::Roary::JobRunner::Role';
has 'gff_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'fasta_files' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_fasta_files' );
has 'fasta_files_to_gff_files' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_fasta_files_to_gff_files' );
has 'apply_unknowns_filter' => ( is => 'rw', isa => 'Bool', default => 1 );
has '_queue' => ( is => 'rw', isa => 'Str', default => 'small' );
has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 );
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'working_directory' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
sub _build__extract_proteome_objects
{
my ($self) = @_;
lib/Bio/Roary/FilterFullClusters.pm view on Meta::CPAN
has 'number_of_input_files' => ( is => 'ro', isa => 'Int', required => 1 );
has 'fasta_file' => ( is => 'ro', isa => 'Str', required => 1 );
has 'output_file' => ( is => 'ro', isa => 'Str', required => 1 );
has '_greater_than_or_equal' => ( is => 'ro', isa => 'Bool', default => 0 );
has 'cdhit_input_fasta_file' => ( is => 'ro', isa => 'Str', required => 1 );
has 'cdhit_output_fasta_file' => ( is => 'ro', isa => 'Str', required => 1 );
has 'output_groups_file' => ( is => 'ro', isa => 'Str', required => 1 );
has '_full_cluster_gene_names' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__full_cluster_gene_names' );
has '_input_seqio' => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__input_seqio' );
has '_output_seqio' => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__output_seqio' );
has '_all_full_cluster_genes' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__all_full_cluster_genes' );
sub _build__full_cluster_gene_names
{
my($self) = @_;
my %full_cluster_gene_names ;
for my $gene_name (keys %{$self->_clustered_genes})
{
lib/Bio/Roary/FilterUnknownsFromFasta.pm view on Meta::CPAN
$Bio::Roary::FilterUnknownsFromFasta::VERSION = '3.13.0';
# ABSTRACT: Take in fasta files, remove sequences with too many unknowns and return a list of the new files
use Moose;
use Bio::SeqIO;
use Cwd;
use Bio::Roary::Exceptions;
use File::Basename;
has 'fasta_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'apply_unknowns_filter' => ( is => 'rw', isa => 'Bool', default => 1 );
has 'maximum_percentage_of_unknowns' => ( is => 'ro', isa => 'Num', default => 5 );
has 'filtered_fasta_files' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_filtered_fasta_files' );
has 'input_fasta_to_output_fasta' => ( is => 'ro', isa => 'HashRef', default => sub {{}} );
sub _build_filtered_fasta_files
{
my ($self) = @_;
my @output_file_names;
for my $fasta_file (@{$self->fasta_files})
{
my ( $filename, $directories, $suffix ) = fileparse($fasta_file);
push(@output_file_names, $self->_filter_fasta_sequences_and_return_new_file($filename,$fasta_file ));
lib/Bio/Roary/GeneNamesFromGFF.pm view on Meta::CPAN
package Bio::Roary::GeneNamesFromGFF;
$Bio::Roary::GeneNamesFromGFF::VERSION = '3.13.0';
# ABSTRACT: Parse a GFF and efficiently extract ID -> Gene Name
use Moose;
use Bio::Tools::GFF;
with 'Bio::Roary::ParseGFFAnnotationRole';
has 'ids_to_gene_name' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_ids_to_gene_name' );
has 'ids_to_product' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
has 'ids_to_gene_size' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
# Parsing with the perl GFF module is exceptionally slow.
sub _build_ids_to_gene_name {
my ($self) = @_;
my %id_to_gene_name;
my $gffio = Bio::Tools::GFF->new( -file => $self->gff_file, -gff_version => 3 );
while ( my $feature = $gffio->next_feature() ) {
lib/Bio/Roary/GroupStatistics.pm view on Meta::CPAN
use Bio::SeqIO;
use Bio::Roary::Exceptions;
use Bio::Roary::AnalyseGroups;
use Bio::Roary::AnnotateGroups;
use Bio::Roary::PresenceAbsenceMatrix;
has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
has 'analyse_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'gene_presence_absence.csv' );
has 'output_rtab_filename' => ( is => 'ro', isa => 'Str', default => 'gene_presence_absence.Rtab' );
has 'groups_to_contigs' => ( is => 'ro', isa => 'Maybe[HashRef]');
has '_output_fh' => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );
has '_text_csv_obj' => ( is => 'ro', isa => 'Text::CSV', lazy => 1, builder => '_build__text_csv_obj' );
has '_sorted_file_names' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__sorted_file_names' );
has '_groups_to_files' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_files' );
has '_files_to_groups' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__files_to_groups' );
has '_num_files_in_groups' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__num_files_in_groups' );
has '_verbose' => ( is => 'ro', isa => 'Bool', default => 0 );
# Builder for _output_fh: opens output_filename for writing, truncating any
# existing file, and returns the filehandle. Throws
# Bio::Roary::Exceptions::CouldntWriteToFile if the file cannot be opened.
sub _build__output_fh {
    my $self = shift;

    my $output_fh;
    unless ( open( $output_fh, '>', $self->output_filename ) ) {
        Bio::Roary::Exceptions::CouldntWriteToFile->throw(
            error => "Couldnt write output file:" . $self->output_filename );
    }
    return $output_fh;
}
lib/Bio/Roary/JobRunner/Local.pm view on Meta::CPAN
package Bio::Roary::JobRunner::Local;
$Bio::Roary::JobRunner::Local::VERSION = '3.13.0';
# ABSTRACT: Execute a set of commands locally
use Moose;
use Log::Log4perl qw(:easy);
has 'commands_to_run' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'logger' => ( is => 'ro', lazy => 1, builder => '_build_logger');
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'memory_in_mb' => ( is => 'rw', isa => 'Int', default => '200' );
sub run {
my ($self) = @_;
for my $command_to_run ( @{ $self->commands_to_run } ) {
$self->logger->info($command_to_run);
system($command_to_run );
lib/Bio/Roary/JobRunner/Parallel.pm view on Meta::CPAN
$Bio::Roary::JobRunner::Parallel::VERSION = '3.13.0';
# ABSTRACT: Use GNU Parallel
use Moose;
use File::Temp qw/ tempfile /;
use Log::Log4perl qw(:easy);
use File::Slurper 'write_text';
use File::Temp qw/ tempfile /;
has 'commands_to_run' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 );
has 'logger' => ( is => 'ro', lazy => 1, builder => '_build_logger');
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'memory_in_mb' => ( is => 'rw', isa => 'Int', default => '200' );
sub run {
my ($self) = @_;
my($fh, $temp_command_filename) = tempfile();
write_text($temp_command_filename, join("\n", @{ $self->commands_to_run }) );
lib/Bio/Roary/LookupGeneFiles.pm view on Meta::CPAN
package Bio::Roary::LookupGeneFiles;
$Bio::Roary::LookupGeneFiles::VERSION = '3.13.0';
# ABSTRACT: Take in an ordering of genes and a directory and return an ordered list of file locations
use Moose;
has 'multifasta_directory' => ( is => 'ro', isa => 'Str', default => 'pan_genome_sequences' );
has 'ordered_genes' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'ordered_gene_files' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_ordered_gene_files' );
sub _build_ordered_gene_files
{
my ($self) = @_;
my @gene_files;
for my $gene (@{$self->ordered_genes})
{
$gene =~ s!\W!_!gi;
my $filename = $gene.'.fa.aln';