Bio-Roary

 view release on metacpan or  search on metacpan

lib/Bio/Roary/AccessoryBinaryFasta.pm  view on Meta::CPAN



use Moose;
use POSIX;
use Bio::Roary::AnnotateGroups;
use Bio::Roary::AnalyseGroups;
use Bio::Roary::Exceptions;
use Bio::SeqIO;
use File::Basename;

# Required, read-only constructor arguments.
has 'input_files'            => ( is => 'ro', isa => 'ArrayRef',                   required => 1 );
has 'annotate_groups_obj'    => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
has 'analyse_groups_obj'     => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups',  required => 1 );
# Optional settings with literal defaults.
# NOTE(review): the lower and upper bound percentages both default to 5; how they
# are turned into cut-offs is decided by builders not visible in this excerpt.
has 'output_filename'        => ( is => 'ro', isa => 'Str',                        default  => 'accessory_binary_genes.fa' );
has 'lower_bound_percentage' => ( is => 'ro', isa => 'Int',                        default  => 5 );
has 'upper_bound_percentage' => ( is => 'ro', isa => 'Int',                        default  => 5 );
has 'max_accessory_to_include' => ( is => 'ro', isa => 'Int',                      default  => 4000 );
# Derived values: computed on first access by the named builder (lazy => 1).
has 'groups_to_files'        => ( is => 'ro', isa => 'HashRef',                    lazy     => 1, builder => '_build__groups_to_files' );
has '_lower_bound_value'     => ( is => 'ro', isa => 'Int',                        lazy     => 1, builder => '_build__lower_bound_value' );
has '_upper_bound_value'     => ( is => 'ro', isa => 'Int',                        lazy     => 1, builder => '_build__upper_bound_value' );

sub _build__groups_to_files {
    my ($self) = @_;
    my %groups_to_files;
    for my $group ( @{ $self->annotate_groups_obj->_groups } ) {
        my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group};
        my %filenames;
        for my $gene_name ( @{$genes} ) {

lib/Bio/Roary/AccessoryClustering.pm  view on Meta::CPAN



use Moose;
use Bio::Roary::External::Cdhit;
with 'Bio::Roary::ClustersRole';

# `input_file` is the only required argument; the next three fall back to the
# literal defaults shown.
has 'input_file'              => ( is => 'ro', isa => 'Str',     required => 1 );
has 'identity'                => ( is => 'ro', isa => 'Num',     default  => 0.9 );
has 'cpus'                    => ( is => 'ro', isa => 'Int',      default  => 1 );
has '_output_cd_hit_filename' => ( is => 'ro', isa => 'Str',     default  => '_accessory_clusters' );
# Lazily built values, each computed on first access by its named builder.
# NOTE(review): `clusters` names its builder `_build__clusters` (double underscore)
# while its siblings use a single underscore; that builder is not defined in this
# excerpt - presumably it is supplied by Bio::Roary::ClustersRole; confirm.
has 'clusters_to_samples'     => ( is => 'ro', isa => 'HashRef', lazy     => 1, builder => '_build_clusters_to_samples' );
has 'samples_to_clusters'     => ( is => 'ro', isa => 'HashRef', lazy     => 1, builder => '_build_samples_to_clusters' );
has 'sample_weights'          => ( is => 'ro', isa => 'HashRef', lazy     => 1, builder => '_build_sample_weights' );
has 'clusters_filename'       => ( is => 'ro', isa => 'Str',     lazy     => 1, builder => '_build_clusters_filename' );
has 'clusters'                => ( is => 'ro', isa => 'HashRef', lazy     => 1, builder => '_build__clusters' );

sub _build_sample_weights {
    my ($self) = @_;
    my %sample_weights;
    for my $cluster_name ( keys %{ $self->clusters_to_samples } ) {
        my $cluster_size = @{ $self->clusters_to_samples->{$cluster_name} };
        for my $sample_name ( @{ $self->clusters_to_samples->{$cluster_name} } ) {
            $sample_weights{$sample_name} = 1 / $cluster_size;
        }
    }

lib/Bio/Roary/AnalyseGroups.pm  view on Meta::CPAN

package Bio::Roary::AnalyseGroups;
$Bio::Roary::AnalyseGroups::VERSION = '3.13.0';
# ABSTRACT: Take in a groups file and the original FASTA files and create plots and stats


use Moose;
use Bio::Roary::Exceptions;

# Public constructor arguments: two are required, `output_filename` has a default.
has 'fasta_files'          => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'groups_filename'      => ( is => 'ro', isa => 'Str',      required => 1 );
has 'output_filename'      => ( is => 'ro', isa => 'Str',      default  => 'summary_of_groups' );

# Internal lookups. Builders in this class use the `_builder_` prefix.
# `_genes_to_file` and `_genes_to_groups` are read-write and have neither a builder
# nor a default; according to the comments in BUILD they are filled in as a side
# effect of building `_files_to_genes` and `_groups_to_genes` respectively.
has '_number_of_isolates'  => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_builder__number_of_isolates' );
has '_genes_to_file'       => ( is => 'rw', isa => 'HashRef' );
has '_files_to_genes'      => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_builder__files_to_genes' );
has '_groups_to_genes'     => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_builder__groups_to_genes' );
has '_genes_to_groups'     => ( is => 'rw', isa => 'HashRef' );

# Lazily built array reference of groups.
has '_groups' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_builder__groups' );


# Moose construction hook: forces two lazy attributes to be computed as soon as
# the object exists, in a fixed order.
sub BUILD {
    my $self = shift;

    # Building _groups_to_genes also causes _genes_to_groups to be populated.
    $self->_groups_to_genes();

    # Building _files_to_genes also causes _genes_to_file to be populated.
    $self->_files_to_genes();
}

lib/Bio/Roary/AnnotateGroups.pm  view on Meta::CPAN

# ABSTRACT: Take in a group file and associated GFF files for the isolates and update the group name to the gene name


use Moose;
use Bio::Roary::Exceptions;
use Bio::Roary::GeneNamesFromGFF;
use Array::Utils qw(array_minus);
use List::Util qw(max min sum);
use File::Grep qw(fgrep);

# Public inputs: `gff_files` and `groups_filename` are required.
has 'gff_files'          => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'output_filename'    => ( is => 'ro', isa => 'Str',      default  => 'reannotated_groups_file' );
has 'groups_filename'    => ( is => 'ro', isa => 'Str',      required => 1 );
# Per-ID lookups. `_ids_to_gene_names` is lazy but is forced in BUILD; the product
# and gene-size hashes start out as fresh empty hash references.
has '_ids_to_gene_names' => ( is => 'ro', isa => 'HashRef',  lazy     => 1, builder => '_build__ids_to_gene_names' );
has '_ids_to_product'    => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
has '_ids_to_gene_size'  => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
has 'group_nucleotide_lengths'  => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_group_nucleotide_lengths');

# Lazily built internals. NOTE(review): builder names mix the `_build_` and
# `_builder_` prefixes, so check the exact spelling before adding or renaming one.
# `_output_fh` declares no `isa`, so its value is not type-checked.
has '_groups_to_id_names'   => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_builder__groups_to_id_names' );
has '_output_fh'            => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );
has '_groups_to_consensus_gene_names' =>
  ( is => 'rw', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_consensus_gene_names' );
has '_filtered_gff_files'   => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__filtered_gff_files' );
has '_number_of_files'      => ( is => 'ro', isa => 'Int',      lazy => 1, builder => '_build__number_of_files' );
has '_ids_to_groups'        => ( is => 'rw', isa => 'HashRef',  lazy => 1, builder => '_builder__ids_to_groups' );
has '_group_counter'        => ( is => 'rw', isa => 'Int', lazy => 1, builder => '_builder__group_counter' );
has '_group_default_prefix' => ( is => 'rw', isa => 'Str', default => 'group_' );
# Uses Moose's `lazy_build` shorthand instead of an explicit lazy/builder pair.
has '_ids_to_verbose_stats' => ( is => 'rw', isa => 'HashRef', lazy_build => 1 );

# Moose construction hook: forces the lazy `_ids_to_gene_names` attribute to be
# built as soon as the object exists instead of on first use.
sub BUILD {
    my $self = shift;
    $self->_ids_to_gene_names();
}

lib/Bio/Roary/AssemblyStatistics.pm  view on Meta::CPAN


# Output and execution settings with literal defaults.
has 'output_filename'       => ( is => 'ro', isa => 'Str',      default => 'assembly_statistics.csv' );
has 'job_runner'            => ( is => 'ro', isa => 'Str',      default => 'Local' );
has 'cpus'                  => ( is => 'ro', isa => 'Int',      default => 1 );
# Thresholds, stored as fractions judging by their defaults (0.15 - 0.99).
# NOTE(review): `core_definition` and `_soft_core_percentage` share the default
# 0.99; how the four thresholds interact is decided by code not visible here.
has 'core_definition'       => ( is => 'rw', isa => 'Num',      default => 0.99 );
has '_cloud_percentage'     => ( is => 'rw', isa => 'Num',      default => 0.15 );
has '_shell_percentage'     => ( is => 'rw', isa => 'Num',      default => 0.95 );
has '_soft_core_percentage' => ( is => 'rw', isa => 'Num',      default => 0.99 );
has 'verbose'               => ( is => 'ro', isa => 'Bool',     default => 0 );
has 'contiguous_window'     => ( is => 'ro', isa => 'Int',      default => 10 );
# Lazily built values. `_genes_to_rows` and `gene_category_count` are forced in BUILD.
has 'ordered_genes'         => ( is => 'ro', isa => 'ArrayRef', lazy    => 1, builder => '_build_ordered_genes' );
has '_genes_to_rows'        => ( is => 'ro', isa => 'HashRef',  lazy    => 1, builder => '_build__genes_to_rows' );
has 'all_sample_statistics' => ( is => 'ro', isa => 'HashRef',  lazy    => 1, builder => '_build_all_sample_statistics' );
# Read-write with no default or builder, so it stays unset until something assigns it.
has 'sample_names_to_column_index' => ( is => 'rw', isa => 'Maybe[HashRef]' );
has 'summary_output_filename'=> ( is => 'ro', isa => 'Str',      default => 'summary_statistics.txt' );
# `logger` declares no `isa`, so whatever its builder returns is accepted as-is.
has 'logger'                 => ( is => 'ro', lazy => 1, builder => '_build_logger');
has 'gene_category_count'   => ( is => 'ro', isa => 'HashRef',  lazy    => 1, builder => '_build_gene_category_count' );

# Moose construction hook: forces the lazy `_genes_to_rows` and
# `gene_category_count` attributes, in that order, as soon as the object exists.
sub BUILD {
    my $self = shift;

    $self->_genes_to_rows();
    $self->gene_category_count();
}

sub _build_logger
{
    my ($self) = @_;

lib/Bio/Roary/ChunkFastaFile.pm  view on Meta::CPAN



use Moose;
use Bio::SeqIO;
use Bio::Roary::Exceptions;
use Cwd;
use File::Temp;

# `fasta_file` is the only required argument; `target_chunk_size` defaults to 200000.
has 'fasta_file'          => ( is => 'ro', isa => 'Str',      required => 1 );
has 'target_chunk_size'   => ( is => 'ro', isa => 'Int',      default  => 200000 );
has 'sequence_file_names' => ( is => 'ro', isa => 'ArrayRef', lazy     => 1, builder => '_build_sequence_file_names' );
# A fresh temporary directory is created under the current working directory for
# every object; CLEANUP => 1 asks File::Temp to remove it automatically.
has '_working_directory' =>
  ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
# Lazily built: the directory name string and the Bio::SeqIO input object.
has '_working_directory_name' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__working_directory_name' );
has '_input_seqio' => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__input_seqio' );

# Builder for `_working_directory_name`: returns the path string of the temporary
# directory object held in `_working_directory`.
sub _build__working_directory_name {
    my $self     = shift;
    my $temp_dir = $self->_working_directory;
    return $temp_dir->dirname;
}

lib/Bio/Roary/CombinedProteome.pm  view on Meta::CPAN

package Bio::Roary::CombinedProteome;
$Bio::Roary::CombinedProteome::VERSION = '3.13.0';
# ABSTRACT: Take in multiple FASTA sequences containing proteomes and concat them together and output a FASTA file, filtering out more than 5% X's


use Moose;
use Bio::Roary::Exceptions;

# `proteome_files` is required (BUILD checks that every listed file exists);
# `output_filename` falls back to 'combined_output.fa'.
has 'proteome_files'                 => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'output_filename'                => ( is => 'ro', isa => 'Str',      default  => 'combined_output.fa' );

# Moose construction hook: checks every entry of `proteome_files` and throws
# Bio::Roary::Exceptions::FileNotFound for the first path that does not exist.
# Only existence (-e) is tested, not readability.
sub BUILD {
    my $self = shift;

    foreach my $proteome_file ( @{ $self->proteome_files } ) {
        next if -e $proteome_file;
        Bio::Roary::Exceptions::FileNotFound->throw( error => 'Cant open file: ' . $proteome_file );
    }
}

lib/Bio/Roary/CommandLine/AssemblyStatistics.pm  view on Meta::CPAN

package Bio::Roary::CommandLine::AssemblyStatistics;
$Bio::Roary::CommandLine::AssemblyStatistics::VERSION = '3.13.0';
# ABSTRACT: Given a spreadsheet of gene presence and absence calculate some statistics


use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::AssemblyStatistics;
extends 'Bio::Roary::CommandLine::Common';

# Invocation details supplied by the caller (required, read-only).
has 'args'            => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name'     => ( is => 'ro', isa => 'Str',      required => 1 );
# Read-write options with defaults; presumably overwritten from `args` via
# GetOptionsFromArray - the parsing code is not visible in this excerpt.
has 'help'            => ( is => 'rw', isa => 'Bool',     default  => 0 );
has 'spreadsheet'     => ( is => 'rw', isa => 'Str',      default  => 'gene_presence_absence.csv' );
has 'job_runner'      => ( is => 'rw', isa => 'Str',      default  => 'Local' );
has 'cpus'            => ( is => 'rw', isa => 'Int',      default  => 1 );
has 'output_filename' => ( is => 'rw', isa => 'Str',      default  => 'assembly_statistics.csv' );
has 'version'         => ( is => 'rw', isa => 'Bool',     default  => 0 );
has 'core_definition' => ( is => 'rw', isa => 'Num',      default  => 0.99 );
has 'verbose'         => ( is => 'rw', isa => 'Bool',     default  => 0 );

lib/Bio/Roary/CommandLine/ExtractProteomeFromGff.pm  view on Meta::CPAN

$Bio::Roary::CommandLine::ExtractProteomeFromGff::VERSION = '3.13.0';
# ABSTRACT: Take in GFF files and output the proteome


use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::ExtractProteomeFromGFF;
use File::Basename;
extends 'Bio::Roary::CommandLine::Common';

# Invocation details supplied by the caller, plus the help flag.
has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );

# Read-write options. `gff_files` and `_error_message` have no default, so they
# are unset until assigned; the remaining options start from the defaults shown
# (unknowns filter on, translation table 11, output into the current directory).
has 'gff_files'             => ( is => 'rw', isa => 'ArrayRef' );
has 'output_suffix'         => ( is => 'rw', isa => 'Str',  default => 'proteome.faa' );
has '_error_message'        => ( is => 'rw', isa => 'Str' );
has 'apply_unknowns_filter' => ( is => 'rw', isa => 'Bool', default => 1 );
has 'translation_table'     => ( is => 'rw', isa => 'Int',  default => 11 );
has 'verbose'               => ( is => 'rw', isa => 'Bool', default => 0 );
has 'output_directory'      => ( is => 'rw', isa => 'Str',  default => '.' );

lib/Bio/Roary/CommandLine/GeneAlignmentFromNucleotides.pm  view on Meta::CPAN

use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use File::Copy;
use Bio::Roary::AnnotateGroups;
use Bio::Roary::External::Prank;
use Bio::Roary::Output::GroupsMultifastaProtein;
use Bio::Roary::SortFasta;
use Bio::Roary::External::Mafft;
extends 'Bio::Roary::CommandLine::Common';

# Invocation details supplied by the caller, plus the help flag.
has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );

# Read-write options. `nucleotide_fasta_files` and `_error_message` have no
# default. `mafft` is a boolean switch that is off by default; `_min_similarity`
# is a fraction defaulting to 0.98.
has 'nucleotide_fasta_files' => ( is => 'rw', isa => 'ArrayRef' );
has '_error_message'         => ( is => 'rw', isa => 'Str' );
has 'verbose'                => ( is => 'rw', isa => 'Bool', default => 0 );
has 'mafft'                  => ( is => 'rw', isa => 'Bool', default => 0 );
has '_min_similarity'        => ( is => 'rw', isa => 'Num',  default  => 0.98 );

sub BUILD {

lib/Bio/Roary/CommandLine/IterativeCdhit.pm  view on Meta::CPAN

package Bio::Roary::CommandLine::IterativeCdhit;
$Bio::Roary::CommandLine::IterativeCdhit::VERSION = '3.13.0';
# ABSTRACT: Iteratively run cdhit


use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::IterativeCdhit;
extends 'Bio::Roary::CommandLine::Common';

# Invocation details supplied by the caller, the help flag and an error slot.
has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );
has '_error_message' => ( is => 'rw', isa => 'Str' );

# Output file names and the input-file count, all with literal defaults.
has 'output_cd_hit_filename'          => ( is => 'rw', isa => 'Str', default => '_clustered' );
has 'output_combined_filename'        => ( is => 'rw', isa => 'Str', default => '_combined_files' );
has 'number_of_input_files'           => ( is => 'rw', isa => 'Int', default => 1 );
has 'output_filtered_clustered_fasta' => ( is => 'rw', isa => 'Str', default => '_clustered_filtered.fa' );

# NOTE(review): despite "percentage" in the name this is a Num defaulting to 0.98,
# i.e. it looks like a fraction rather than a 0-100 value - confirm against its users.
has 'lower_bound_percentage'          => ( is => 'rw', isa => 'Num', default => 0.98 );

lib/Bio/Roary/CommandLine/ParallelAllAgainstAllBlastp.pm  view on Meta::CPAN

# ABSTRACT: Take in a FASTA file of proteins and blast against itself


use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::ParallelAllAgainstAllBlast;
use Bio::Roary::CombinedProteome;
use Bio::Roary::PrepareInputFiles;
extends 'Bio::Roary::CommandLine::Common';

# Invocation details supplied by the caller, plus the help flag.
has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );

# Read-write options. `fasta_files` has no default; the executable names default
# to the bare commands 'makeblastdb' and 'blastp'.
has 'fasta_files'       => ( is => 'rw', isa => 'ArrayRef' );
has 'output_filename'   => ( is => 'rw', isa => 'Str', default => 'blast_results' );
has 'job_runner'        => ( is => 'rw', isa => 'Str', default => 'Local' );
has 'cpus'                        => ( is => 'rw', isa => 'Int',  default => 1 );
has 'makeblastdb_exec'  => ( is => 'rw', isa => 'Str', default => 'makeblastdb' );
has 'blastp_exec'       => ( is => 'rw', isa => 'Str', default => 'blastp' );
has 'verbose'           => ( is => 'rw', isa => 'Bool', default => 0 );

lib/Bio/Roary/CommandLine/Roary.pm  view on Meta::CPAN

use Bio::Roary::QC::Report;
use Bio::Roary::ReformatInputGFFs;
use Bio::Roary::External::CheckTools;
use File::Which;
use File::Path qw(make_path);
use Cwd qw(abs_path getcwd);
use File::Temp;
use File::Basename;
extends 'Bio::Roary::CommandLine::Common';

# Invocation details supplied by the caller, plus the help flag.
has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );

# Read-write options with defaults. `fasta_files` starts as a fresh empty array
# reference per object; both directory attributes default to '.'.
has 'fasta_files' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } );
has 'output_filename'         => ( is => 'rw', isa => 'Str',  default => 'clustered_proteins' );
has 'output_directory'        => ( is => 'rw', isa => 'Str',  default => '.' );
has '_original_directory'     => ( is => 'rw', isa => 'Str',  default => '.' );
has 'job_runner'              => ( is => 'rw', isa => 'Str',  default => 'Local' );
has 'makeblastdb_exec'        => ( is => 'rw', isa => 'Str',  default => 'makeblastdb' );
has 'blastp_exec'             => ( is => 'rw', isa => 'Str',  default => 'blastp' );

lib/Bio/Roary/CommandLine/RoaryCoreAlignment.pm  view on Meta::CPAN


use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Cwd 'abs_path';
use File::Path qw(remove_tree);
use Bio::Roary::ExtractCoreGenesFromSpreadsheet;
use Bio::Roary::LookupGeneFiles;
use Bio::Roary::MergeMultifastaAlignments;
extends 'Bio::Roary::CommandLine::Common';

# Invocation details supplied by the caller, plus the help flag.
has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );

# Read-write options with defaults. `core_definition` is a fraction (default 0.99);
# the two boolean switches are off by default; `_error_message` has no default.
has 'multifasta_base_directory' => ( is => 'rw', isa => 'Str', default => 'pan_genome_sequences' );
has 'spreadsheet_filename'      => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
has 'output_filename'           => ( is => 'rw', isa => 'Str', default => 'core_gene_alignment.aln' );
has 'core_definition'           => ( is => 'rw', isa => 'Num', default => 0.99 );
has 'dont_delete_files'         => ( is => 'rw', isa => 'Bool', default => 0 );
has 'allow_paralogs'            => ( is => 'rw', isa => 'Bool', default => 0 );
has '_error_message'            => ( is => 'rw', isa => 'Str' );

lib/Bio/Roary/CommandLine/RoaryPostAnalysis.pm  view on Meta::CPAN


use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::PostAnalysis;
use File::Find::Rule;
use Bio::Roary::External::GeneAlignmentFromNucleotides;
use File::Path qw(remove_tree);
use Bio::Roary::External::Fasttree;
extends 'Bio::Roary::CommandLine::Common';

# Invocation details supplied by the caller, the help flag and an error slot.
has 'args'                        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name'                 => ( is => 'ro', isa => 'Str',      required => 1 );
has 'help'                        => ( is => 'rw', isa => 'Bool',     default  => 0 );
has '_error_message'              => ( is => 'rw', isa => 'Str' );

# NOTE(review): here `fasta_files` and `input_files` are plain strings (defaults
# '_fasta_files' / '_gff_files'), not array references - presumably names of files
# that list the inputs; confirm against the code that reads them.
# The spelling `output_pan_geneome_filename` is part of the public accessor name,
# so it must not be "corrected" without updating every caller.
has 'fasta_files'                 => ( is => 'rw', isa => 'Str',  default  => '_fasta_files' );
has 'input_files'                 => ( is => 'rw', isa => 'Str',  default  => '_gff_files');
has 'output_filename'             => ( is => 'rw', isa => 'Str',  default  => 'clustered_proteins' );
has 'output_pan_geneome_filename' => ( is => 'rw', isa => 'Str',  default  => 'pan_genome.fa' );
has 'output_statistics_filename'  => ( is => 'rw', isa => 'Str',  default  => 'gene_presence_absence.csv' );
has 'output_multifasta_files'     => ( is => 'rw', isa => 'Bool', default  => 0 );

lib/Bio/Roary/CommandLine/RoaryReorderSpreadsheet.pm  view on Meta::CPAN

package Bio::Roary::CommandLine::RoaryReorderSpreadsheet;
$Bio::Roary::CommandLine::RoaryReorderSpreadsheet::VERSION = '3.13.0';
# ABSTRACT: Take in a tree and a spreadsheet and output a reordered spreadsheet


use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::ReorderSpreadsheet;
extends 'Bio::Roary::CommandLine::Common';

# Invocation details supplied by the caller, plus the help flag.
has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );

# Read-write options. `tree_file` has no default; the rest start from the
# defaults shown (newick format, 'depth' search strategy, sort by 'height').
has 'tree_file'            => ( is => 'rw', isa => 'Str' );
has 'spreadsheet_filename' => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
has 'output_filename'      => ( is => 'rw', isa => 'Str', default => 'reordered_spreadsheet.csv' );
has 'tree_format'          => ( is => 'rw', isa => 'Str', default => 'newick' );
has 'search_strategy'      => ( is => 'rw', isa => 'Str', default => 'depth' );
has 'sortby'               => ( is => 'rw', isa => 'Str', default => 'height');
has 'verbose'              => ( is => 'rw', isa => 'Bool', default => 0 );

lib/Bio/Roary/CommandLine/TransferAnnotationToGroups.pm  view on Meta::CPAN

$Bio::Roary::CommandLine::TransferAnnotationToGroups::VERSION = '3.13.0';
# ABSTRACT: Take in a groups file and a set of GFF files and transfer the consensus annotation


use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::AnnotateGroups;
extends 'Bio::Roary::CommandLine::Common';


# Invocation details supplied by the caller, plus the help flag.
has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );

# Read-write options. `gff_files`, `groups_filename` and `_error_message` have no
# default, so they are unset until assigned.
has 'gff_files'       => ( is => 'rw', isa => 'ArrayRef' );
has 'groups_filename' => ( is => 'rw', isa => 'Str' );
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'reannotated_groups' );
has 'verbose'         => ( is => 'rw', isa => 'Bool', default => 0 );
has '_error_message'  => ( is => 'rw', isa => 'Str' );

sub BUILD {

lib/Bio/Roary/CommandLine/UniqueGenesPerSample.pm  view on Meta::CPAN

$Bio::Roary::CommandLine::UniqueGenesPerSample::VERSION = '3.13.0';
# ABSTRACT: Take in the clustered file and produce a sorted file with the frequency of each sample's unique genes


use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::UniqueGenesPerSample;

extends 'Bio::Roary::CommandLine::Common';

# Invocation details supplied by the caller, plus the help flag.
has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );

# Read-write options with defaults; `_error_message` has no default.
has 'clustered_proteins' => ( is => 'rw', isa => 'Str',  default => 'clustered_proteins' );
has 'output_filename'    => ( is => 'rw', isa => 'Str',  default => 'unique_genes_per_sample.tsv' );
has 'verbose'            => ( is => 'rw', isa => 'Bool', default => 0 );
has '_error_message'     => ( is => 'rw', isa => 'Str' );

sub BUILD {
    my ($self) = @_;

lib/Bio/Roary/ContigsToGeneIDsFromGFF.pm  view on Meta::CPAN

$Bio::Roary::ContigsToGeneIDsFromGFF::VERSION = '3.13.0';
# ABSTRACT: Parse a GFF and efficiently extract ordered gene ids on each contig


use Moose;
use Bio::Tools::GFF;
with 'Bio::Roary::ParseGFFAnnotationRole';

# Lazily built hash reference, computed by `_build_contig_to_ids` on first access.
has 'contig_to_ids' => ( is => 'rw', isa => 'HashRef', lazy => 1, builder => '_build_contig_to_ids');

# Lazily built hash reference; `_genes_annotation` starts as a fresh empty array
# reference for each object.
has 'overlapping_hypothetical_protein_ids' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_overlapping_hypothetical_protein_ids');
has '_genes_annotation' => ( is => 'rw', isa => 'ArrayRef', default => sub{[]});

# Integer threshold, default 10; presumably a 0-100 percentage given the name - confirm.
has '_min_nucleotide_overlap_percentage' => ( is => 'ro', isa => 'Int', default => 10);

# Manually parse the GFF file because the BioPerl module is too slow
sub _build_contig_to_ids
{
  my ($self) = @_;
  my %contigs_to_ids;
  my @genes_annotation;

lib/Bio/Roary/External/GeneAlignmentFromNucleotides.pm  view on Meta::CPAN

package Bio::Roary::External::GeneAlignmentFromNucleotides;
$Bio::Roary::External::GeneAlignmentFromNucleotides::VERSION = '3.13.0';
# ABSTRACT: Take in multi-FASTA files of nucleotides and align each file with PRANK or MAFFT


use Moose;
with 'Bio::Roary::JobRunner::Role';

# `fasta_files` and `num_input_files` are required; everything else has a default.
# `exec` holds the command name 'protein_alignment_from_nucleotides'.
# Note that `core_definition` defaults to 1 in this class.
has 'fasta_files'                 => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'exec'                        => ( is => 'ro', isa => 'Str',      default  => 'protein_alignment_from_nucleotides' );
has 'translation_table'           => ( is => 'rw', isa => 'Int',      default => 11 );
has 'core_definition'             => ( is => 'ro', isa => 'Num',      default => 1 );
has 'mafft'                       => ( is => 'ro', isa => 'Bool',     default => 0 );
has 'dont_delete_files'           => ( is => 'rw', isa => 'Bool',     default  => 0 );
has 'allow_paralogs'              => ( is => 'rw', isa => 'Bool',     default  => 0 );
has 'num_input_files'             => ( is => 'ro', isa => 'Int',      required => 1);

# Overrides an attribute of the consumed role (presumably Bio::Roary::JobRunner::Role,
# which is not visible in this excerpt) with a lazily built value.
has 'memory_in_mb' => ( is => 'rw', isa => 'Int', lazy     => 1, builder => '_build_memory_in_mb' );

lib/Bio/Roary/External/PostAnalysis.pm  view on Meta::CPAN

package Bio::Roary::External::PostAnalysis;
$Bio::Roary::External::PostAnalysis::VERSION = '3.13.0';
# ABSTRACT: Perform the post analysis


use Moose;
use Cwd  qw(getcwd); 
with 'Bio::Roary::JobRunner::Role';

# Most attributes here are required and read-only; `exec` defaults to the command
# name 'pan_genome_post_analysis'.
# The spelling `output_pan_geneome_filename` is part of the public accessor name,
# so it must not be "corrected" without updating every caller.
has 'input_files'                 => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'exec'                        => ( is => 'ro', isa => 'Str', default  => 'pan_genome_post_analysis' );
has 'fasta_files'                 => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'output_filename'             => ( is => 'ro', isa => 'Str', required => 1 );
has 'output_pan_geneome_filename' => ( is => 'ro', isa => 'Str', required => 1 );
has 'output_statistics_filename'  => ( is => 'ro', isa => 'Str', required => 1 );
has 'clusters_filename'           => ( is => 'ro', isa => 'Str', required => 1 );
has 'output_multifasta_files'     => ( is => 'ro', isa => 'Bool', required => 1 );
# Optional switches, all off by default, and the translation table (default 11).
has 'dont_delete_files'           => ( is => 'ro', isa => 'Bool', default  => 0 );
has 'dont_create_rplots'          => ( is => 'rw', isa => 'Bool', default  => 0 );
has 'dont_split_groups'           => ( is => 'rw', isa => 'Bool', default  => 0 );
has 'verbose_stats'               => ( is => 'rw', isa => 'Bool', default  => 0 );
has 'translation_table'           => ( is => 'rw', isa => 'Int',  default  => 11 );

lib/Bio/Roary/ExtractCoreGenesFromSpreadsheet.pm  view on Meta::CPAN



use Moose;
use Text::CSV;
use Bio::Roary::GroupStatistics;
use POSIX;

# `spreadsheet` is the only required argument. The CSV parser, the input file
# handle and the ordered core gene list are built lazily on first access.
# `core_definition` defaults to 1 in this class.
has 'spreadsheet'           => ( is => 'ro', isa  => 'Str',       required => 1 );
has '_csv_parser'           => ( is => 'ro', isa  => 'Text::CSV', lazy     => 1, builder => '_build__csv_parser' );
has '_input_spreadsheet_fh' => ( is => 'ro', lazy => 1,           builder  => '_build__input_spreadsheet_fh' );
has 'ordered_core_genes'    => ( is => 'ro', isa  => 'ArrayRef',  lazy     => 1, builder => '_build_ordered_core_genes' );
has 'core_definition'       => ( is => 'ro', isa  => 'Num',       default  => 1 );
# Read-write containers that start empty (a fresh reference per object).
has 'sample_names'          => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } );
has 'sample_names_to_genes' => ( is => 'rw', isa => 'HashRef',  default => sub { {} } );
has 'allow_paralogs'        => ( is => 'rw', isa => 'Bool',     default => 0 );

# Read-write integers with no default or builder; they stay unset until code not
# visible in this excerpt assigns them. The spelling `_genome_fragement_column`
# is part of the accessor name and must match its callers.
has '_number_of_isolates'               => ( is => 'rw', isa => 'Int' );
has '_gene_column'                      => ( is => 'rw', isa => 'Int' );
has '_num_isolates_column'              => ( is => 'rw', isa => 'Int' );
has '_avg_sequences_per_isolate_column' => ( is => 'rw', isa => 'Int' );
has '_genome_fragement_column'          => ( is => 'rw', isa => 'Int' );

lib/Bio/Roary/ExtractProteomeFromGFFs.pm  view on Meta::CPAN



use Moose;
use Bio::Roary::Exceptions;
use Bio::Roary::ExtractProteomeFromGFF;
use File::Basename;
use Cwd qw(getcwd); 
use File::Temp;
with 'Bio::Roary::JobRunner::Role';

# `gff_files` is the only required argument; the FASTA list and the
# FASTA-to-GFF lookup are built lazily from it by the named builders.
has 'gff_files'                => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'fasta_files'              => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_fasta_files' );
has 'fasta_files_to_gff_files' => ( is => 'ro', isa => 'HashRef',  lazy => 1, builder => '_build_fasta_files_to_gff_files' );
has 'apply_unknowns_filter'    => ( is => 'rw', isa => 'Bool', default => 1 );
has '_queue'                   => ( is => 'rw', isa => 'Str',  default => 'small' );
has 'translation_table'        => ( is => 'rw', isa => 'Int',  default => 11 );
has 'verbose'                  => ( is => 'rw', isa => 'Bool', default => 0 );
# A fresh temporary directory is created under the current working directory for
# every object; CLEANUP => 1 asks File::Temp to remove it automatically.
has 'working_directory'        => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );

sub _build__extract_proteome_objects
{
  my ($self) = @_;

lib/Bio/Roary/FilterFullClusters.pm  view on Meta::CPAN


# Required, read-only inputs and output locations. `_greater_than_or_equal` is an
# optional boolean switch that is off by default.
has 'number_of_input_files' => ( is => 'ro', isa => 'Int', required => 1 );
has 'fasta_file'     => ( is => 'ro', isa => 'Str', required => 1 );
has 'output_file'    => ( is => 'ro', isa => 'Str', required => 1 );
has '_greater_than_or_equal' =>  ( is => 'ro', isa => 'Bool', default => 0 );
has 'cdhit_input_fasta_file'    => ( is => 'ro', isa => 'Str', required => 1 );
has 'cdhit_output_fasta_file'    => ( is => 'ro', isa => 'Str', required => 1 );

has 'output_groups_file' => ( is => 'ro', isa => 'Str', required => 1 );

# Lazily built internals. NOTE(review): `_build__full_cluster_gene_names` reads
# `$self->_clustered_genes`, which is not declared in this excerpt - presumably it
# comes from a role or a part of the file that is not shown; confirm.
has '_full_cluster_gene_names'    => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__full_cluster_gene_names' );
has '_input_seqio'  => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__input_seqio' );
has '_output_seqio' => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__output_seqio' );

has '_all_full_cluster_genes'    => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__all_full_cluster_genes' );

sub _build__full_cluster_gene_names
{
  my($self) = @_;
  
  my %full_cluster_gene_names ;
  
  for my $gene_name (keys %{$self->_clustered_genes})
  {
  

lib/Bio/Roary/FilterUnknownsFromFasta.pm  view on Meta::CPAN

$Bio::Roary::FilterUnknownsFromFasta::VERSION = '3.13.0';
# ABSTRACT: Take in fasta files, remove sequences with too many unknowns and return a list of the new files


use Moose;
use Bio::SeqIO;
use Cwd;
use Bio::Roary::Exceptions;
use File::Basename;

# `fasta_files` is required. The filter is enabled by default and the maximum
# percentage of unknowns defaults to 5.
has 'fasta_files'                    => ( is => 'ro', isa => 'ArrayRef',  required => 1 );
has 'apply_unknowns_filter'          => ( is => 'rw', isa => 'Bool', default => 1 );
has 'maximum_percentage_of_unknowns' => ( is => 'ro', isa => 'Num',  default  => 5 );

# Lazily built list of output files, computed by `_build_filtered_fasta_files`.
has 'filtered_fasta_files' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_filtered_fasta_files' );

# Starts as a fresh empty hash reference; the accessor is read-only, but the
# referenced hash itself can still be modified by code holding the reference.
has 'input_fasta_to_output_fasta' => ( is => 'ro', isa => 'HashRef', default => sub {{}} );

sub _build_filtered_fasta_files
{
  my ($self) = @_;
  
  my @output_file_names;
  for my $fasta_file (@{$self->fasta_files})
  {
    my ( $filename, $directories, $suffix ) = fileparse($fasta_file);
    push(@output_file_names, $self->_filter_fasta_sequences_and_return_new_file($filename,$fasta_file ));

lib/Bio/Roary/GeneNamesFromGFF.pm  view on Meta::CPAN

package Bio::Roary::GeneNamesFromGFF;
$Bio::Roary::GeneNamesFromGFF::VERSION = '3.13.0';
# ABSTRACT: Parse a GFF and efficiently extract ID -> Gene Name


use Moose;

use Bio::Tools::GFF;
with 'Bio::Roary::ParseGFFAnnotationRole';

# `ids_to_gene_name` is built lazily by `_build_ids_to_gene_name`. The other two
# lookups start as fresh empty hash references and are read-write.
has 'ids_to_gene_name' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_ids_to_gene_name' );
has 'ids_to_product' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
has 'ids_to_gene_size' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );

# Parsing with the perl GFF module is exceptionally slow.
sub _build_ids_to_gene_name {
    my ($self) = @_;
    my %id_to_gene_name;

    my $gffio = Bio::Tools::GFF->new( -file => $self->gff_file, -gff_version => 3 );
    while ( my $feature = $gffio->next_feature() ) {

lib/Bio/Roary/GroupStatistics.pm  view on Meta::CPAN

use Bio::SeqIO;
use Bio::Roary::Exceptions;
use Bio::Roary::AnalyseGroups;
use Bio::Roary::AnnotateGroups;
use Bio::Roary::PresenceAbsenceMatrix;

# The two collaborator objects are required; the output file names have defaults.
has 'annotate_groups_obj'  => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
has 'analyse_groups_obj'   => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups',  required => 1 );
has 'output_filename'      => ( is => 'ro', isa => 'Str',                        default  => 'gene_presence_absence.csv' );
has 'output_rtab_filename' => ( is => 'ro', isa => 'Str',                        default  => 'gene_presence_absence.Rtab' );
# Optional hash reference with no default, so it is unset unless the caller passes it.
has 'groups_to_contigs'    => ( is => 'ro', isa => 'Maybe[HashRef]');
# Lazily built internals. `_output_fh` declares no `isa`; its builder opens
# `output_filename` for writing.
has '_output_fh'           => ( is => 'ro', lazy => 1,           builder => '_build__output_fh' );
has '_text_csv_obj'        => ( is => 'ro', isa  => 'Text::CSV', lazy    => 1, builder => '_build__text_csv_obj' );
has '_sorted_file_names'   => ( is => 'ro', isa  => 'ArrayRef',  lazy    => 1, builder => '_build__sorted_file_names' );
has '_groups_to_files'     => ( is => 'ro', isa  => 'HashRef',   lazy    => 1, builder => '_build__groups_to_files' );
has '_files_to_groups'     => ( is => 'ro', isa  => 'HashRef',   lazy    => 1, builder => '_build__files_to_groups' );
has '_num_files_in_groups' => ( is => 'ro', isa  => 'HashRef',   lazy    => 1, builder => '_build__num_files_in_groups' );
has '_verbose'             => ( is => 'ro', isa => 'Bool', default => 0 );


# Builder for `_output_fh`: opens `output_filename` for writing (truncating any
# existing file) and returns the lexical file handle. Throws
# Bio::Roary::Exceptions::CouldntWriteToFile when the file cannot be opened.
sub _build__output_fh {
    my $self = shift;
    my $path = $self->output_filename;

    my $out_fh;
    unless ( open( $out_fh, '>', $path ) ) {
        Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $path );
    }
    return $out_fh;
}

lib/Bio/Roary/JobRunner/Local.pm  view on Meta::CPAN

package Bio::Roary::JobRunner::Local;
$Bio::Roary::JobRunner::Local::VERSION = '3.13.0';
# ABSTRACT: Execute a set of commands locally


use Moose;
use Log::Log4perl qw(:easy);

# `commands_to_run` is the required list of commands; `run` passes each entry to
# system() after logging it.
has 'commands_to_run' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
# `logger` declares no `isa`, so whatever `_build_logger` returns is accepted as-is.
has 'logger'          => ( is => 'ro', lazy => 1, builder => '_build_logger');
has 'verbose'         => ( is => 'rw', isa => 'Bool', default => 0 );
# Default written as a numeric literal to match the `Int` constraint and every
# other integer default in the distribution (it was the quoted string '200').
has 'memory_in_mb'    => ( is => 'rw', isa => 'Int',  default => 200 );

sub run {
    my ($self) = @_;

    for my $command_to_run ( @{ $self->commands_to_run } ) {  
        $self->logger->info($command_to_run);
        system($command_to_run );

lib/Bio/Roary/JobRunner/Parallel.pm  view on Meta::CPAN

$Bio::Roary::JobRunner::Parallel::VERSION = '3.13.0';
# ABSTRACT: Use GNU Parallel


use Moose;
use File::Temp qw/ tempfile /;
use Log::Log4perl qw(:easy);
use File::Slurper 'write_text';
use File::Temp qw/ tempfile /;

# `commands_to_run` is the required list of commands; `run` joins the entries
# with newlines and writes them to a temporary file.
has 'commands_to_run' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'cpus'            => ( is => 'ro', isa => 'Int',      default => 1 );
# `logger` declares no `isa`, so whatever `_build_logger` returns is accepted as-is.
has 'logger'          => ( is => 'ro', lazy => 1, builder => '_build_logger');
has 'verbose'         => ( is => 'rw', isa => 'Bool', default => 0 );
# Default written as a numeric literal to match the `Int` constraint and every
# other integer default in the distribution (it was the quoted string '200').
has 'memory_in_mb'    => ( is => 'rw', isa => 'Int',  default => 200 );

sub run {
    my ($self) = @_;
	
	  my($fh, $temp_command_filename) = tempfile();
	  write_text($temp_command_filename, join("\n", @{ $self->commands_to_run }) );

lib/Bio/Roary/LookupGeneFiles.pm  view on Meta::CPAN

package Bio::Roary::LookupGeneFiles;
$Bio::Roary::LookupGeneFiles::VERSION = '3.13.0';
# ABSTRACT: Take in an ordering of genes and a directory and return an ordered list of file locations


use Moose;

# `ordered_genes` is required; the directory defaults to 'pan_genome_sequences'.
has 'multifasta_directory' => ( is => 'ro', isa => 'Str', default => 'pan_genome_sequences' );
has 'ordered_genes'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );

# Lazily built by `_build_ordered_gene_files` on first access.
has 'ordered_gene_files' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_ordered_gene_files' );


sub _build_ordered_gene_files
{
  my ($self) = @_;
  my @gene_files;
  for my $gene (@{$self->ordered_genes})
  {
    $gene =~ s!\W!_!gi;
    my $filename = $gene.'.fa.aln';



( run in 0.984 second using v1.01-cache-2.11-cpan-5f2e87ce722 )