Bio-Roary
view release on metacpan or search on metacpan
lib/Bio/Roary/GroupStatistics.pm view on Meta::CPAN
package Bio::Roary::GroupStatistics;
$Bio::Roary::GroupStatistics::VERSION = '3.13.0';
# ABSTRACT: Add labels to the groups
use Moose;
use POSIX;
use Text::CSV;
use File::Basename;
use Bio::SeqIO;
use Bio::Roary::Exceptions;
use Bio::Roary::AnalyseGroups;
use Bio::Roary::AnnotateGroups;
use Bio::Roary::PresenceAbsenceMatrix;
# ---------------------------------------------------------------------------
# Public attributes
# ---------------------------------------------------------------------------

# Required collaborator objects; both must be supplied at construction time.
has annotate_groups_obj => (
    is       => 'ro',
    isa      => 'Bio::Roary::AnnotateGroups',
    required => 1,
);
has analyse_groups_obj => (
    is       => 'ro',
    isa      => 'Bio::Roary::AnalyseGroups',
    required => 1,
);

# Output file names, each with a default.
has output_filename => (
    is      => 'ro',
    isa     => 'Str',
    default => 'gene_presence_absence.csv',
);
has output_rtab_filename => (
    is      => 'ro',
    isa     => 'Str',
    default => 'gene_presence_absence.Rtab',
);

# Optional hash reference; may be left unset or be undef.
has groups_to_contigs => (
    is  => 'ro',
    isa => 'Maybe[HashRef]',
);

# ---------------------------------------------------------------------------
# Private, lazily built attributes (each populated by its named builder on
# first access)
# ---------------------------------------------------------------------------

# Write filehandle opened on output_filename.
has _output_fh => (
    is      => 'ro',
    lazy    => 1,
    builder => '_build__output_fh',
);

# Text::CSV instance.
has _text_csv_obj => (
    is      => 'ro',
    isa     => 'Text::CSV',
    lazy    => 1,
    builder => '_build__text_csv_obj',
);

# Sorted copy of the file names reported by analyse_groups_obj.
has _sorted_file_names => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build__sorted_file_names',
);

# Lookup tables between groups and files.
has _groups_to_files => (
    is      => 'ro',
    isa     => 'HashRef',
    lazy    => 1,
    builder => '_build__groups_to_files',
);
has _files_to_groups => (
    is      => 'ro',
    isa     => 'HashRef',
    lazy    => 1,
    builder => '_build__files_to_groups',
);
has _num_files_in_groups => (
    is      => 'ro',
    isa     => 'HashRef',
    lazy    => 1,
    builder => '_build__num_files_in_groups',
);

# When true, an extra 'Inference' column is appended to the header row.
has _verbose => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);
# Builder for the lazy '_output_fh' attribute.
#
# Opens the file named by output_filename for writing ('>' mode, so any
# existing content is truncated) and returns the lexical filehandle.
#
# Throws Bio::Roary::Exceptions::CouldntWriteToFile when the file cannot be
# opened. The message keeps the historical "Couldnt write output file:<name>"
# prefix and appends the operating-system reason from $!.
sub _build__output_fh {
    my ($self) = @_;

    # Fetch the name before calling open so that nothing runs between the
    # failed open and the read of $!.
    my $filename = $self->output_filename;

    open( my $fh, '>', $filename )
      or Bio::Roary::Exceptions::CouldntWriteToFile->throw(
        error => "Couldnt write output file:" . $filename . ": $!" );

    return $fh;
}
# Builder for the lazy '_text_csv_obj' attribute.
#
# Returns a new Text::CSV object configured with binary mode on, every field
# quoted, and CRLF as the end-of-line string.
sub _build__text_csv_obj {
    my ($self) = @_;

    my %csv_options = (
        binary       => 1,
        always_quote => 1,
        eol          => "\r\n",
    );

    return Text::CSV->new( \%csv_options );
}
# Returns an array reference holding the fixed column titles of the header
# row. _header places these first and appends the per-sample columns after
# them.
sub fixed_headers {
my ($self) = @_;
my @header =
( 'Gene', 'Non-unique Gene name', 'Annotation', 'No. isolates', 'No. sequences', 'Avg sequences per isolate', 'Genome Fragment','Order within Fragment', 'Accessory Fragment','Accessory Order with Fragment', 'QC','Min group size nuc', 'Max group...
return \@header;
}
# Returns an array reference with one column title per entry of
# _sorted_file_names, in the same order. Each title is the file's basename
# with the first occurrence of ".gff.proteome.faa" removed.
sub _sample_headers {
    my ($self) = @_;

    my @sample_names = map {
        my $name = basename($_);
        $name =~ s!\.gff\.proteome\.faa!!;
        $name;
    } @{ $self->_sorted_file_names };

    return \@sample_names;
}
# Returns an array reference with the complete header row: the fixed column
# titles, then the per-sample titles, then a trailing 'Inference' column when
# _verbose is true.
sub _header {
    my ($self) = @_;

    my @columns = ( @{ $self->fixed_headers }, @{ $self->_sample_headers } );

    if ( $self->_verbose ) {
        push @columns, 'Inference';
    }

    return \@columns;
}
# Builder for the lazy '_sorted_file_names' attribute.
#
# Returns a new array reference containing the entries of
# analyse_groups_obj->fasta_files in Perl's default (string) sort order. The
# source list itself is left untouched.
sub _build__sorted_file_names {
    my ($self) = @_;

    my $fasta_files = $self->analyse_groups_obj->fasta_files;

    return [ sort @{$fasta_files} ];
}
sub _non_unique_name_for_group {
my ( $self, $annotated_group_name ) = @_;
my $duplicate_gene_name = '';
my $prefix = $self->annotate_groups_obj->_group_default_prefix;
if ( $annotated_group_name =~ /$prefix/ ) {
my $non_unique_name_for_group =
$self->annotate_groups_obj->_consensus_gene_name_for_group($annotated_group_name);
if ( !( $non_unique_name_for_group =~ /$prefix/ ) ) {
$duplicate_gene_name = $non_unique_name_for_group;
}
}
( run in 0.786 second using v1.01-cache-2.11-cpan-5a3173703d6 )