Bio-Roary

 view release on metacpan or  search on metacpan

lib/Bio/Roary/GroupStatistics.pm  view on Meta::CPAN

package Bio::Roary::GroupStatistics;
$Bio::Roary::GroupStatistics::VERSION = '3.13.0';
# ABSTRACT: Add labels to the groups


use Moose;
use POSIX;
use Text::CSV;
use File::Basename;
use Bio::SeqIO;
use Bio::Roary::Exceptions;
use Bio::Roary::AnalyseGroups;
use Bio::Roary::AnnotateGroups;
use Bio::Roary::PresenceAbsenceMatrix;

# --- Public attributes -------------------------------------------------------
# Required collaborator; this class asks it for the default group prefix and
# for the consensus gene name of a group.
has 'annotate_groups_obj'  => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
# Required collaborator; its fasta_files list is the source of the sample names.
has 'analyse_groups_obj'   => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups',  required => 1 );
# Path that _build__output_fh opens for writing.
has 'output_filename'      => ( is => 'ro', isa => 'Str',                        default  => 'gene_presence_absence.csv' );
# Name of the Rtab output file.
has 'output_rtab_filename' => ( is => 'ro', isa => 'Str',                        default  => 'gene_presence_absence.Rtab' );
# Optional hash reference (may be undef).
has 'groups_to_contigs'    => ( is => 'ro', isa => 'Maybe[HashRef]');

# --- Private, lazily built attributes ----------------------------------------
# Each one is computed on first access by the named builder method.
has '_output_fh'           => ( is => 'ro', lazy => 1,           builder => '_build__output_fh' );
has '_text_csv_obj'        => ( is => 'ro', isa  => 'Text::CSV', lazy    => 1, builder => '_build__text_csv_obj' );
has '_sorted_file_names'   => ( is => 'ro', isa  => 'ArrayRef',  lazy    => 1, builder => '_build__sorted_file_names' );
has '_groups_to_files'     => ( is => 'ro', isa  => 'HashRef',   lazy    => 1, builder => '_build__groups_to_files' );
has '_files_to_groups'     => ( is => 'ro', isa  => 'HashRef',   lazy    => 1, builder => '_build__files_to_groups' );
has '_num_files_in_groups' => ( is => 'ro', isa  => 'HashRef',   lazy    => 1, builder => '_build__num_files_in_groups' );
# When true, _header appends an extra 'Inference' column.
has '_verbose'             => ( is => 'ro', isa => 'Bool', default => 0 );


# Builder for the lazy '_output_fh' attribute.
#
# Opens the file named by 'output_filename' in write mode ('>') and returns
# the lexical filehandle.
#
# Throws Bio::Roary::Exceptions::CouldntWriteToFile when the open fails. The
# message includes the operating-system reason taken from $! so that the
# cause (permissions, missing directory, ...) is visible to the caller.
sub _build__output_fh {
    my ($self) = @_;
    my $output_path = $self->output_filename;

    my $fh;
    if ( !open( $fh, '>', $output_path ) ) {

        # Capture $! straight away, before any later call can overwrite it.
        my $os_error = "$!";
        Bio::Roary::Exceptions::CouldntWriteToFile->throw(
            error => "Couldnt write output file:" . $output_path . " ($os_error)" );
    }
    return $fh;
}

# Builder for the lazy '_text_csv_obj' attribute.
#
# Returns a Text::CSV object constructed with binary mode enabled, every
# field quoted, and "\r\n" as the end-of-line sequence.
sub _build__text_csv_obj {
    my ($self) = @_;
    my %csv_options = (
        binary       => 1,
        always_quote => 1,
        eol          => "\r\n",
    );
    return Text::CSV->new( \%csv_options );
}

# Returns an array reference holding the fixed column names of the header
# row. _header places these first and appends the per-sample column names
# after them.
sub fixed_headers {
    my ($self) = @_;
    my @header =
      ( 'Gene', 'Non-unique Gene name', 'Annotation', 'No. isolates', 'No. sequences', 'Avg sequences per isolate', 'Genome Fragment','Order within Fragment', 'Accessory Fragment','Accessory Order with Fragment', 'QC','Min group size nuc', 'Max group...
    return \@header;
}

# Builds the per-sample column names for the header row.
#
# Every entry of _sorted_file_names is reduced to its basename, and the first
# occurrence of '.gff.proteome.faa' in that basename is removed.
#
# Returns an array reference, in the same order as _sorted_file_names.
sub _sample_headers
{
    my ($self) = @_;
    my @sample_names = map {
        my $sample = basename($_);
        $sample =~ s!\.gff\.proteome\.faa!!;
        $sample;
    } @{ $self->_sorted_file_names };
    return \@sample_names;
}

# Assembles the complete header row.
#
# The row is the fixed column names, followed by one column per sample, and
# finally an 'Inference' column when the _verbose flag is true.
#
# Returns an array reference.
sub _header {
    my ($self) = @_;
    my @columns = ( @{ $self->fixed_headers }, @{ $self->_sample_headers } );
    if ( $self->_verbose ) {
        push @columns, 'Inference';
    }
    return \@columns;
}

# Builder for the lazy '_sorted_file_names' attribute.
#
# Takes the fasta_files list from analyse_groups_obj and returns a new array
# reference with those names in Perl's default (string) sort order.
sub _build__sorted_file_names {
    my ($self) = @_;
    my $fasta_files = $self->analyse_groups_obj->fasta_files;
    return [ sort @{$fasta_files} ];
}

sub _non_unique_name_for_group {
    my ( $self, $annotated_group_name ) = @_;
    my $duplicate_gene_name = '';
    my $prefix              = $self->annotate_groups_obj->_group_default_prefix;
    if ( $annotated_group_name =~ /$prefix/ ) {
        my $non_unique_name_for_group =
          $self->annotate_groups_obj->_consensus_gene_name_for_group($annotated_group_name);
        if ( !( $non_unique_name_for_group =~ /$prefix/ ) ) {
            $duplicate_gene_name = $non_unique_name_for_group;
        }
    }



( run in 0.786 second using v1.01-cache-2.11-cpan-5a3173703d6 )