Bio-Roary
view release on metacpan or search on metacpan
lib/Bio/Roary/ExtractCoreGenesFromSpreadsheet.pm view on Meta::CPAN
package Bio::Roary::ExtractCoreGenesFromSpreadsheet;
$Bio::Roary::ExtractCoreGenesFromSpreadsheet::VERSION = '3.13.0';
# ABSTRACT: Take in a spreadsheet produced by the pipeline and identify the core genes.
use Moose;
use Text::CSV;
use Bio::Roary::GroupStatistics;
use POSIX;
# --- Input and parsing -------------------------------------------------------

# Path of the spreadsheet to read (required). It is passed to open() the first
# time '_input_spreadsheet_fh' is needed.
has 'spreadsheet' => ( is => 'ro', isa => 'Str', required => 1 );
# Text::CSV instance, created lazily by _build__csv_parser.
has '_csv_parser' => ( is => 'ro', isa => 'Text::CSV', lazy => 1, builder => '_build__csv_parser' );
# Filehandle on 'spreadsheet', opened lazily by _build__input_spreadsheet_fh.
has '_input_spreadsheet_fh' => ( is => 'ro', lazy => 1, builder => '_build__input_spreadsheet_fh' );

# --- Results and options -----------------------------------------------------

# Array reference produced lazily by _build_ordered_core_genes.
# NOTE(review): that builder is outside this view; presumably the core genes in
# genome order - confirm against its implementation.
has 'ordered_core_genes' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_ordered_core_genes' );
# Multiplier applied to the number of isolates to obtain
# '_min_no_isolates_for_core' (default 1).
has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1 );
# Starts as an empty array reference.
# NOTE(review): populated outside this view - presumably the sample column
# headers; verify.
has 'sample_names' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } );
# Starts as an empty hash reference.
# NOTE(review): populated outside this view - presumably keyed by sample name;
# verify.
has 'sample_names_to_genes' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
# Boolean flag, off by default. Where it is consulted is outside this view.
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );

# --- Internal state derived from the header row ------------------------------

# Set by _update_number_of_isolates: header column count minus the number of
# fixed headers reported by Bio::Roary::GroupStatistics.
has '_number_of_isolates' => ( is => 'rw', isa => 'Int' );
# Zero-based column index of the 'Gene' column, set by _setup_column_mappings.
has '_gene_column' => ( is => 'rw', isa => 'Int' );
# The four column-index attributes below have no default.
# NOTE(review): presumably also assigned by _setup_column_mappings, whose tail
# is outside this view - confirm. The 'fragement' spelling is part of the
# attribute names and is kept as-is.
has '_num_isolates_column' => ( is => 'rw', isa => 'Int' );
has '_avg_sequences_per_isolate_column' => ( is => 'rw', isa => 'Int' );
has '_genome_fragement_column' => ( is => 'rw', isa => 'Int' );
has '_order_within_fragement_column' => ( is => 'rw', isa => 'Int' );
# Lazily computed as _number_of_isolates * core_definition, so
# '_number_of_isolates' must be set before this is first read.
has '_min_no_isolates_for_core' => ( is => 'rw', isa => 'Num', lazy => 1, builder => '_build__min_no_isolates_for_core' );
# Builder for the lazy '_min_no_isolates_for_core' attribute.
#
# Returns the product of the current '_number_of_isolates' and
# 'core_definition' values. The result is not rounded.
sub _build__min_no_isolates_for_core {
    my $self = shift;
    return $self->_number_of_isolates() * $self->core_definition();
}
# Builder for the lazy '_csv_parser' attribute.
#
# Returns a new Text::CSV object constructed with the 'binary' and
# 'always_quote' options switched on (see the Text::CSV documentation for
# their exact effect).
sub _build__csv_parser {
    my $self = shift;

    my %parser_options = (
        binary       => 1,
        always_quote => 1,
    );
    return Text::CSV->new( \%parser_options );
}
# Builder for the lazy '_input_spreadsheet_fh' attribute.
#
# Opens the file named by the 'spreadsheet' attribute for reading and returns
# the lexical filehandle.
#
# The three-argument form of open() is used so the path is always treated as a
# literal filename: mode or pipe characters ('<', '>', '|') and leading or
# trailing whitespace in the name are never interpreted as part of the mode.
#
# Dies with the path and the OS error message if the file cannot be opened,
# instead of returning an unopened handle that only fails later when read.
sub _build__input_spreadsheet_fh {
    my ($self) = @_;

    my $path = $self->spreadsheet;
    open( my $fh, '<', $path )
      or die "Cannot open spreadsheet '$path' for reading: $!";
    return $fh;
}
# Derive the number of isolates from the spreadsheet header row and store it
# in '_number_of_isolates'.
#
# $header_row is an array reference of header cells. The isolate count is the
# number of header cells minus the number of entries in the array reference
# returned by Bio::Roary::GroupStatistics->fixed_headers.
#
# Returns whatever the '_number_of_isolates' setter returns.
sub _update_number_of_isolates {
    my $self       = shift;
    my $header_row = shift;

    my $total_columns = scalar @{$header_row};
    my $fixed_columns = scalar @{ Bio::Roary::GroupStatistics->fixed_headers };

    return $self->_number_of_isolates( $total_columns - $fixed_columns );
}
sub _setup_column_mappings {
my ( $self, $header_row ) = @_;
# Default column positions (current ordering)
my %columns_of_interest_mappings = (
'Gene' => 0,
'No. isolates' => 3,
'Avg sequences per isolate' => 5,
'Genome Fragment' => 6,
'Order within Fragment' => 7,
'QC' => 10,
);
# Dynamically overwrite the default ordering
for ( my $i = 0 ; $i < @{$header_row} ; $i++ ) {
for my $col_name (%columns_of_interest_mappings) {
if ( $header_row->[$i] eq $col_name ) {
$columns_of_interest_mappings{$col_name} = $i;
last;
}
}
}
$self->_gene_column( $columns_of_interest_mappings{'Gene'} );
( run in 0.682 second using v1.01-cache-2.11-cpan-98e64b0badf )