Bio-Roary
view release on metacpan or search on metacpan
lib/Bio/Roary/AnnotateGroups.pm view on Meta::CPAN
package Bio::Roary::AnnotateGroups;
$Bio::Roary::AnnotateGroups::VERSION = '3.13.0';
# ABSTRACT: Take in a group file and associated GFF files for the isolates and update the group name to the gene name
use Moose;
use Bio::Roary::Exceptions;
use Bio::Roary::GeneNamesFromGFF;
use Array::Utils qw(array_minus);
use List::Util qw(max min sum);
use File::Grep qw(fgrep);
# ---- Constructor inputs ------------------------------------------------
# gff_files: required array reference. _build__filtered_gff_files keeps only
# the entries whose names end in ".gff".
has 'gff_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
# output_filename: path that _build__output_fh opens for writing.
has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'reannotated_groups_file' );
# groups_filename: required string; presumably the groups file to read --
# its consumer is not visible in this part of the file, TODO confirm.
has 'groups_filename' => ( is => 'ro', isa => 'Str', required => 1 );
# ---- Derived / internal state -------------------------------------------
# Lazy, but BUILD calls this accessor, so the builder runs at construction.
has '_ids_to_gene_names' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__ids_to_gene_names' );
# Read/write hashes that start out empty; presumably filled as a side effect
# of another builder -- TODO confirm where they are populated.
has '_ids_to_product' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
has '_ids_to_gene_size' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
has 'group_nucleotide_lengths' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_group_nucleotide_lengths');
# Hash of group name => array reference of id names (this is the shape
# _generate__ids_to_groups iterates over).
# NOTE: this builder is named '_builder__...' rather than '_build__...'.
has '_groups_to_id_names' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_builder__groups_to_id_names' );
# Write handle on output_filename; opened on first access.
has '_output_fh' => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );
has '_groups_to_consensus_gene_names' =>
( is => 'rw', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_consensus_gene_names' );
# The entries of gff_files whose names end in ".gff".
has '_filtered_gff_files' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__filtered_gff_files' );
has '_number_of_files' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__number_of_files' );
# Reverse lookup of _groups_to_id_names: id name => group name.
has '_ids_to_groups' => ( is => 'rw', isa => 'HashRef', lazy => 1, builder => '_builder__ids_to_groups' );
# One more than the highest number found among group names that end in
# _group_default_prefix followed by digits (see _builder__group_counter).
has '_group_counter' => ( is => 'rw', isa => 'Int', lazy => 1, builder => '_builder__group_counter' );
has '_group_default_prefix' => ( is => 'rw', isa => 'Str', default => 'group_' );
# lazy_build => 1 is Moose shorthand: the attribute is lazy and its builder
# is the method named _build__ids_to_verbose_stats.
has '_ids_to_verbose_stats' => ( is => 'rw', isa => 'HashRef', lazy_build => 1 );
# Moose construction hook. Touches the lazy _ids_to_gene_names attribute so
# that its builder runs as soon as the object exists rather than on first use.
sub BUILD {
    my $self = shift;
    $self->_ids_to_gene_names;
}
# Builder for the lazy '_group_counter' attribute.
#
# Looks at every group name returned by $self->_groups, picks out the names
# that end in the default prefix followed by digits, and returns one more
# than the largest such number. Returns 1 when no group name matches.
#
# The prefix is matched literally (\Q...\E): it comes from a read/write
# attribute, so it may contain characters that would otherwise be read as
# regex syntax (e.g. '+' or '.') and make the wrong names match.
sub _builder__group_counter {
    my ($self) = @_;

    my $prefix = $self->_group_default_prefix;

    # Numeric suffix of each name of the form <anything><prefix><digits>.
    my @group_ids = map { /\Q$prefix\E(\d+)$/ ? $1 : () } @{ $self->_groups };

    # Seeding with 0 keeps max() defined when nothing matched.
    return max( 0, @group_ids ) + 1;
}
# Inverts the group => [id names] mapping held in _groups_to_id_names and
# returns a hash reference of id name => group name. If an id name is listed
# under more than one group, the group visited last wins (hash order).
sub _generate__ids_to_groups {
    my $self = shift;

    my $members_of = $self->_groups_to_id_names;
    my %group_for;
    for my $group_name ( keys %{$members_of} ) {
        $group_for{$_} = $group_name for @{ $members_of->{$group_name} };
    }

    return \%group_for;
}
# Builder for the lazy '_ids_to_groups' attribute; delegates straight to
# _generate__ids_to_groups and hands back whatever it returns.
sub _builder__ids_to_groups {
    return shift->_generate__ids_to_groups();
}
# Builder for the lazy '_output_fh' attribute.
# Opens output_filename for writing (truncating any existing file) and returns
# the handle. Throws Bio::Roary::Exceptions::CouldntWriteToFile if the file
# cannot be opened.
sub _build__output_fh {
    my $self = shift;

    my $handle;
    unless ( open( $handle, '>', $self->output_filename ) ) {
        Bio::Roary::Exceptions::CouldntWriteToFile->throw(
            error => "Couldnt write output file:" . $self->output_filename );
    }

    return $handle;
}
# Builder for the lazy '_filtered_gff_files' attribute.
# Returns a new array reference holding only those entries of gff_files whose
# names end in ".gff" (case-sensitive), in their original order.
sub _build__filtered_gff_files {
    my $self = shift;
    return [ grep { /\.gff$/ } @{ $self->gff_files } ];
}
sub _build__ids_to_gene_names {
my ($self) = @_;
my %ids_to_gene_names;
( run in 0.588 second using v1.01-cache-2.11-cpan-ceb78f64989 )