Bio-Roary

 view release on metacpan or  search on metacpan

lib/Bio/Roary/AnnotateGroups.pm  view on Meta::CPAN

package Bio::Roary::AnnotateGroups;
$Bio::Roary::AnnotateGroups::VERSION = '3.13.0';
# ABSTRACT: Take in a group file and associated GFF files for the isolates and update the group name to the gene name


use Moose;
use Bio::Roary::Exceptions;
use Bio::Roary::GeneNamesFromGFF;
use Array::Utils qw(array_minus);
use List::Util qw(max min sum);
use File::Grep qw(fgrep);

# --- Constructor arguments -------------------------------------------------
# gff_files       : required array reference (see _filtered_gff_files below for
#                   how it is narrowed down to entries ending in ".gff").
# output_filename : path opened for writing by _build__output_fh; defaults to
#                   'reannotated_groups_file'.
# groups_filename : required string.
has 'gff_files'          => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'output_filename'    => ( is => 'ro', isa => 'Str',      default  => 'reannotated_groups_file' );
has 'groups_filename'    => ( is => 'ro', isa => 'Str',      required => 1 );

# Lazy hash built by _build__ids_to_gene_names. BUILD touches this accessor, so
# in practice it is populated as soon as the object is constructed.
has '_ids_to_gene_names' => ( is => 'ro', isa => 'HashRef',  lazy     => 1, builder => '_build__ids_to_gene_names' );

# Writable hashes that start out empty (a fresh {} per object).
# NOTE(review): presumably filled in while _ids_to_gene_names is built - confirm.
has '_ids_to_product'    => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
has '_ids_to_gene_size'  => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
has 'group_nucleotide_lengths'  => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_group_nucleotide_lengths');

# --- Lazily derived internal state ------------------------------------------
# Careful with builder names: _groups_to_id_names, _ids_to_groups and
# _group_counter use a '_builder__' prefix, the others use '_build__'. The sub
# names must match these strings exactly.

# Hash whose keys are group names and whose values are array references of id
# names (this is how _generate__ids_to_groups reads it).
has '_groups_to_id_names'   => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_builder__groups_to_id_names' );
# Write filehandle on output_filename, opened on first access.
has '_output_fh'            => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );
has '_groups_to_consensus_gene_names' =>
  ( is => 'rw', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_consensus_gene_names' );
# Entries of gff_files that match /\.gff$/.
has '_filtered_gff_files'   => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__filtered_gff_files' );
has '_number_of_files'      => ( is => 'ro', isa => 'Int',      lazy => 1, builder => '_build__number_of_files' );
# Inverse of _groups_to_id_names: id name => group name.
has '_ids_to_groups'        => ( is => 'rw', isa => 'HashRef',  lazy => 1, builder => '_builder__ids_to_groups' );
# Initial value is one more than the highest number found after the default
# prefix in the existing group names (1 when no such name exists).
has '_group_counter'        => ( is => 'rw', isa => 'Int', lazy => 1, builder => '_builder__group_counter' );
has '_group_default_prefix' => ( is => 'rw', isa => 'Str', default => 'group_' );
# lazy_build => 1 is Moose shorthand for a lazy attribute whose builder is named
# after the attribute, i.e. _build__ids_to_verbose_stats here.
has '_ids_to_verbose_stats' => ( is => 'rw', isa => 'HashRef', lazy_build => 1 );

# Moose construction hook. '_ids_to_gene_names' is a lazy attribute, so reading
# it here runs its builder straight away instead of on first later use.
sub BUILD {
    my $self = shift;
    return $self->_ids_to_gene_names;
}

# Builder for '_group_counter'.
#
# Looks at every group name returned by $self->_groups, picks out the names
# that end in the default prefix followed by digits (e.g. "group_12" with the
# default prefix 'group_'), and returns one more than the largest number seen.
# Returns 1 when no group name has that form.
sub _builder__group_counter {
    my ($self) = @_;
    my $prefix = $self->_group_default_prefix;

    # The prefix is an ordinary string attribute, not a pattern: \Q...\E makes
    # characters such as '.' or '+' in it match literally instead of acting as
    # regex metacharacters. Compiled once, outside the loop.
    my $numbered_group = qr/\Q$prefix\E([\d]+)$/;

    my $highest_group = 0;
    for my $group ( @{ $self->_groups } ) {
        if ( $group =~ $numbered_group ) {
            my $group_id = $1;
            if ( $group_id > $highest_group ) {
                $highest_group = $group_id;
            }
        }
    }
    return $highest_group + 1;
}

# Inverts '_groups_to_id_names' (group name => array reference of id names)
# into a new hash reference mapping each id name to the group that lists it.
sub _generate__ids_to_groups {
    my $self = shift;

    my $members_of = $self->_groups_to_id_names;
    my %group_for;
    foreach my $group_name ( keys %{$members_of} ) {
        $group_for{$_} = $group_name foreach @{ $members_of->{$group_name} };
    }
    return \%group_for;
}

# Builder for '_ids_to_groups': hands back whatever _generate__ids_to_groups
# produces.
sub _builder__ids_to_groups {
    my $self = shift;
    return $self->_generate__ids_to_groups();
}

# Builder for '_output_fh': opens 'output_filename' for writing (truncating any
# existing file) and returns the filehandle. Throws
# Bio::Roary::Exceptions::CouldntWriteToFile if the file cannot be opened.
sub _build__output_fh {
    my $self = shift;

    my $opened = open( my $out_fh, '>', $self->output_filename );
    if ( !$opened ) {
        Bio::Roary::Exceptions::CouldntWriteToFile->throw(
            error => "Couldnt write output file:" . $self->output_filename );
    }
    return $out_fh;
}

# Builder for '_filtered_gff_files': returns a new array reference holding only
# the entries of 'gff_files' that end in ".gff", in their original order.
sub _build__filtered_gff_files {
    my $self = shift;
    return [ grep { /\.gff$/ } @{ $self->gff_files } ];
}

sub _build__ids_to_gene_names {
    my ($self) = @_;
    my %ids_to_gene_names;



( run in 0.588 second using v1.01-cache-2.11-cpan-ceb78f64989 )