Bio-AutomatedAnnotation
view release on metacpan or search on metacpan
lib/Bio/AutomatedAnnotation/GeneNameOccurances.pm view on Meta::CPAN
package Bio::AutomatedAnnotation::GeneNameOccurances;
$Bio::AutomatedAnnotation::GeneNameOccurances::VERSION = '1.182770';
# ABSTRACT: Parse the gene names from multiple GFF files and provide a matrix of matches.
use Moose;
use Bio::Tools::GFF;
use Bio::AutomatedAnnotation::Exceptions;
use Bio::AutomatedAnnotation::GeneNamesFromGFF;
# Required constructor argument: array reference of GFF file paths to inspect.
has 'gff_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
# Lazily built hash reference keyed by gene name; each value counts the files
# whose gene-name hash contains that name (see _build_all_gene_names).
has 'all_gene_names' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_all_gene_names' );
# Lazily built array reference of the keys of all_gene_names, sorted in
# descending string order (see _build_sorted_all_gene_names).
has 'sorted_all_gene_names' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_sorted_all_gene_names' );
# Lazily built hash reference keyed by GFF file path; each value is whatever
# Bio::AutomatedAnnotation::GeneNamesFromGFF->gene_names returns for that file
# (used elsewhere in this class as a hash reference keyed by gene name).
has 'gene_name_hashes' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_gene_name_hashes' );
# Lazily built count of the entries in gff_files.
has 'number_of_files' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build_number_of_files' );
# Builder for the lazy 'sorted_all_gene_names' attribute.
#
# Returns an array reference holding every key of the all_gene_names hash,
# ordered by descending string comparison (so "b" comes before "a", and
# "9" comes before "10").
sub _build_sorted_all_gene_names {
    my ($self) = @_;

    my $count_for_name = $self->all_gene_names;

    # Hash keys are unique, so reversing the default ascending string sort
    # gives exactly the descending order.
    my @names_descending = reverse sort keys %{$count_for_name};

    return \@names_descending;
}
# Builder for the lazy 'number_of_files' attribute.
#
# Returns the number of entries in the gff_files array reference as a single
# integer, regardless of the context the builder is called in.
sub _build_number_of_files {
    my ($self) = @_;

    # Force scalar context explicitly: a bare "return @array" only yields the
    # element count when the caller happens to impose scalar context; in list
    # context it would return the file names themselves (or nothing at all
    # for an empty list) instead of the Int this attribute is declared as.
    return scalar @{ $self->gff_files };
}
# Builder for the lazy 'all_gene_names' attribute.
#
# Walks every per-file gene-name hash held in gene_name_hashes and tallies,
# for each gene name, how many of those per-file hashes contain it as a key.
# Returns the tally as a hash reference (gene name => count).
sub _build_all_gene_names {
    my ($self) = @_;

    my $names_for_file = $self->gene_name_hashes;
    my %files_with_name;

    for my $names_in_file ( values %{$names_for_file} ) {

        # Each key appears at most once per file, so this adds one per file.
        $files_with_name{$_}++ for keys %{$names_in_file};
    }

    return \%files_with_name;
}
# Builder for the lazy 'gene_name_hashes' attribute.
#
# For each path in gff_files, constructs a
# Bio::AutomatedAnnotation::GeneNamesFromGFF object for that file and stores
# the result of its gene_names method under the file path.
#
# Returns a hash reference (file path => gene_names result).
# Throws Bio::AutomatedAnnotation::Exceptions::FileNotFound as soon as a
# listed path does not exist on disk.
sub _build_gene_name_hashes {
    my ($self) = @_;

    my %names_for_file;

    for my $path ( @{ $self->gff_files } ) {

        # Fail early on a missing path rather than handing it to the parser.
        if ( !-e $path ) {
            Bio::AutomatedAnnotation::Exceptions::FileNotFound->throw( error => 'Cant open file: ' . $path );
        }

        my $parser = Bio::AutomatedAnnotation::GeneNamesFromGFF->new( gff_file => $path );
        $names_for_file{$path} = $parser->gene_names;
    }

    return \%names_for_file;
}
# Unimport the Moose keywords (has, etc.) so they are not left in this package.
no Moose;
# Finalize the class definition via its metaclass.
__PACKAGE__->meta->make_immutable;
# A module file must end by returning a true value.
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
Bio::AutomatedAnnotation::GeneNameOccurances - Parse the gene names from multiple GFF files and provide a matrix of matches.
=head1 VERSION
( run in 1.015 second using v1.01-cache-2.11-cpan-39bf76dae61 )