Bio-AutomatedAnnotation
view release on metacpan or search on metacpan
lib/Bio/AutomatedAnnotation/ParseGenesFromGFF.pm view on Meta::CPAN
package Bio::AutomatedAnnotation::ParseGenesFromGFF;
$Bio::AutomatedAnnotation::ParseGenesFromGFF::VERSION = '1.182770';
# ABSTRACT: Parse a GFF file and efficiently extract the gene sequence.
use Moose;
use Bio::Tools::GFF;
use Bio::PrimarySeq;
use Bio::SeqIO;
use Bio::Perl;
# Path of the GFF file; appended to the filter command that feeds the parser.
has 'gff_file' => ( is => 'ro', isa => 'Str', required => 1 );
# Pattern matched (as a regular expression) against feature qualifier values.
has 'search_query' => ( is => 'ro', isa => 'Str', required => 1 );
# Command string run in front of gff_file when opening the parser's input pipe.
# NOTE(review): its builder is not visible in this part of the file.
has '_awk_filter' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__awk_filter' );
# GFF version 3 parser reading the output of the filter command.
has '_gff_parser' => ( is => 'ro', isa => 'Bio::Tools::GFF', lazy => 1, builder => '_build__gff_parser' );
# Feature type of interest; presumably consumed by the awk filter - TODO confirm.
has '_tags_to_filter' => ( is => 'ro', isa => 'Str', default => 'CDS' );
# Features with a search qualifier whose values match search_query.
has '_matching_features' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__matching_features' );
# NOTE(review): builder not visible here; presumably sequence objects built
# from the matching features - confirm.
has '_bio_seq_objects' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__bio_seq_objects' );
# Names of the feature tags that are searched; defaults to 'gene' and 'product'.
has 'search_qualifiers' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_search_qualifiers' );
# Sequences returned by the parser, keyed by sequence id.
has '_sequences' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__sequences' );
# Default for the 'search_qualifiers' attribute: the feature tags whose
# values are compared against the search query. A new array reference is
# returned on every call.
sub _build_search_qualifiers {
    return [qw(gene product)];
}
# Builder for the '_sequences' attribute.
#
# Asks the GFF parser for its sequences and indexes them by id.
# Returns a hash reference mapping each sequence id to its sequence object;
# when two sequences share an id, the one returned later by the parser wins.
sub _build__sequences {
    my ($self) = @_;

    my $parser = $self->_gff_parser;
    my %sequence_for;
    foreach my $seq ( $parser->get_seqs ) {
        my $name = $seq->id;
        $sequence_for{$name} = $seq;
    }

    return \%sequence_for;
}
# Builder for the '_matching_features' attribute.
#
# Reads every remaining feature from the GFF parser and keeps those where at
# least one of the 'search_qualifiers' tags has values matching
# 'search_query'. The query is used as a regular expression and is matched
# against the tag's values joined with commas. Each feature is collected at
# most once, in the order the parser yields them.
#
# Returns an array reference of the matching feature objects.
sub _build__matching_features {
    my ($self) = @_;

    my @tag_names    = @{ $self->search_qualifiers };
    my $search_query = $self->search_query;

    # Compile once, outside the loop. The non-capturing group keeps an empty
    # query from becoming Perl's special empty pattern, which would silently
    # reuse the last successfully matched regex instead of matching anything.
    my $query_regex = qr/(?:$search_query)/;

    my $parser = $self->_gff_parser;
    my @matching_features;

  FEATURE:
    while ( my $raw_feature = $parser->next_feature() ) {
        for my $tag_name (@tag_names) {
            next unless $raw_feature->has_tag($tag_name);

            my $values = join( ',', $raw_feature->get_tag_values($tag_name) );
            next unless $values =~ $query_regex;

            # One matching qualifier is enough; move on to the next feature.
            push( @matching_features, $raw_feature );
            next FEATURE;
        }
    }

    return \@matching_features;
}
# Builder for the '_gff_parser' attribute.
#
# Runs the '_awk_filter' command with 'gff_file' appended and returns a
# Bio::Tools::GFF (version 3) parser reading that command's output.
# Dies if the command pipe cannot be opened.
sub _build__gff_parser {
    my ($self) = @_;

    # NOTE(review): this is a single command string, so Perl passes it to the
    # shell whenever it contains shell metacharacters. A gff_file containing
    # spaces or metacharacters will therefore be misinterpreted; confirm that
    # callers only supply plain paths.
    my $command = $self->_awk_filter . " " . $self->gff_file;

    open( my $fh, '-|', $command )
        or die "Could not run '$command': $!";

    return Bio::Tools::GFF->new( -gff_version => 3, -fh => $fh, alphabet => 'dna' );
}
sub _find_feature_id {
my ( $self, $feature ) = @_;
my $gene_id;
my @junk;
my @tag_names = ( 'ID', 'locus_tag' );
for my $tag_name (@tag_names) {
( run in 1.142 second using v1.01-cache-2.11-cpan-39bf76dae61 )