Bio-AutomatedAnnotation

 view release on metacpan or  search on metacpan

lib/Bio/AutomatedAnnotation/ParseGenesFromGFF.pm  view on Meta::CPAN

package Bio::AutomatedAnnotation::ParseGenesFromGFF;
$Bio::AutomatedAnnotation::ParseGenesFromGFF::VERSION = '1.182770';
# ABSTRACT: Parse a GFF file and efficiently extract the gene sequence.


use Moose;
use Bio::Tools::GFF;
use Bio::PrimarySeq;
use Bio::SeqIO;
use Bio::Perl;

# Required constructor arguments.
#   gff_file     - file name appended to the filter command whose output is
#                  fed to the GFF parser.
#   search_query - pattern matched, as a regular expression, against the
#                  values of the qualifiers listed in 'search_qualifiers'.
has 'gff_file'     => ( is => 'ro', isa => 'Str', required => 1 );
has 'search_query' => ( is => 'ro', isa => 'Str', required => 1 );

# Lazily built attributes.
#   _awk_filter        - command string run in front of the GFF file name
#                        (builder is not visible in this part of the file).
#   _gff_parser        - Bio::Tools::GFF object reading from the filter pipe.
#   _tags_to_filter    - defaults to 'CDS'; presumably consumed by the filter
#                        builder - TODO confirm against _build__awk_filter.
#   _matching_features - features with a qualifier value matching search_query.
#   _bio_seq_objects   - builder not visible here; contents unverified.
#   search_qualifiers  - tag names whose values are searched; may be supplied
#                        by the caller, otherwise built by
#                        _build_search_qualifiers.
#   _sequences         - sequence id => sequence object, taken from the parser.
has '_awk_filter' => ( is => 'ro', isa => 'Str',             lazy => 1, builder => '_build__awk_filter' );
has '_gff_parser' => ( is => 'ro', isa => 'Bio::Tools::GFF', lazy => 1, builder => '_build__gff_parser' );
has '_tags_to_filter'    => ( is => 'ro', isa => 'Str',      default => 'CDS' );
has '_matching_features' => ( is => 'ro', isa => 'ArrayRef', lazy    => 1, builder => '_build__matching_features' );
has '_bio_seq_objects'   => ( is => 'ro', isa => 'ArrayRef', lazy    => 1, builder => '_build__bio_seq_objects' );
has 'search_qualifiers' => ( is => 'ro', isa => 'ArrayRef', lazy    => 1, builder => '_build_search_qualifiers' );
has '_sequences'         => ( is => 'ro', isa => 'HashRef',  lazy    => 1, builder => '_build__sequences' );


# Builder for the 'search_qualifiers' attribute.
# Returns a reference to a fresh array holding the default tag names whose
# values are searched: 'gene' and 'product'.
sub _build_search_qualifiers {
    my $self = shift;
    my @default_qualifiers = qw(gene product);
    return \@default_qualifiers;
}

# Builder for the '_sequences' attribute.
# Fetches every sequence from the GFF parser and indexes it by its id.
# Returns a hash reference mapping sequence id => sequence object; when two
# sequences share an id the later one wins.
sub _build__sequences {
    my $self = shift;
    my %sequence_for;
    $sequence_for{ $_->id } = $_ for $self->_gff_parser->get_seqs;
    return \%sequence_for;
}

# Builder for the '_matching_features' attribute.
#
# Reads every feature from the GFF parser and keeps those for which at least
# one of the tags named in 'search_qualifiers' has values matching
# 'search_query'. The values of a tag are joined with ',' and the query is
# applied to the joined string as a regular expression.
#
# Returns an array reference of the matching feature objects, in the order
# the parser produced them. Each feature appears at most once, even when
# several of its tags match.
sub _build__matching_features {
    my ($self) = @_;
    my @tag_names    = @{ $self->search_qualifiers };
    my $search_query = $self->search_query;
    my $gff_parser   = $self->_gff_parser;
    my @matching_features;

  FEATURE:
    while ( my $raw_feature = $gff_parser->next_feature() ) {
        for my $tag_name (@tag_names) {
            next unless $raw_feature->has_tag($tag_name);

            my $values = join( ',', $raw_feature->get_tag_values($tag_name) );
            if ( $values =~ /$search_query/ ) {
                push( @matching_features, $raw_feature );

                # One hit is enough; move straight on to the next feature so
                # it is never recorded twice.
                next FEATURE;
            }
        }
    }
    return \@matching_features;
}

# Builder for the '_gff_parser' attribute.
#
# Runs the '_awk_filter' command with the GFF file name appended and hands
# the read end of that pipe to a GFF version 3 parser.
#
# Returns a Bio::Tools::GFF object. Dies if the pipe cannot be opened.
sub _build__gff_parser {
    my ($self) = @_;

    # The command is passed to open() as one string, so the file name is
    # inserted as-is, without any quoting.
    my $command = $self->_awk_filter . " " . $self->gff_file;
    open( my $fh, '-|', $command )
      or die "Cannot run '$command' to read the GFF file: $!";

    # NOTE(review): 'alphabet' is passed without the leading dash used by the
    # other options - confirm this is intended.
    return Bio::Tools::GFF->new( -gff_version => 3, -fh => $fh, alphabet => 'dna' );
}

sub _find_feature_id {
    my ( $self, $feature ) = @_;
    my $gene_id;
    my @junk;
    my @tag_names = ( 'ID', 'locus_tag' );

    for my $tag_name (@tag_names) {



( run in 1.142 second using v1.01-cache-2.11-cpan-39bf76dae61 )