Bio-GFF3

 view release on metacpan or  search on metacpan

t/parser.t  view on Meta::CPAN

use strict;
use warnings;

use Test::More 0.88;
use File::Temp;
use File::Spec::Functions 'catfile';

use Bio::GFF3::LowLevel::Parser;

# Open the fixture that contains sync directives and set the parser's
# lookback to 1 before any item is read from it.
my $syncs_gff3 = catfile( 't', 'data', 'gff3_with_syncs.gff3' );
my $p = Bio::GFF3::LowLevel::Parser->open($syncs_gff3);
$p->max_lookback(1);

# {
#     my $m = $p->_merge_features(
#                { locations => [1], attributes => { Foo => ['baz'], zaz => ['zoz']} },
#                { locations => [2], attributes => { Foo => ['bar'], zee => ['ziz']} }
#             );
#     is_deeply( $m,
#                { locations => [1,2], attributes => { Foo => ['baz','bar'], zaz => ['zoz'], zee => ['ziz']}},
#              ) or diag explain $m;
# }

# Drain the parser and sort every item it returns into %stuff by kind, so the
# complete result can be compared against the stored dump afterwards.
#   features   - array refs (each one is pushed as-is; its members are hashes)
#   directives - hashes with a true 'directive' entry
#   fasta      - hashes with a true 'FASTA_fh' entry
# Anything else is a hard failure.
my %stuff;
while ( my $item = $p->next_item ) {
    if ( ref $item eq 'ARRAY' ) {
        push @{ $stuff{features} }, $item;

        # Every member of a returned feature group must be a gene; the test
        # is named so a failure in this loop is easy to attribute.
        for my $feature ( @{$item} ) {
            is( $feature->{type}, 'gene', 'feature in returned group has type gene' );
        }
    }
    elsif ( $item->{directive} ) {
        push @{ $stuff{directives} }, $item;
    }
    # NOTE(review): the FASTA items checked later in this file carry
    # 'directive' and 'filehandle' keys, so this branch may be unreachable
    # with the current parser -- confirm before removing.
    elsif ( $item->{FASTA_fh} ) {
        push @{ $stuff{fasta} }, $item;
    }
    else {
        die 'this should never happen!';
    }
}

# Load the expected data structure by evaluating the stored dump file.
# `do FILE` resolves a relative path that does not begin with ./ or ../
# through @INC, and "." is not in @INC on perl 5.26 and later, so the path is
# made absolute first.  The path is held in a variable so `do` is parsed as
# `do EXPR` (evaluate a file).
require File::Spec;
my $dumped_result_path
    = File::Spec->rel2abs( catfile(qw( t data gff3_with_syncs.dumped_result )) );
my $right_stuff = do $dumped_result_path;

# `do` returns undef both when the file cannot be read ($! is set) and when
# it fails to compile ($@ is set); say which, instead of letting the
# comparison below fail without explanation.
defined $right_stuff
    or diag( "could not load $dumped_result_path: " . ( $@ || $! ) );

is_deeply( \%stuff,
           $right_stuff,
           'parsed the right stuff' )
    or diag explain \%stuff;

# Cursory parsing of the other fixtures.  Each case pairs the number of items
# next_item is expected to return with the fixture file name under t/data.
my @item_count_cases = (
    [ 1010, 'messy_protein_domains.gff3' ],
    [ 4,    'gff3_with_syncs.gff3' ],
    [ 51,   'au9_scaffold_subset.gff3' ],
    [ 14,   'tomato_chr4_head.gff3' ],
    [ 6,    'directives.gff3' ],
    [ 3,    'hybrid1.gff3' ],
    [ 3,    'hybrid2.gff3' ],
    [ 6,    'knownGene.gff3' ],
    [ 6,    'knownGene2.gff3' ],
    [ 16,   'tomato_test.gff3' ],
    [ 3,    'spec_eden.gff3' ],
    [ 1,    'spec_match.gff3' ],
    [ 8,    'quantitative.gff3' ],
);

for my $case (@item_count_cases) {
    my ( $expected, $file ) = @{$case};

    my $parser = Bio::GFF3::LowLevel::Parser->open( catfile( 't', 'data', $file ) );
    $parser->max_lookback(10);

    # Collect everything the parser hands back.
    my @parsed;
    while ( my $item = $parser->next_item ) {
        push @parsed, $item;
    }

    is( scalar(@parsed), $expected, "parsed $expected things from $file" )
        or diag explain \@parsed;

    # No returned item may be a plain hash ref that has a 'phase' key.
    my $bare_features = grep { ref $_ eq 'HASH' && exists $_->{phase} } @parsed;
    is( $bare_features, 0, "no bare-hashref features in $file" );
}

# Both hybrid fixtures should yield three items, the last being a FASTA
# directive whose filehandle produces the sequence text below.
for my $hybrid_file (qw( hybrid1.gff3 hybrid2.gff3 )) {
    my $parser = Bio::GFF3::LowLevel::Parser->open( catfile( 't', 'data', $hybrid_file ) );
    $parser->max_lookback(3);

    my @items;
    while ( my $next = $parser->next_item ) {
        push @items, $next;
    }

    my $expected_fasta = <<EOF;
>A00469
GATTACA
GATTACA
EOF

    is( scalar(@items), 3, 'got 3 items' );

    is( $items[-1]{directive}, 'FASTA', 'last one is a FASTA directive' )
        or diag explain \@items;

    is( slurp_fh( $items[-1]{filehandle} ), $expected_fasta, 'got the right stuff in the filehandle' )
        or diag explain $items[-1];
}


{   # parse the same one-line GFF3 text from a string ref, then from a filehandle

    # One tab-separated feature line followed by a newline.
    my $gff3 = join(
        "\t",
        'SL2.40ch01', 'ITAG_eugene', 'gene', '80999140', '81004317', '.', '+', '.',
        'Alias=Solyc01g098840;ID=gene:Solyc01g098840.2;Name=Solyc01g098840.2;from_BOGAS=1;length=5178',
    ) . "\n";

    my $from_string = Bio::GFF3::LowLevel::Parser->open( \$gff3 )->next_item;
    is( $from_string->[0]{source}, 'ITAG_eugene', 'parsed from a string ref OK' )
        or diag explain $from_string;

    # Write the text to a temp file, close it, and reopen it by name so the
    # parser is handed an ordinary read handle.
    my $tempfile = File::Temp->new;
    $tempfile->print($gff3);
    $tempfile->close;
    open my $fh, '<', "$tempfile" or die "$! reading $tempfile";

    my $from_handle = Bio::GFF3::LowLevel::Parser->open($fh)->next_item;
    is( $from_handle->[0]{source}, 'ITAG_eugene', 'parsed from a filehandle OK' )
        or diag explain $from_handle;

}



( run in 1.028 second using v1.01-cache-2.11-cpan-39bf76dae61 )