Bio-FastParsers

 view release on metacpan or  search on metacpan

t/blast_xml.t  view on Meta::CPAN

            'RFVKAHPEYQKMFSKFANVPQSELL NGNFLAQAYTILAGLNVVIQSLFSQELMANQLNALGGAHQ RGATP+MFE                        QFGGIL+EVLAEELGS FTAEARQAWKNGLAALVAGIAKNLK'
        ],
        [
            '1',
            '553',
            'RFVKAHPEYQKMFSKFANVPQSELL NGNFLAQAYTILAGLNVVIQSLFSQELMANQLNALGGAHQ RGATP+MFE                        QFGGIL+EVLAEELGS FTAEARQAWKNGLAALVAGIAKNLK'
        ],
        [
            '1',
            '553',
            'RFVKAHPEYQKMFSKFANVPQSELL NGNFLAQAYTILAGLNVVIQSLFSQELMANQLNALGGAHQ RGATP+MFE                        QFGGIL+EVLAEELGS FTAEARQAWKNGLAALVAGIAKNLK'
        ]
    ]
);

# Validate the tblastx report through check_file().
# program/db are compared with the blast_output attributes, expect/matrix
# with its parameters, and iteration_count with count_iterations.
check_file(
    infile  => file('test', 'tblastx.xml'),
    program => 'tblastx',
    db      => 'chlamy_mito',
    expect  => 10,
    matrix  => 'BLOSUM62',
    # false collect_mode: check_file gathers [ num, score ] pairs from all
    # HSPs of the hit returned by get_hit(1) in the first iteration and
    # compares them with hsp_data_ref
    collect_mode => 0,
	iteration_count => 1,
	hit_count       => 2,
	scores_ref      => [ qw(1522 1309 799 818 806 630 656 645 564 562 589 552 546 511 500 459 488 460 465 418 454 434 406 362 368 356 409 385 368 350 327 356 335 344 347 344 337 318 296 306 321 308 332 323 352 304 301 280 274 296 310 261 274 249 262 273...
	identities_ref  => [ qw(17974 12910 19148 3001 15511 7729 11545 6324 12444 18949 418 9845 6621 3179 8295 17436 2800 15335 999 8748 13013 2229 17733 13968 12905 16602 20418 5527 12243 11807 15569 16329 18005 6173 15237 20695 16958 21584 11923 11648 3...
	hsp_data_ref    => [
        [ '1', '787' ], [ '2', '709' ], [ '3', '576' ], [ '4', '436' ], [ '5', '267' ], [ '6', '142' ], [ '7', '129' ], [ '8', '55' ], [ '9', '676' ], [ '10', '503' ], [ '11', '453' ], [ '12', '394' ], [ '13', '292' ], [ '14', '160' ], [ '15', '84' ]...
    ]
);

{
    # Spot-check one HSP of a blastn report: parse the XML file, query a
    # fixed series of accessors and compare the flattened values with
    # hard-coded expectations.
    my $xml_file = file('test', 'blastn2.xml');
    my $parser   = $class->new( file => $xml_file );
    isa_ok $parser, $class, $xml_file;

    # top-level container of the parsed report
    my $blast_output = $parser->blast_output;

    # expected values, one row per group of accessors queried below
    my @expected = (
        1, -1,                  # query_strand, hit_strand
        17538, 17582,           # query_start,  query_end
        17538, 17582,           # hit_start,    hit_end
        1, 17538, 17582,        # query_frame,  query_from, query_to
        -1, 17582, 17538,       # hit_frame,    hit_from,   hit_to
        22897, 22897,           # query_len,    hit_len
        0.2, 0.2,               # qcov,         subject_coverage
        95.7, 95.7,             # pident,       percentage_positive
    );

    # get_hit(0) of the first iteration, then get_hsp(27) on whatever it
    # returned; both calls stay in list context
    my @hsps;
    for my $hit ( $blast_output->get_iteration(0)->get_hit(0) ) {
        push @hsps, $hit->get_hsp(27);
    }

    # flatten the accessor values in the same order as @expected
    my @got;
    for my $hsp (@hsps) {
        push @got, (
            $hsp->query_strand, $hsp->hit_strand,
            $hsp->query_start,  $hsp->query_end,
            $hsp->hit_start,    $hsp->hit_end,
            $hsp->query_frame,  $hsp->query_from, $hsp->query_to,
            $hsp->hit_frame,    $hsp->hit_from,   $hsp->hit_to,
            $hsp->query_len,    $hsp->hit_len,
            $hsp->qcov,         $hsp->subject_coverage,
            $hsp->pident,       $hsp->percentage_positive,
        );
    }

    cmp_deeply \@got, \@expected,
        'got expected coordinates for hsp 27';
}


sub check_file {
	my %args = @_;

	my (
	    $infile, $program, $db, $expect, $matrix,
	    $collect_mode, $iteration_count, $hit_count,
	    $scores_ref, $identities_ref, $hsp_data_ref
	) = @args{ qw(
	    infile program db expect matrix
	    collect_mode iteration_count hit_count
	    scores_ref identities_ref hsp_data_ref
	) };

    # open and parse BLAST report in XML format
    ok my $report = $class->new( file => $infile ), 'Blast::XML constructor';
    isa_ok $report, $class, $infile;

    # get main container
    my $bo = $report->blast_output;

    # examine report content
    cmp_deeply [ $bo->program, $bo->db ], [ $program, $db ],
        'got expected blast_output attributes';

    # get evalue threshold...
    cmp_ok $bo->parameters->expect, '==', $expect,
        'got expected evalue threshold';

    # ...or equivalently
    my $param = $bo->parameters;
    cmp_deeply [ $param->expect, $param->matrix ], [ $expect, $matrix ],
        'got expected blast_output attributes';

    # get the number of iterations (= queries)
    cmp_ok $bo->count_iterations, '==', $iteration_count,
        'got expected number of iterations';

    # get some 1st hsp attributes for all hits of first iteration
    my @hsp_data
        = $collect_mode ? map { [ $_->num, $_->score, $_->midline ] } map {
            $_->get_hsp(0)
        } $bo->get_iteration(0)->all_hits
        :                 map { [ $_->num, $_->score ] } map {
            $_->all_hsps
        } $bo->get_iteration(0)->get_hit(1)
    ;
    cmp_deeply \@hsp_data, $hsp_data_ref,
        'got expected attrs for 1st/all hsp(s) of 2nd/all hit(s) of 1st iter';

    # loop through iterations (or queries), hits and hsps
    # this is extremely fast because no data is moved around
    my @scores;
    my @queries;



( run in 0.901 second using v1.01-cache-2.11-cpan-39bf76dae61 )