Bio-FastParsers

 view release on metacpan or  search on metacpan

lib/Bio/FastParsers/Blast/Xml.pm  view on Meta::CPAN

use Moose;
use namespace::autoclean;

use Carp;
use XML::Bare;

extends 'Bio::FastParsers::Base';

use aliased 'Bio::FastParsers::Blast::Xml::BlastOutput';

# TODO: check behavior with single iterations, hits or hsps

# public attributes (some inherited)



has 'blast_output' => (
    is       => 'ro',
    isa      => 'Maybe[Bio::FastParsers::Blast::Xml::BlastOutput]',
    init_arg => undef,
    lazy     => 1,

lib/Bio/FastParsers/Blast/Xml.pm  view on Meta::CPAN

    say $bo->db;                    # mcl-db-22species

    # get evalue threshold...
    say $bo->parameters->expect;    # 10

    # ...or equivalently
    my $param = $bo->parameters;
    say $param->expect;             # 10
    say $param->matrix;             # BLOSUM62

    # get the number of iterations (= queries)
    say $bo->count_iterations;      # 3

    # loop through iterations (or queries), hits and hsps
    # this is extremely fast because no data is moved around
    for my $iter ($bo->all_iterations) {
        say $iter->count_hits;      # always available!
        for my $hit ($iter->all_hits) {
            for my $hsp ($hit->all_hsps) {
                # ...
            }
        }
    }

    # ...or nearly equivalently (still ultra-fast)
    # here the container is altered by each iterator call

lib/Bio/FastParsers/Blast/Xml/BlastOutput.pm  view on Meta::CPAN

    is       => 'ro',
    isa      => 'Maybe[Object]',
    required => 1,
    weak_ref => 1,
);


# public array(s) of composed objects


# Lazily built array of Iteration objects, exposed through native Array
# delegations. It cannot be supplied to the constructor (init_arg is undef).
# Beware that next_iteration delegates to 'shift' and thus removes the
# returned element from the stored array.
has 'iterations' => (
    is       => 'ro',
    isa      => 'ArrayRef[Bio::FastParsers::Blast::Xml::Iteration]',
    traits   => ['Array'],
    lazy     => 1,
    builder  => '_build_iterations',
    init_arg => undef,
    handles  => {
        next_iteration   => 'shift',
        get_iteration    => 'get',
        all_iterations   => 'elements',
        count_iterations => 'count',
    },
);

## no critic (ProhibitUnusedPrivateSubroutines)

# Builder for the lazy 'iterations' attribute.
# Wraps every <Iteration> node found under <BlastOutput_iterations> of the
# parsed tree into an Iteration object that keeps a link to this BlastOutput
# as its parent. Returns an ArrayRef of these objects.
sub _build_iterations {
    my ($self) = @_;

    # forcearray is expected to yield an ArrayRef whether the parser produced
    # a single node or a list of nodes -- NOTE(review): presumably the
    # XML::Bare helper; its import is not visible here
    my $nodes = forcearray(
        $self->_root->{'BlastOutput_iterations'}->{'Iteration'}
    );

    my @iterations;
    for my $node ( @{$nodes} ) {
        push @iterations, Iteration->new( _root => $node, _parent => $self );
    }

    return \@iterations;
}

## use critic



# public composed object(s)


lib/Bio/FastParsers/Blast/Xml/BlastOutput.pm  view on Meta::CPAN

=head1 SYNOPSIS

    # see Bio::FastParsers::Blast::Xml

=head1 DESCRIPTION

This class implements the C<BlastOutput> level of the XML BLAST parser.

=head1 ATTRIBUTES

=head2 iterations

ArrayRef of L<Bio::FastParsers::Blast::Xml::Iteration>

=head2 mbstat

L<Bio::FastParsers::Blast::Xml::Statistics> composed object

=head2 param

L<Bio::FastParsers::Blast::Xml::Parameters> composed object

=head1 METHODS

=head2 count_iterations

Returns the number of Iterations of the BlastOutput.

    # $blast_output is a Bio::FastParsers::Blast::Xml::BlastOutput
    my $count = $blast_output->count_iterations;

This method does not accept any arguments.

=head2 all_iterations

Returns all the Iterations of the BlastOutput (not an array reference).

    # $blast_output is a Bio::FastParsers::Blast::Xml::BlastOutput
    my @iterations = $blast_output->all_iterations;

This method does not accept any arguments.

=head2 get_iteration

Returns one Iteration of the BlastOutput by its index. You can also use
negative index numbers, just as with Perl's core array handling. If the
specified Iteration does not exist, this method will return C<undef>.

    # $blast_output is a Bio::FastParsers::Blast::Xml::BlastOutput

lib/Bio/FastParsers/Hmmer/Standard.pm  view on Meta::CPAN


use Bio::FastParsers::Constants qw(:files);
use aliased 'Bio::FastParsers::Hmmer::Standard::Iteration';


# public attributes (inherited)


# private attributes

# Private array of Iteration objects, filled through the _set_iterations
# writer (see BUILD) and exposed through native Array delegations.
# Beware that next_iteration delegates to 'shift' and thus removes the
# returned element from the stored array.
has '_iterations' => (
    is      => 'ro',
    isa     => 'ArrayRef[Bio::FastParsers::Hmmer::Standard::Iteration]',
    traits  => ['Array'],
    writer  => '_set_iterations',
    handles => {
        count_iterations => 'count',
        all_iterations   => 'elements',
        get_iteration    => 'get',
        next_iteration   => 'shift',
    },
);

# Post-construction hook: reads the whole report file, cuts it into blocks
# each running from a line starting with 'Query:' up to the next line made
# of '//' only, and stores one Iteration object per block.
sub BUILD {
    my ($self) = @_;

    my $report = $self->file->slurp;            # includes autodie

    # in list context, the /g match returns every captured block at once
    my @iterations;
    for my $block ( $report =~ m{ ( ^Query: .+?  ^//$ ) }xmsg ) {
        push @iterations, Iteration->new($block);
    }

    $self->_set_iterations( \@iterations );

    return;
}


# aliases

# Alias for next_iteration (takes no arguments).
sub next_query {
    my ($self) = @_;
    return $self->next_iteration;
}

# Alias for get_iteration; all arguments are forwarded unchanged.
sub get_query {
    my ($self, @args) = @_;
    return $self->get_iteration(@args);
}

# Alias for all_iterations (takes no arguments).
sub all_queries {
    my ($self) = @_;
    return $self->all_iterations;
}

# Alias for count_iterations (takes no arguments).
sub count_queries {
    my ($self) = @_;
    return $self->count_iterations;
}

# TODO: improve documentation of HMMER methods

__PACKAGE__->meta->make_immutable;
1;

__END__

=pod

t/blast_xml.t  view on Meta::CPAN


    # get evalue threshold...
    cmp_ok $bo->parameters->expect, '==', $expect,
        'got expected evalue threshold';

    # ...or equivalently
    my $param = $bo->parameters;
    cmp_deeply [ $param->expect, $param->matrix ], [ $expect, $matrix ],
        'got expected blast_output attributes';

    # get the number of iterations (= queries)
    cmp_ok $bo->count_iterations, '==', $iteration_count,
        'got expected number of iterations';

    # get some 1st hsp attributes for all hits of first iteration
    my @hsp_data
        = $collect_mode ? map { [ $_->num, $_->score, $_->midline ] } map {
            $_->get_hsp(0)
        } $bo->get_iteration(0)->all_hits
        :                 map { [ $_->num, $_->score ] } map {
            $_->all_hsps
        } $bo->get_iteration(0)->get_hit(1)
    ;
    cmp_deeply \@hsp_data, $hsp_data_ref,
        'got expected attrs for 1st/all hsp(s) of 2nd/all hit(s) of 1st iter';

    # loop through iterations (or queries), hits and hsps
    # this is extremely fast because no data is moved around
    my @scores;
    my @queries;
    my @hits;
    for my $iter ($bo->all_iterations) {
        cmp_ok $iter->count_hits, '==', $hit_count, 'got expected hit count';
        for my $hit ($iter->all_hits) {
            for my $hsp ($hit->all_hsps) {
                push @scores, ($collect_mode ? $hsp->score : $hsp->positive);
                push @queries, [ $hsp->query_start, $hsp->query_end ];
                push @hits,    [ $hsp->hit_start,   $hsp->hit_end   ];
            }
        }
    }
    cmp_deeply \@scores, $scores_ref,

t/hmmer_std.t  view on Meta::CPAN

use autodie;
use feature qw(say);

use Path::Class qw(file);

use Bio::FastParsers;
use Smart::Comments;

my $class = 'Bio::FastParsers::Hmmer::Standard';

check_iterations(
    file('test', 'hmmer_double_short.stdout'),
    2,
);

check_info_and_targets(
    file('test', 'hmmer3.stdout'),
        [ 'Meredith169AA', 6705 ],
        [
            [ 0, 12909.9, 47.3, 0, 4943.5, 15.5, 3.0, 3, 'Abrocoma_bennettii', undef ],
        ],

t/hmmer_std.t  view on Meta::CPAN

);

check_domains(
    file('test', 'hmmer_domthresh.stdout'),
    [
        [],
    ],
);


# Builds a report object of $class from $infile and checks that it holds
# the expected number of iterations.
#   $infile         : report file handed to the constructor as 'file'
#   $exp_iterations : expected value of count_iterations
sub check_iterations {
    my ($infile, $exp_iterations) = @_;

    my $report = $class->new( file => $infile );
    ok $report, 'Hmmer::Standard constructor';
    isa_ok $report, $class, $infile;

    my $got_iterations = $report->count_iterations;
    cmp_ok $got_iterations, '==', $exp_iterations,
        'got expected number of iterations';

    return;
}


sub check_info_and_targets {
    my $infile = shift;
    my $exp_info    = shift;
    my $exp_targets = shift;

test/blastn.xml  view on Meta::CPAN

  <BlastOutput_param>
    <Parameters>
      <Parameters_expect>10</Parameters_expect>
      <Parameters_sc-match>1</Parameters_sc-match>
      <Parameters_sc-mismatch>-2</Parameters_sc-mismatch>
      <Parameters_gap-open>0</Parameters_gap-open>
      <Parameters_gap-extend>0</Parameters_gap-extend>
      <Parameters_filter>L;m;</Parameters_filter>
    </Parameters>
  </BlastOutput_param>
  <BlastOutput_iterations>
    <Iteration>
      <Iteration_iter-num>1</Iteration_iter-num>
      <Iteration_query-ID>Query_1</Iteration_query-ID>
      <Iteration_query-def>gi|6995995|ref|NM_000492.2| Homo sapiens cystic fibrosis transmembrane conductance regulator, ATP-binding cassette (sub-family C, member 7) (CFTR), mRNA</Iteration_query-def>
      <Iteration_query-len>6129</Iteration_query-len>
      <Iteration_hits>
        <Hit>
          <Hit_num>1</Hit_num>
          <Hit_id>gi|180331|gb|M28668.1|HUMCFTRM</Hit_id>
          <Hit_def>Human cystic fibrosis mRNA, encoding a presumed transmembrane conductance regulator (CFTR)</Hit_def>

test/blastn.xml  view on Meta::CPAN

          <Statistics_db-num>16789277</Statistics_db-num>
          <Statistics_db-len>43111105184</Statistics_db-len>
          <Statistics_hsp-len>0</Statistics_hsp-len>
          <Statistics_eff-space>284036760452322</Statistics_eff-space>
          <Statistics_kappa>0.46</Statistics_kappa>
          <Statistics_lambda>1.28</Statistics_lambda>
          <Statistics_entropy>0.85</Statistics_entropy>
        </Statistics>
      </Iteration_stat>
    </Iteration>
  </BlastOutput_iterations>
</BlastOutput>

test/blastn2.xml  view on Meta::CPAN

  <BlastOutput_param>
    <Parameters>
      <Parameters_expect>10</Parameters_expect>
      <Parameters_sc-match>1</Parameters_sc-match>
      <Parameters_sc-mismatch>-2</Parameters_sc-mismatch>
      <Parameters_gap-open>0</Parameters_gap-open>
      <Parameters_gap-extend>0</Parameters_gap-extend>
      <Parameters_filter>L;m;</Parameters_filter>
    </Parameters>
  </BlastOutput_param>
  <BlastOutput_iterations>
    <Iteration>
      <Iteration_iter-num>1</Iteration_iter-num>
      <Iteration_query-ID>Query_1</Iteration_query-ID>
      <Iteration_query-def>gi|11465907|ref|NC_001872.1| Chlamydomonas eugametos mitochondrion, complete genome</Iteration_query-def>
      <Iteration_query-len>22897</Iteration_query-len>
      <Iteration_hits>
        <Hit>
          <Hit_num>1</Hit_num>
          <Hit_id>gnl|BL_ORD_ID|0</Hit_id>
          <Hit_def>gi|11465907|ref|NC_001872.1| Chlamydomonas eugametos mitochondrion, complete genome</Hit_def>

test/blastn2.xml  view on Meta::CPAN

          <Statistics_db-num>2</Statistics_db-num>
          <Statistics_db-len>38655</Statistics_db-len>
          <Statistics_hsp-len>20</Statistics_hsp-len>
          <Statistics_eff-space>607722870</Statistics_eff-space>
          <Statistics_kappa>0.46</Statistics_kappa>
          <Statistics_lambda>1.28</Statistics_lambda>
          <Statistics_entropy>0.85</Statistics_entropy>
        </Statistics>
      </Iteration_stat>
    </Iteration>
  </BlastOutput_iterations>
</BlastOutput>

test/blastp.xml  view on Meta::CPAN

  <BlastOutput_query-len>348</BlastOutput_query-len>
  <BlastOutput_param>
    <Parameters>
      <Parameters_matrix>BLOSUM62</Parameters_matrix>
      <Parameters_expect>10</Parameters_expect>
      <Parameters_gap-open>11</Parameters_gap-open>
      <Parameters_gap-extend>1</Parameters_gap-extend>
      <Parameters_filter>F</Parameters_filter>
    </Parameters>
  </BlastOutput_param>
  <BlastOutput_iterations>
    <Iteration>
      <Iteration_iter-num>1</Iteration_iter-num>
      <Iteration_query-ID>Query_1</Iteration_query-ID>
      <Iteration_query-def>gi|2105139|gb|AAC47544.1| hemoglobin [Daphnia magna]</Iteration_query-def>
      <Iteration_query-len>348</Iteration_query-len>
      <Iteration_hits>
        <Hit>
          <Hit_num>1</Hit_num>
          <Hit_id>gi|2105139|gb|AAC47544.1|</Hit_id>
          <Hit_def>hemoglobin [Daphnia magna] &gt;gi|4589707|dbj|BAA76872.1| hemoglobin [Daphnia magna] &gt;gi|322229320|dbj|BAJ72727.1| 2-domain hemoglobin [Daphnia magna]</Hit_def>

test/blastp.xml  view on Meta::CPAN

          <Statistics_db-num>21588058</Statistics_db-num>
          <Statistics_db-len>7402657167</Statistics_db-len>
          <Statistics_hsp-len>0</Statistics_hsp-len>
          <Statistics_eff-space>978067833820</Statistics_eff-space>
          <Statistics_kappa>0.041</Statistics_kappa>
          <Statistics_lambda>0.267</Statistics_lambda>
          <Statistics_entropy>0.14</Statistics_entropy>
        </Statistics>
      </Iteration_stat>
    </Iteration>
  </BlastOutput_iterations>
</BlastOutput>

test/blastx.xml  view on Meta::CPAN

  <BlastOutput_query-len>6129</BlastOutput_query-len>
  <BlastOutput_param>
    <Parameters>
      <Parameters_matrix>BLOSUM62</Parameters_matrix>
      <Parameters_expect>10</Parameters_expect>
      <Parameters_gap-open>11</Parameters_gap-open>
      <Parameters_gap-extend>1</Parameters_gap-extend>
      <Parameters_filter>L;</Parameters_filter>
    </Parameters>
  </BlastOutput_param>
  <BlastOutput_iterations>
    <Iteration>
      <Iteration_iter-num>1</Iteration_iter-num>
      <Iteration_query-ID>Query_1</Iteration_query-ID>
      <Iteration_query-def>gi|6995995|ref|NM_000492.2| Homo sapiens cystic fibrosis transmembrane conductance regulator, ATP-binding cassette (sub-family C, member 7) (CFTR), mRNA</Iteration_query-def>
      <Iteration_query-len>6129</Iteration_query-len>
      <Iteration_hits>
        <Hit>
          <Hit_num>1</Hit_num>
          <Hit_id>gi|180332|gb|AAA35680.1|</Hit_id>
          <Hit_def>cystic fibrosis transmembrane conductance regulator [Homo sapiens]</Hit_def>

test/blastx.xml  view on Meta::CPAN

          <Statistics_db-num>21588058</Statistics_db-num>
          <Statistics_db-len>7402657167</Statistics_db-len>
          <Statistics_hsp-len>0</Statistics_hsp-len>
          <Statistics_eff-space>8288600526830</Statistics_eff-space>
          <Statistics_kappa>0.041</Statistics_kappa>
          <Statistics_lambda>0.267</Statistics_lambda>
          <Statistics_entropy>0.14</Statistics_entropy>
        </Statistics>
      </Iteration_stat>
    </Iteration>
  </BlastOutput_iterations>
</BlastOutput>

test/tblastn.xml  view on Meta::CPAN

  <BlastOutput_query-len>348</BlastOutput_query-len>
  <BlastOutput_param>
    <Parameters>
      <Parameters_matrix>BLOSUM62</Parameters_matrix>
      <Parameters_expect>10</Parameters_expect>
      <Parameters_gap-open>11</Parameters_gap-open>
      <Parameters_gap-extend>1</Parameters_gap-extend>
      <Parameters_filter>L;</Parameters_filter>
    </Parameters>
  </BlastOutput_param>
  <BlastOutput_iterations>
    <Iteration>
      <Iteration_iter-num>1</Iteration_iter-num>
      <Iteration_query-ID>Query_1</Iteration_query-ID>
      <Iteration_query-def>gi|2105139|gb|AAC47544.1| hemoglobin [Daphnia magna]</Iteration_query-def>
      <Iteration_query-len>348</Iteration_query-len>
      <Iteration_hits>
        <Hit>
          <Hit_num>1</Hit_num>
          <Hit_id>gi|2105138|gb|U67067.1|DMU67067</Hit_id>
          <Hit_def>Daphnia magna hemoglobin (Dhb1) mRNA, complete cds</Hit_def>

test/tblastn.xml  view on Meta::CPAN

          <Statistics_db-num>16789277</Statistics_db-num>
          <Statistics_db-len>43111105184</Statistics_db-len>
          <Statistics_hsp-len>0</Statistics_hsp-len>
          <Statistics_eff-space>2422090766106</Statistics_eff-space>
          <Statistics_kappa>0.041</Statistics_kappa>
          <Statistics_lambda>0.267</Statistics_lambda>
          <Statistics_entropy>0.14</Statistics_entropy>
        </Statistics>
      </Iteration_stat>
    </Iteration>
  </BlastOutput_iterations>
</BlastOutput>

test/tblastx.xml  view on Meta::CPAN

  <BlastOutput_query-len>22897</BlastOutput_query-len>
  <BlastOutput_param>
    <Parameters>
      <Parameters_matrix>BLOSUM62</Parameters_matrix>
      <Parameters_expect>10</Parameters_expect>
      <Parameters_gap-open>11</Parameters_gap-open>
      <Parameters_gap-extend>1</Parameters_gap-extend>
      <Parameters_filter>L;</Parameters_filter>
    </Parameters>
  </BlastOutput_param>
  <BlastOutput_iterations>
    <Iteration>
      <Iteration_iter-num>1</Iteration_iter-num>
      <Iteration_query-ID>Query_1</Iteration_query-ID>
      <Iteration_query-def>gi|11465907|ref|NC_001872.1| Chlamydomonas eugametos mitochondrion, complete genome</Iteration_query-def>
      <Iteration_query-len>22897</Iteration_query-len>
      <Iteration_hits>
        <Hit>
          <Hit_num>1</Hit_num>
          <Hit_id>gnl|BL_ORD_ID|0</Hit_id>
          <Hit_def>gi|11465907|ref|NC_001872.1| Chlamydomonas eugametos mitochondrion, complete genome</Hit_def>

test/tblastx.xml  view on Meta::CPAN

          <Statistics_db-num>2</Statistics_db-num>
          <Statistics_db-len>38655</Statistics_db-len>
          <Statistics_hsp-len>37</Statistics_hsp-len>
          <Statistics_eff-space>97299545</Statistics_eff-space>
          <Statistics_kappa>0.133956144488482</Statistics_kappa>
          <Statistics_lambda>0.317605957635731</Statistics_lambda>
          <Statistics_entropy>0.401214524497119</Statistics_entropy>
        </Statistics>
      </Iteration_stat>
    </Iteration>
  </BlastOutput_iterations>
</BlastOutput>



( run in 2.241 seconds using v1.01-cache-2.11-cpan-96521ef73a4 )