view release on metacpan or search on metacpan
lib/Bio/FastParsers/Blast/Xml.pm view on Meta::CPAN
use Moose;
use namespace::autoclean;
use Carp;
use XML::Bare;
extends 'Bio::FastParsers::Base';
use aliased 'Bio::FastParsers::Blast::Xml::BlastOutput';
# TODO: check behavior with single iterations, hits or hsps
# public attributes (some inherited)
has 'blast_output' => (
is => 'ro',
isa => 'Maybe[Bio::FastParsers::Blast::Xml::BlastOutput]',
init_arg => undef,
lazy => 1,
lib/Bio/FastParsers/Blast/Xml.pm view on Meta::CPAN
say $bo->db; # mcl-db-22species
# get evalue threshold...
say $bo->parameters->expect; # 10
# ...or equivalently
my $param = $bo->parameters;
say $param->expect; # 10
say $param->matrix; # BLOSUM62
# get the number of iterations (= queries)
say $bo->count_iterations; # 3
# loop through iterations (or queries), hits and hsps
# this is extremely fast because no data is moved around
for my $iter ($bo->all_iterations) {
say $iter->count_hits; # always available!
for my $hit ($iter->all_hits) {
for my $hsp ($hit->all_hsps) {
# ...
}
}
}
# ...or nearly equivalently (still ultra-fast)
# here the container is altered by each iterator call
lib/Bio/FastParsers/Blast/Xml/BlastOutput.pm view on Meta::CPAN
is => 'ro',
isa => 'Maybe[Object]',
required => 1,
weak_ref => 1,
);
# public array(s) of composed objects
# Lazily built list of Iteration objects for this BlastOutput.
# The native 'Array' trait provides the delegated list helpers below.
has 'iterations' => (
    traits   => ['Array'],
    isa      => 'ArrayRef[Bio::FastParsers::Blast::Xml::Iteration]',
    is       => 'ro',
    lazy     => 1,                       # built on first access only
    builder  => '_build_iterations',
    init_arg => undef,                   # not settable through the constructor
    handles  => {
        all_iterations   => 'elements',  # plain list, not an ArrayRef
        count_iterations => 'count',
        get_iteration    => 'get',       # lookup by index
        next_iteration   => 'shift',     # removes the Iteration it returns
    },
);
## no critic (ProhibitUnusedPrivateSubroutines)
# Builder for the lazy 'iterations' attribute.
#
# Wraps each 'Iteration' node found under 'BlastOutput_iterations' of the
# parsed tree (_root) into an Iteration object that points back to this
# BlastOutput through its _parent argument.
#
# Returns an ArrayRef of Iteration objects, in the order of the XML nodes.
sub _build_iterations {
    my ($self) = @_;

    # forcearray presumably turns a lone Iteration node into a one-element
    # ArrayRef so that single-query reports are handled too -- TODO confirm
    my $iteration_nodes = forcearray(
        $self->_root->{'BlastOutput_iterations'}->{'Iteration'}
    );

    my @iterations;
    for my $node ( @{$iteration_nodes} ) {
        push @iterations, Iteration->new( _root => $node, _parent => $self );
    }

    return \@iterations;
}
## use critic
# public composed object(s)
lib/Bio/FastParsers/Blast/Xml/BlastOutput.pm view on Meta::CPAN
=head1 SYNOPSIS
# see Bio::FastParsers::Blast::Xml
=head1 DESCRIPTION
This class implements the C<BlastOutput> level of the XML BLAST parser.
=head1 ATTRIBUTES
=head2 iterations
ArrayRef of L<Bio::FastParsers::Blast::Xml::Iteration>
=head2 mbstat
L<Bio::FastParsers::Blast::Xml::Statistics> composed object
=head2 param
L<Bio::FastParsers::Blast::Xml::Parameters> composed object
=head1 METHODS
=head2 count_iterations
Returns the number of Iterations of the BlastOutput.
# $blast_output is a Bio::FastParsers::Blast::Xml::BlastOutput
my $count = $blast_output->count_iterations;
This method does not accept any arguments.
=head2 all_iterations
Returns all the Iterations of the BlastOutput (not an array reference).
# $blast_output is a Bio::FastParsers::Blast::Xml::BlastOutput
my @iterations = $blast_output->all_iterations;
This method does not accept any arguments.
=head2 get_iteration
Returns one Iteration of the BlastOutput by its index. You can also use
negative index numbers, just as with Perl's core array handling. If the
specified Iteration does not exist, this method will return C<undef>.
# $blast_output is a Bio::FastParsers::Blast::Xml::BlastOutput
lib/Bio/FastParsers/Hmmer/Standard.pm view on Meta::CPAN
use Bio::FastParsers::Constants qw(:files);
use aliased 'Bio::FastParsers::Hmmer::Standard::Iteration';
# public attributes (inherited)
# private attributes
# Private storage for the parsed Iteration objects of the report.
# It has no builder or default here: BUILD fills it through the private
# writer. The public API is the set of delegated list helpers in 'handles'.
has '_iterations' => (
    traits  => ['Array'],
    isa     => 'ArrayRef[Bio::FastParsers::Hmmer::Standard::Iteration]',
    is      => 'ro',
    writer  => '_set_iterations',
    handles => {
        count_iterations => 'count',
        all_iterations   => 'elements',  # plain list, not an ArrayRef
        get_iteration    => 'get',       # lookup by index
        next_iteration   => 'shift',     # removes the Iteration it returns
    },
);
# Moose construction hook: reads the whole report file and splits it into
# one Iteration object per query block.
#
# A query block starts on a line beginning with 'Query:' and runs up to and
# including the next line that consists only of '//'.
sub BUILD {
    my ($self) = @_;

    # whole file in memory (original note: slurp "includes autodie")
    my $report_text = $self->file->slurp;

    # each captured block is handed as-is to the Iteration constructor
    my @iterations
        = map { Iteration->new($_) }
          $report_text =~ m{ ( ^Query: .+? ^//$ ) }xmsg;

    $self->_set_iterations( \@iterations );

    return;
}
# aliases
# Alias for next_iteration: forwards the call to the invocant and returns
# whatever next_iteration returns.
sub next_query {
    my ($self) = @_;
    return $self->next_iteration;
}
# Alias for get_iteration: forwards every argument after the invocant
# unchanged and returns whatever get_iteration returns.
sub get_query {
    my ( $self, @args ) = @_;
    return $self->get_iteration(@args);
}
# Alias for all_iterations: forwards the call to the invocant, in the
# caller's context, and returns whatever all_iterations returns.
sub all_queries {
    my ($self) = @_;
    return $self->all_iterations;
}
# Alias for count_iterations: forwards the call to the invocant and returns
# whatever count_iterations returns.
sub count_queries {
    my ($self) = @_;
    return $self->count_iterations;
}
# TODO: improve documentation of HMMER methods
__PACKAGE__->meta->make_immutable;
1;
__END__
=pod
t/blast_xml.t view on Meta::CPAN
# get evalue threshold...
cmp_ok $bo->parameters->expect, '==', $expect,
'got expected evalue threshold';
# ...or equivalently
my $param = $bo->parameters;
cmp_deeply [ $param->expect, $param->matrix ], [ $expect, $matrix ],
'got expected blast_output attributes';
# get the number of iterations (= queries)
cmp_ok $bo->count_iterations, '==', $iteration_count,
'got expected number of iterations';
# get some 1st hsp attributes for all hits of first iteration
my @hsp_data;
if ($collect_mode) {
    # first hsp of every hit of the first iteration: num, score and midline
    @hsp_data
        = map { [ $_->num, $_->score, $_->midline ] }
          map { $_->get_hsp(0) } $bo->get_iteration(0)->all_hits;
}
else {
    # every hsp of the second hit of the first iteration: num and score only
    @hsp_data
        = map { [ $_->num, $_->score ] }
          map { $_->all_hsps } $bo->get_iteration(0)->get_hit(1);
}
cmp_deeply \@hsp_data, $hsp_data_ref,
'got expected attrs for 1st/all hsp(s) of 2nd/all hit(s) of 1st iter';
# loop through iterations (or queries), hits and hsps
# this is extremely fast because no data is moved around
# accumulators filled while walking iterations -> hits -> hsps
my ( @scores, @queries, @hits );
for my $iteration ( $bo->all_iterations ) {
    cmp_ok( $iteration->count_hits, '==', $hit_count,
        'got expected hit count' );
    for my $hit ( $iteration->all_hits ) {
        for my $hsp ( $hit->all_hsps ) {

            # which hsp value is recorded depends on the mode under test
            if ($collect_mode) {
                push @scores, $hsp->score;
            }
            else {
                push @scores, $hsp->positive;
            }

            # [ start, end ] coordinate pairs on the query and on the hit
            push @queries, [ $hsp->query_start, $hsp->query_end ];
            push @hits,    [ $hsp->hit_start,   $hsp->hit_end ];
        }
    }
}
cmp_deeply \@scores, $scores_ref,
t/hmmer_std.t view on Meta::CPAN
use autodie;
use feature qw(say);
use Path::Class qw(file);
use Bio::FastParsers;
use Smart::Comments;
my $class = 'Bio::FastParsers::Hmmer::Standard';
check_iterations(
file('test', 'hmmer_double_short.stdout'),
2,
);
check_info_and_targets(
file('test', 'hmmer3.stdout'),
[ 'Meredith169AA', 6705 ],
[
[ 0, 12909.9, 47.3, 0, 4943.5, 15.5, 3.0, 3, 'Abrocoma_bennettii', undef ],
],
t/hmmer_std.t view on Meta::CPAN
);
check_domains(
file('test', 'hmmer_domthresh.stdout'),
[
[],
],
);
# Test helper: builds a report object of $class from $infile, then checks
# that construction succeeded, that the object is of the expected class and
# that it holds the expected number of iterations.
#
#   $infile         - report file handed to the constructor as 'file'
#   $exp_iterations - expected value of count_iterations
sub check_iterations {
    my ( $infile, $exp_iterations ) = @_;

    my $report = $class->new( file => $infile );
    ok( $report, 'Hmmer::Standard constructor' );

    # the input file doubles as the object name in the test output
    isa_ok( $report, $class, $infile );

    cmp_ok(
        $report->count_iterations, '==', $exp_iterations,
        'got expected number of iterations'
    );

    return;
}
sub check_info_and_targets {
my $infile = shift;
my $exp_info = shift;
my $exp_targets = shift;
test/blastn.xml view on Meta::CPAN
<BlastOutput_param>
<Parameters>
<Parameters_expect>10</Parameters_expect>
<Parameters_sc-match>1</Parameters_sc-match>
<Parameters_sc-mismatch>-2</Parameters_sc-mismatch>
<Parameters_gap-open>0</Parameters_gap-open>
<Parameters_gap-extend>0</Parameters_gap-extend>
<Parameters_filter>L;m;</Parameters_filter>
</Parameters>
</BlastOutput_param>
<BlastOutput_iterations>
<Iteration>
<Iteration_iter-num>1</Iteration_iter-num>
<Iteration_query-ID>Query_1</Iteration_query-ID>
<Iteration_query-def>gi|6995995|ref|NM_000492.2| Homo sapiens cystic fibrosis transmembrane conductance regulator, ATP-binding cassette (sub-family C, member 7) (CFTR), mRNA</Iteration_query-def>
<Iteration_query-len>6129</Iteration_query-len>
<Iteration_hits>
<Hit>
<Hit_num>1</Hit_num>
<Hit_id>gi|180331|gb|M28668.1|HUMCFTRM</Hit_id>
<Hit_def>Human cystic fibrosis mRNA, encoding a presumed transmembrane conductance regulator (CFTR)</Hit_def>
test/blastn.xml view on Meta::CPAN
<Statistics_db-num>16789277</Statistics_db-num>
<Statistics_db-len>43111105184</Statistics_db-len>
<Statistics_hsp-len>0</Statistics_hsp-len>
<Statistics_eff-space>284036760452322</Statistics_eff-space>
<Statistics_kappa>0.46</Statistics_kappa>
<Statistics_lambda>1.28</Statistics_lambda>
<Statistics_entropy>0.85</Statistics_entropy>
</Statistics>
</Iteration_stat>
</Iteration>
</BlastOutput_iterations>
</BlastOutput>
test/blastn2.xml view on Meta::CPAN
<BlastOutput_param>
<Parameters>
<Parameters_expect>10</Parameters_expect>
<Parameters_sc-match>1</Parameters_sc-match>
<Parameters_sc-mismatch>-2</Parameters_sc-mismatch>
<Parameters_gap-open>0</Parameters_gap-open>
<Parameters_gap-extend>0</Parameters_gap-extend>
<Parameters_filter>L;m;</Parameters_filter>
</Parameters>
</BlastOutput_param>
<BlastOutput_iterations>
<Iteration>
<Iteration_iter-num>1</Iteration_iter-num>
<Iteration_query-ID>Query_1</Iteration_query-ID>
<Iteration_query-def>gi|11465907|ref|NC_001872.1| Chlamydomonas eugametos mitochondrion, complete genome</Iteration_query-def>
<Iteration_query-len>22897</Iteration_query-len>
<Iteration_hits>
<Hit>
<Hit_num>1</Hit_num>
<Hit_id>gnl|BL_ORD_ID|0</Hit_id>
<Hit_def>gi|11465907|ref|NC_001872.1| Chlamydomonas eugametos mitochondrion, complete genome</Hit_def>
test/blastn2.xml view on Meta::CPAN
<Statistics_db-num>2</Statistics_db-num>
<Statistics_db-len>38655</Statistics_db-len>
<Statistics_hsp-len>20</Statistics_hsp-len>
<Statistics_eff-space>607722870</Statistics_eff-space>
<Statistics_kappa>0.46</Statistics_kappa>
<Statistics_lambda>1.28</Statistics_lambda>
<Statistics_entropy>0.85</Statistics_entropy>
</Statistics>
</Iteration_stat>
</Iteration>
</BlastOutput_iterations>
</BlastOutput>
test/blastp.xml view on Meta::CPAN
<BlastOutput_query-len>348</BlastOutput_query-len>
<BlastOutput_param>
<Parameters>
<Parameters_matrix>BLOSUM62</Parameters_matrix>
<Parameters_expect>10</Parameters_expect>
<Parameters_gap-open>11</Parameters_gap-open>
<Parameters_gap-extend>1</Parameters_gap-extend>
<Parameters_filter>F</Parameters_filter>
</Parameters>
</BlastOutput_param>
<BlastOutput_iterations>
<Iteration>
<Iteration_iter-num>1</Iteration_iter-num>
<Iteration_query-ID>Query_1</Iteration_query-ID>
<Iteration_query-def>gi|2105139|gb|AAC47544.1| hemoglobin [Daphnia magna]</Iteration_query-def>
<Iteration_query-len>348</Iteration_query-len>
<Iteration_hits>
<Hit>
<Hit_num>1</Hit_num>
<Hit_id>gi|2105139|gb|AAC47544.1|</Hit_id>
<Hit_def>hemoglobin [Daphnia magna] >gi|4589707|dbj|BAA76872.1| hemoglobin [Daphnia magna] >gi|322229320|dbj|BAJ72727.1| 2-domain hemoglobin [Daphnia magna]</Hit_def>
test/blastp.xml view on Meta::CPAN
<Statistics_db-num>21588058</Statistics_db-num>
<Statistics_db-len>7402657167</Statistics_db-len>
<Statistics_hsp-len>0</Statistics_hsp-len>
<Statistics_eff-space>978067833820</Statistics_eff-space>
<Statistics_kappa>0.041</Statistics_kappa>
<Statistics_lambda>0.267</Statistics_lambda>
<Statistics_entropy>0.14</Statistics_entropy>
</Statistics>
</Iteration_stat>
</Iteration>
</BlastOutput_iterations>
</BlastOutput>
test/blastx.xml view on Meta::CPAN
<BlastOutput_query-len>6129</BlastOutput_query-len>
<BlastOutput_param>
<Parameters>
<Parameters_matrix>BLOSUM62</Parameters_matrix>
<Parameters_expect>10</Parameters_expect>
<Parameters_gap-open>11</Parameters_gap-open>
<Parameters_gap-extend>1</Parameters_gap-extend>
<Parameters_filter>L;</Parameters_filter>
</Parameters>
</BlastOutput_param>
<BlastOutput_iterations>
<Iteration>
<Iteration_iter-num>1</Iteration_iter-num>
<Iteration_query-ID>Query_1</Iteration_query-ID>
<Iteration_query-def>gi|6995995|ref|NM_000492.2| Homo sapiens cystic fibrosis transmembrane conductance regulator, ATP-binding cassette (sub-family C, member 7) (CFTR), mRNA</Iteration_query-def>
<Iteration_query-len>6129</Iteration_query-len>
<Iteration_hits>
<Hit>
<Hit_num>1</Hit_num>
<Hit_id>gi|180332|gb|AAA35680.1|</Hit_id>
<Hit_def>cystic fibrosis transmembrane conductance regulator [Homo sapiens]</Hit_def>
test/blastx.xml view on Meta::CPAN
<Statistics_db-num>21588058</Statistics_db-num>
<Statistics_db-len>7402657167</Statistics_db-len>
<Statistics_hsp-len>0</Statistics_hsp-len>
<Statistics_eff-space>8288600526830</Statistics_eff-space>
<Statistics_kappa>0.041</Statistics_kappa>
<Statistics_lambda>0.267</Statistics_lambda>
<Statistics_entropy>0.14</Statistics_entropy>
</Statistics>
</Iteration_stat>
</Iteration>
</BlastOutput_iterations>
</BlastOutput>
test/tblastn.xml view on Meta::CPAN
<BlastOutput_query-len>348</BlastOutput_query-len>
<BlastOutput_param>
<Parameters>
<Parameters_matrix>BLOSUM62</Parameters_matrix>
<Parameters_expect>10</Parameters_expect>
<Parameters_gap-open>11</Parameters_gap-open>
<Parameters_gap-extend>1</Parameters_gap-extend>
<Parameters_filter>L;</Parameters_filter>
</Parameters>
</BlastOutput_param>
<BlastOutput_iterations>
<Iteration>
<Iteration_iter-num>1</Iteration_iter-num>
<Iteration_query-ID>Query_1</Iteration_query-ID>
<Iteration_query-def>gi|2105139|gb|AAC47544.1| hemoglobin [Daphnia magna]</Iteration_query-def>
<Iteration_query-len>348</Iteration_query-len>
<Iteration_hits>
<Hit>
<Hit_num>1</Hit_num>
<Hit_id>gi|2105138|gb|U67067.1|DMU67067</Hit_id>
<Hit_def>Daphnia magna hemoglobin (Dhb1) mRNA, complete cds</Hit_def>
test/tblastn.xml view on Meta::CPAN
<Statistics_db-num>16789277</Statistics_db-num>
<Statistics_db-len>43111105184</Statistics_db-len>
<Statistics_hsp-len>0</Statistics_hsp-len>
<Statistics_eff-space>2422090766106</Statistics_eff-space>
<Statistics_kappa>0.041</Statistics_kappa>
<Statistics_lambda>0.267</Statistics_lambda>
<Statistics_entropy>0.14</Statistics_entropy>
</Statistics>
</Iteration_stat>
</Iteration>
</BlastOutput_iterations>
</BlastOutput>
test/tblastx.xml view on Meta::CPAN
<BlastOutput_query-len>22897</BlastOutput_query-len>
<BlastOutput_param>
<Parameters>
<Parameters_matrix>BLOSUM62</Parameters_matrix>
<Parameters_expect>10</Parameters_expect>
<Parameters_gap-open>11</Parameters_gap-open>
<Parameters_gap-extend>1</Parameters_gap-extend>
<Parameters_filter>L;</Parameters_filter>
</Parameters>
</BlastOutput_param>
<BlastOutput_iterations>
<Iteration>
<Iteration_iter-num>1</Iteration_iter-num>
<Iteration_query-ID>Query_1</Iteration_query-ID>
<Iteration_query-def>gi|11465907|ref|NC_001872.1| Chlamydomonas eugametos mitochondrion, complete genome</Iteration_query-def>
<Iteration_query-len>22897</Iteration_query-len>
<Iteration_hits>
<Hit>
<Hit_num>1</Hit_num>
<Hit_id>gnl|BL_ORD_ID|0</Hit_id>
<Hit_def>gi|11465907|ref|NC_001872.1| Chlamydomonas eugametos mitochondrion, complete genome</Hit_def>
test/tblastx.xml view on Meta::CPAN
<Statistics_db-num>2</Statistics_db-num>
<Statistics_db-len>38655</Statistics_db-len>
<Statistics_hsp-len>37</Statistics_hsp-len>
<Statistics_eff-space>97299545</Statistics_eff-space>
<Statistics_kappa>0.133956144488482</Statistics_kappa>
<Statistics_lambda>0.317605957635731</Statistics_lambda>
<Statistics_entropy>0.401214524497119</Statistics_entropy>
</Statistics>
</Iteration_stat>
</Iteration>
</BlastOutput_iterations>
</BlastOutput>