view release on metacpan or search on metacpan
B<fastr --strict> [B<--ambig>=<char>] [MULTIFASTA-FILE...]
B<fastr --iupac> [B<--ambig>=<char>] [MULTIFASTA-FILE...]
B<fastr --degap> [MULTIFASTA-FILE...]
=head1 DESCRIPTION
B<fastr> takes multifasta format sequence or alignment data as input,
and facilitates character transliterations on identifiers (by default),
sequences or descriptions using the Perl B<tr///> character
transliteration operator. This facilitates character remapping, case
changes, character deletions, degapping, squashing of repeated
characters, and more. Special modes B<--strict>, B<--iupac>, and B<--degap> automatically direct transliterations on
sequences. They facilitate enforcement of sequence alphabets, and remapping illegal
characters to 'N' or 'X' or a user-defined character.
Options specific to B<fastr>:
B<-s>, B<--sequence> transliterate sequences
B<-d>, B<--description> transliterate descriptions
B<-D>, B<--delete> delete found characters not replaced
B<-S>, B<--squash> squash duplicate replaced characters
B<-n>, B<--no-replace> squash/delete characters in searchlist
B<-c>, B<--complement> complement searchlist as a character set
lib/FAST/Bio/Search/Result/CrossMatchResult.pm view on Meta::CPAN
# You may distribute this module under the same terms as perl itself
# POD documentation - main docs before the code
=head1 NAME
FAST::Bio::Search::Result::CrossMatchResult - CrossMatch-specific subclass of FAST::Bio::Search::Result::GenericResult
=head1 SYNOPSIS
# Working with iterations (CrossMatch results)
$result->next_iteration();
$result->num_iterations();
$result->iteration();
$result->iterations();
# See FAST::Bio::Search::Result::GenericResult for information about working with Results.
# See L<FAST::Bio::Search::Iteration::IterationI|FAST::Bio::Search::Iteration::IterationI>
# for details about working with iterations.
# TODO:
# * Show how to configure a SearchIO stream so that it generates
# CrossMatchResult objects.
=head1 DESCRIPTION
This object is a subclass of FAST::Bio::Search::Result::GenericResult
and provides some operations that facilitate working with CrossMatch
lib/FAST/Bio/Search/Result/CrossMatchResult.pm view on Meta::CPAN
use base qw(FAST::Bio::Search::Result::GenericResult);
=head2 new
Title : new
Usage : my $obj = FAST::Bio::Search::Result::CrossMatchResult->new();
Function: Builds a new FAST::Bio::Search::Result::CrossMatchResult object
Returns : FAST::Bio::Search::Result::CrossMatchResult
Args : See FAST::Bio::Search::Result::GenericResult();
The following parameters are specific to CrossMatchResult:
-iterations => array ref of FAST::Bio::Search::Iteration::IterationI objects
-inclusion_threshold => e-value threshold for inclusion in the
CrossMatch score matrix model (blastpgp)
=cut
# Constructor. Calls the parent constructor, initialises the iteration
# bookkeeping fields, and loads any iterations passed via -iterations.
#   -iterations          : array ref of iteration objects (optional)
#   -inclusion_threshold : stored as-is in '_inclusion_threshold'
# Returns the new object. Throws if -iterations is given but is not an
# array reference.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    # Start with an empty iteration list; add_iteration() populates it.
    $self->{'_iterations'}      = [];
    $self->{'_iteration_index'} = 0;
    $self->{'_iteration_count'} = 0;

    my ($iteration_list, $threshold) = $self->_rearrange([qw(ITERATIONS
                                                             INCLUSION_THRESHOLD)], @args);

    # Set once here; intended to be read-only afterwards.
    $self->{'_inclusion_threshold'} = $threshold;

    if (!defined $iteration_list) {
        # No iterations were supplied. Flag it so that hits(), next_hit()
        # and num_hits() defer to the parent class implementation.
        # This shouldn't get called with the new SearchIO::blast.
        $self->{'_no_iterations'} = 1;
        return $self;
    }

    $self->throw("Must define arrayref of Iterations when initializing a $class\n")
        unless ref($iteration_list) =~ /array/i;

    foreach my $iteration (@{$iteration_list}) {
        $self->add_iteration($iteration);
    }

    #$self->SUPER::algorithm('cross_match');
    return $self;
}
=head2 hits
This method overrides L<FAST::Bio::Search::Result::GenericResult::hits> to take
into account the possibility of multiple iterations, as occurs in CrossMatch reports.
If there are multiple iterations, all 'new' hits for all iterations are returned.
These are the hits that did not occur in a previous iteration.
See Also: L<FAST::Bio::Search::Result::GenericResult::hits>
=cut
# Returns the hits of this result. When the object was built without
# iterations, the parent implementation is used; otherwise the hits()
# of every stored iteration are concatenated in iteration order.
sub hits {
    my ($self) = @_;

    return $self->SUPER::hits if $self->{'_no_iterations'};

    # Collect into a named array so that a scalar-context caller
    # (e.g. num_hits) receives the element count.
    my @collected = map { $_->hits } $self->iterations;
    return @collected;
}
=head2 next_hit
This method overrides L<FAST::Bio::Search::Result::GenericResult::next_hit> to take
into account the possibility of multiple iterations, as occurs in CrossMatch reports.
If there are multiple iterations, calling next_hit() traverses
all of the hits, old and new, for each iteration, calling next_hit() on each iteration.
See Also: L<FAST::Bio::Search::Iteration::GenericIteration::next_hit>
=cut
# Returns the next hit, walking through the stored iterations one after
# another. Without iterations the parent implementation is used.
# Returns nothing once the iteration index runs past the stored list.
sub next_hit {
    my ($self, @args) = @_;

    return $self->SUPER::next_hit(@args) if $self->{'_no_iterations'};

    # '_last_hit' is undefined on the first call and whenever the current
    # iteration has just run out of hits; only then is a fresh index
    # requested from _next_iteration_index().
    unless (defined $self->{'_last_hit'}) {
        $self->{'_iter_index'} = $self->_next_iteration_index;
    }
    my $current = $self->{'_iter_index'};

    return if $current >= scalar @{ $self->{'_iterations'} };

    my $hit = $self->{'_last_hit'} = $self->{'_iterations'}->[$current]->next_hit;
    return $hit if defined $hit;

    # Current iteration exhausted: recurse to move on to the next one.
    return $self->next_hit;
}
=head2 num_hits
This method overrides L<FAST::Bio::Search::Result::GenericResult::num_hits> to take
into account the possibility of multiple iterations, as occurs in CrossMatch reports.
If there are multiple iterations, calling num_hits() returns the number of
'new' hits for each iteration. These are the hits that did not occur
in a previous iteration.
See Also: L<FAST::Bio::Search::Result::GenericResult::num_hits>
=cut
# Returns the number of hits. Without iterations the parent
# implementation answers; otherwise hits() is evaluated in scalar
# context. Throws if the iteration store is undefined.
sub num_hits {
    my ($self) = @_;

    return $self->SUPER::num_hits if $self->{'_no_iterations'};

    $self->throw("Can't get Hits: data not collected.")
        unless defined $self->{'_iterations'};

    # Scalar context on purpose: hits() hands back an array, so this
    # yields its element count.
    my $total = $self->hits;
    return $total;
}
=head2 add_iteration
Title : add_iteration
Usage : $report->add_iteration($iteration)
Function: Adds an IterationI to the stored list of iterations
Returns : Number of IterationI currently stored
Args : FAST::Bio::Search::Iteration::IterationI
=cut
# Appends one iteration object to this result.
#   Args   : an object that isa FAST::Bio::Search::Iteration::IterationI
#   Returns: the number of iterations stored after the call
#   Throws : via $self->throw when the argument is undef, not an object,
#            or an object of an unrelated class
sub add_iteration {
    my ($self, $i) = @_;

    # blessed() guards the ->isa call: invoking a method on undef or on an
    # unblessed reference would die with a generic Perl error instead of
    # the descriptive exception raised below.
    require Scalar::Util;
    if (Scalar::Util::blessed($i)
        && $i->isa('FAST::Bio::Search::Iteration::IterationI'))
    {
        push @{ $self->{'_iterations'} }, $i;
        $self->{'_iteration_count'}++;
    }
    else {
        # Describe what was actually received, for a useful diagnostic.
        my $received = !defined $i ? 'undef'
                     : ref $i      ? ref $i
                     :               "non-reference value '$i'";
        $self->throw("Passed in a $received as an Iteration which is not a "
                   . "FAST::Bio::Search::Iteration::IterationI.");
    }
    return scalar @{ $self->{'_iterations'} };
}
=head2 next_iteration
Title : next_iteration
Usage : while( $it = $result->next_iteration()) { ... }
Function: Returns the next Iteration object, representing all hits
found within a given CrossMatch iteration.
Returns : a FAST::Bio::Search::Iteration::IterationI object or undef if there are no more.
Args : none
=cut
# Hands out the stored iterations one per call. The first call snapshots
# iterations() into a private queue; each call then removes and returns
# the front of that queue (undef once it is empty).
sub next_iteration {
    my ($self) = @_;

    if (!$self->{'_iter_queue_started'}) {
        my @snapshot = $self->iterations();
        $self->{'_iter_queue'}         = [@snapshot];
        $self->{'_iter_queue_started'} = 1;
    }

    return shift @{ $self->{'_iter_queue'} };
}
=head2 iteration
Usage : $iteration = $blast->iteration( $number );
Purpose : Get an IterationI object for the specified iteration
in the search result (CrossMatch).
Returns : FAST::Bio::Search::Iteration::IterationI object
Throws : FAST::Bio::Root::NoSuchThing exception if $number is not within
range of the number of iterations in this report.
Argument : integer (optional, if not specified get the last iteration)
First iteration = 1
=cut
# Returns the iteration with the given 1-based number, or the last stored
# iteration when no number is given. Throws a
# FAST::Bio::Root::NoSuchThing exception when the number lies outside
# 1 .. '_iteration_count'.
sub iteration {
    my ($self, $num) = @_;

    # No argument: default to the last stored iteration.
    if (!defined $num) {
        $num = scalar @{ $self->{'_iterations'} };
    }

    my $in_range = ($num >= 1 && $num <= $self->{'_iteration_count'});
    if (!$in_range) {
        $self->throw(-class => 'FAST::Bio::Root::NoSuchThing',
                     -text  => "No such iteration number: $num. Valid range=1-$self->{'_iteration_count'}",
                     -value => $num);
    }

    # Callers count from 1; the store is 0-based.
    return $self->{'_iterations'}->[$num - 1];
}
=head2 num_iterations
Usage : $num_iterations = $blast->num_iterations;
Purpose : Get the number of iterations in the search result (CrossMatch).
Returns : Total number of iterations in the report
Argument : none (read-only)
=cut
# Read-only accessor for the number of iterations stored in this result.
sub num_iterations {
    my ($self) = @_;
    return $self->{'_iteration_count'};
}
# Methods provided for consistency with BPpsilite.pm (now deprecated);
# these are now merely synonyms
=head2 number_of_iterations
Same as L</num_iterations>.
=cut
# Synonym for num_iterations(), kept for older callers.
sub number_of_iterations {
    my ($self) = @_;
    return $self->num_iterations;
}
=head2 round
Same as L</iteration>.
=cut
# Synonym for iteration(); all arguments are forwarded unchanged.
sub round {
    my ($self, @forwarded) = @_;
    return $self->iteration(@forwarded);
}
=head2 iterations
Title : iterations
Usage : my @iterations = $result->iterations
Function: Returns the IterationI objects contained within this Result
Returns : Array of L<FAST::Bio::Search::Iteration::IterationI> objects
Args : none
=cut
# Returns a copy of the stored iteration list (empty when the store is
# not an array reference).
sub iterations {
    my ($self) = @_;

    my @found;
    if (ref($self->{'_iterations'}) =~ /ARRAY/i) {
        push @found, @{ $self->{'_iterations'} };
    }
    return @found;
}
=head2 no_hits_found
Usage : $nohits = $blast->no_hits_found( $iteration_number );
Purpose : Get boolean indicator indicating whether or not any hits
were present in the report.
This is NOT the same as determining the number of hits via
the hits() method, which will return zero hits if there were no
hits in the report or if all hits were filtered out during the parse.
Thus, this method can be used to distinguish these possibilities
for hitless reports generated when filtering.
Returns : Boolean
Argument : (optional) integer indicating the iteration number (CrossMatch)
If iteration number is not specified and this is a CrossMatch result,
then this method will return true only if all iterations had
no hits found.
=cut
sub no_hits_found {
my ($self, $round) = @_;
my $result = 0; # final return value of this method.
# Watch the double negative!
# result = 0 means "yes hits were found"
# result = 1 means "no hits were found" (for the indicated iteration or all iterations)
# If a iteration was not specified and there were multiple iterations,
# this method should return true only if all iterations had no hits found.
if( not defined $round ) {
if( $self->{'_iterations'} > 1) {
$result = 1;
foreach my $i( 1..$self->{'_iterations'} ) {
if( not defined $self->{"_iteration_$i"}->{'_no_hits_found'} ) {
$result = 0;
last;
}
}
}
else {
$result = $self->{"_iteration_1"}->{'_no_hits_found'};
}
}
lib/FAST/Bio/Search/Result/CrossMatchResult.pm view on Meta::CPAN
Since this is an in-memory implementation
Returns : none
Args : none
=cut
# Resets hit traversal to the beginning: rewinds the parent class state,
# resets the iteration index, forgets the last hit returned by
# next_hit(), and rewinds every stored iteration.
#   Args   : forwarded unchanged to the parent rewind()
#   Returns: nothing
sub rewind {
    my $self = shift;
    $self->SUPER::rewind(@_);
    $self->{'_iteration_index'} = 0;

    # next_hit() only asks for a fresh iteration index while '_last_hit'
    # is undefined. If it stayed set, a rewind in the middle of a
    # traversal would resume from the stale '_iter_index' instead of
    # restarting. Clearing it restores the state a newly built object has.
    # NOTE(review): assumes the parent class does not itself use
    # '_last_hit' -- confirm against GenericResult.
    delete $self->{'_last_hit'};

    foreach my $iteration ($self->iterations) {
        $iteration->rewind;
    }
    return;
}
=head2 inclusion_threshold
Title : inclusion_threshold
Usage : my $incl_thresh = $result->inclusion_threshold; (read-only)
Function: Gets the e-value threshold for inclusion in the CrossMatch
lib/FAST/Bio/Search/Result/ResultI.pm view on Meta::CPAN
my @hits = $self->hits();
eval {@sorted_hits = sort $coderef @hits };
if ($@) {
$self->throw("Unable to sort hits: $@");
}
else {
$self->{'_hits'} = \@sorted_hits;
$self->{'_no_iterations'} = 1; # to bypass iteration checking in hits() method
1;
}
}
=head2 _default_sort_hits
Title : _default_sort_hits
Usage : Do not call directly.
Function: Sort hits in descending order by score
Args : None
lib/FAST/Bio/SearchIO/IteratedSearchResultEventBuilder.pm view on Meta::CPAN
Function: Begins a result event cycle
Returns : none
Args : Type of Report
=cut
# Begins a result event cycle: lets the parent class do its own
# start-of-result handling, then clears the per-result iteration state
# (stored iterations, their count, and both hit-name lookups).
sub start_result {
    my $self = shift;
    $self->SUPER::start_result(@_);

    # Forget everything remembered from the previous result.
    @{$self}{ '_old_hit_names', '_hit_names_below' } = (undef, undef);
    $self->{'_iteration_count'} = 0;
    $self->{'_iterations'}      = [];

    return;
}
=head2 end_result
Title : end_result
Usage : my @results = $parser->end_result
lib/FAST/Bio/SearchIO/IteratedSearchResultEventBuilder.pm view on Meta::CPAN
$data->{"RESULT-query_accession"}= $acc;
}
delete $data->{'runid'};
}
my %args = map { my $v = $data->{$_}; s/RESULT//; ($_ => $v); }
grep { /^RESULT/ } keys %{$data};
$args{'-algorithm'} = uc( $args{'-algorithm_name'} ||
$data->{'RESULT-algorithm_name'} || $type);
$args{'-iterations'} = $self->{'_iterations'};
my $result = $self->factory('result')->create_object(%args);
$result->hit_factory($self->factory('hit'));
$self->{'_iterations'} = [];
return $result;
}
# Title : _add_hit (private function for internal use only)
# Purpose : Applies hit filtering and calls _store_hit if it passes filtering.
# Argument: FAST::Bio::Search::Hit::HitI object
sub _add_hit {
my ($self, $hit) = @_;
lib/FAST/Bio/SearchIO/IteratedSearchResultEventBuilder.pm view on Meta::CPAN
sub _store_hit {
my ($self, $hit, $hit_name, $hit_signif) = @_;
my $ithresh = $self->{'_inclusion_threshold'};
# This is the assumption leading to Bug 1986. The assumption here is that
# the hit name is unique (and thus new), therefore any subsequent encounters
# with a hit containing the same name are filed as old hits. This isn't
# always true (see the bug report for a few examples). Adding an explicit
# check for the presence of iterations, adding to new hits otherwise.
if (exists $self->{'_old_hit_names'}->{$hit_name}
&& scalar @{$self->{_iterations}}) {
if (exists $self->{'_hit_names_below'}->{$hit_name}) {
push @{$self->{'_oldhits_below'}}, $hit;
} elsif ($hit_signif <= $ithresh) {
push @{$self->{'_oldhits_newly_below'}}, $hit;
} else {
push @{$self->{'_oldhits_not_below'}}, $hit;
}
} else {
if ($hit_signif <= $ithresh) {
push @{$self->{'_newhits_below'}}, $hit;
lib/FAST/Bio/SearchIO/IteratedSearchResultEventBuilder.pm view on Meta::CPAN
$args{'-number'} = $self->{'_iteration_count'};
$args{'-oldhits_below'} = $self->{'_oldhits_below'};
$args{'-oldhits_newly_below'} = $self->{'_oldhits_newly_below'};
$args{'-oldhits_not_below'} = $self->{'_oldhits_not_below'};
$args{'-newhits_below'} = $self->{'_newhits_below'};
$args{'-newhits_not_below'} = $self->{'_newhits_not_below'};
$args{'-hit_factory'} = $self->factory('hit');
my $it = $self->factory('iteration')->create_object(%args);
push @{$self->{'_iterations'}}, $it;
return $it;
}
=head2 max_significance
Usage : $obj->max_significance();
Purpose : Set/Get the P or Expect value used as significance screening cutoff.
This is the value of the -signif parameter supplied to new().
Hits with P or E-value above this are skipped.
Returns : Scientific notation number with this format: 1.0e-05.
lib/FAST/Bio/SearchIO/XML/BlastHandler.pm view on Meta::CPAN
'Hsp_pattern-from' => 1,#'patternend',
'Hsp_pattern-to' => 1,#'patternstart',
'Hsp_density' => 1,#'hspdensity',
'Iteration_message' => 1,
'Hit_hsps' => 1,
'BlastOutput_param' => 1,
'Iteration_hits' => 1,
'Statistics' => 1,
'Parameters' => 1,
'BlastOutput' => 1,
'BlastOutput_iterations' => 1,
);
=head2 SAX methods
=cut
=head2 start_document
Title : start_document
Usage : $parser->start_document;
lib/FAST/Bio/SearchIO/XML/PsiBlastHandler.pm view on Meta::CPAN
'Hsp_pattern-from' => 1,#'patternend',
'Hsp_pattern-to' => 1,#'patternstart',
'Hsp_density' => 1,#'hspdensity',
'Iteration_message' => 1,
'Hit_hsps' => 1,
'BlastOutput_param' => 1,
'Iteration_hits' => 1,
'Statistics' => 1,
'Parameters' => 1,
'BlastOutput' => 1,
'BlastOutput_iterations' => 1,
);
=head2 SAX methods
=cut
=head2 start_document
Title : start_document
Usage : $parser->start_document;
lib/FAST/Bio/SearchIO/blast.pm view on Meta::CPAN
-best => boolean. Only process the best hit of each report;
default = false.
=cut
sub _initialize {
my ( $self, @args ) = @_;
$self->SUPER::_initialize(@args);
# Blast reports require a specialized version of the SREB due to the
# possibility of iterations (PSI-BLAST). Forwarding all arguments to it. An
# issue here is that we want to set new default object factories if none are
# supplied.
my $handler = FAST::Bio::SearchIO::IteratedSearchResultEventBuilder->new(@args);
$self->attach_EventHandler($handler);
# 2006-04-26 move this to the attach_handler function in this module so we
# can really reset the handler
# Optimization: caching
# the EventHandler since it is used a lot during the parse.
lib/FAST/Bio/SearchIO/blast.pm view on Meta::CPAN
$self->element(
{
'Name' => 'BlastOutput_rid',
'Data' => $rid
}
);
}
# added Windows workaround for bug 1985
elsif (/^(Searching|Results from round)/) {
next unless $1 =~ /Results from round/;
$self->debug("blast.pm: Possible psi blast iterations found...\n");
$self->in_element('hsp')
&& $self->end_element( { 'Name' => 'Hsp' } );
$self->in_element('hit')
&& $self->end_element( { 'Name' => 'Hit' } );
if ( defined $seeniteration ) {
$self->within_element('iteration')
&& $self->end_element( { 'Name' => 'Iteration' } );
$self->start_element( { 'Name' => 'Iteration' } );
}
lib/FAST/Bio/SearchIO/blastxml.pm view on Meta::CPAN
local $/ = "\n";
local $_;
$self->{'_blastdata'} = '';
my ($sawxmlheader, $okaytoprocess);
my $mode = 'header';
my $tail = << 'XML_END';
</BlastOutput_iterations>
</BlastOutput>
XML_END
# no buffering needed (famous last words...)
my $fh = $self->_fh;
#chop up XML into edible bits for the parser
while( defined( my $line = <$fh>) ) {
next if $line =~ m{^\s*</BlastOutput_iterations>}xmso || $line =~ m{^</BlastOutput>}xmso;
if( $line =~ m{^RPS-BLAST}i ) {
$self->{'_type'} = 'RPS-BLAST';
next;
} elsif ($line =~ m{^<\?xml\sversion="1.0"}xms) {# <?xml version="1.0"?> & <?xml version="1.0" encoding="UTF-8"?>
delete $self->{'_header'} if exists $self->{'_header'};
$sawxmlheader++;
$mode = 'header';
} elsif ($line =~ m{^\s*<Iteration>}xmso) {
if (!$sawxmlheader) {
if (defined $tfh) {
lib/FAST/Bio/SearchIO/cross_match.pm view on Meta::CPAN
# You may distribute this module under the same terms as perl itself
# POD documentation - main docs before the code
=head1 NAME
FAST::Bio::SearchIO::cross_match - CrossMatch-specific subclass of FAST::Bio::SearchIO
=head1 SYNOPSIS
# Working with iterations (CrossMatch results)
my $searchIO = FAST::Bio::SearchIO->new( -format => 'cross_match',
-file => "$file.screen.out" );
while(my $r = $searchIO->next_result) {
while(my $hit = $r->next_hit) {
while(my $hsp = $hit->next_hsp) {
#Do the processing here.
}
}
}
# See FAST::Bio::SearchIO for information about working with Results.
lib/FAST/Bio/SeqIO/agave.pm view on Meta::CPAN
Method(s) that this method calls : _helper_store_attribute_list ,
_process_bio_sequence
=cut
sub _process_fragment_orientation {
my ($self, $line, $data_structure) = @_;
# counter to determine the number of iterations within this while loop.
my $count = 0;
# One or more <fragment_orientation>
while ($$line =~ /<fragment_orientation\s?(.*?)\s?>/) {
my $fragment_orientation;
$self->_helper_store_attribute_list($1, \$fragment_orientation);
$$line = $self->_readline;
# One <bio_sequence>
lib/FAST/Bio/SeqIO/agave.pm view on Meta::CPAN
sub _process_annotations {
my ($self, $line) = @_;
# ( seq_feature | gene | comp_result )+
my $annotations;
$$line = $self->_readline;
my $count = 0; # counter to keep track of number of iterations in the loop.
# One or more of these:
while ($$line =~ /<(seq_feature|gene|comp_result)\s?(.*?)\s?>/) {
if ($$line =~ /<seq_feature\s?(.*?)\s?>/) {
my $seq_feature = $self->_process_seq_feature($line, $1);
push @{$annotations->{'seq_feature'}}, $seq_feature;
} elsif ($$line =~ /<gene\s?(.*?)\s?>/) {
lib/FAST/List/Gen.pm view on Meta::CPAN
$source = $src->can('FETCH');
$size = $src->fsize;
$mutable = $src->mutable;
$src->tail_size($size) if $mutable;
}
curse {
FETCH => sub {
my $i = $_[1];
while ($i > $#list) {
$iter++ >= $size
and croak "too many iterations requested: ".
"$iter. index $i out of bounds [0 .. @{[$size - 1]}]";
local *_ = $from ? $list[-1] :
$source ? \$source->(undef, scalar @list) :
\scalar @list;
eval {push @list, map {ref eq 'FAST::List::Gen::Thunk' ? \$$_->() : \$_} $code->(); 1}
or catch_done and do {
if (ref $@) {
push @list, map {ref eq 'FAST::List::Gen::Thunk' ? \$$_->() : \$_} @{$@};
$size = @list;
$$_ = $size for @tails;
lib/FAST/List/Gen.pm view on Meta::CPAN
if (isagen $size) {
$source = tied(@$size)->can('FETCH');
$mutable = $size->is_mutable;
$size = $size->size;
}
curse {
FETCH => sub {
my $i = $_[1];
$i < $pos and croak "non-monotone access of iterate multi stream, idx($i) < pos($pos)";
while ($i >= $pos) {
$pos >= $size and croak "too many iterations requested: ".
"$pos. index $i out of bounds [0 .. @{[$size - 1]}]";
if ($i == $pos and @last) {
$pos++;
last
}
if (@last) {
shift @last;
$pos++;
next;
}