FAST

 view release on metacpan or  search on metacpan

bin/fastr  view on Meta::CPAN


B<fastr --strict> [B<--ambig>=<char>]                  [MULTIFASTA-FILE...]

B<fastr --iupac>  [B<--ambig>=<char>]                  [MULTIFASTA-FILE...]

B<fastr --degap>                                       [MULTIFASTA-FILE...]

=head1 DESCRIPTION

B<fastr> takes multifasta format sequence or alignment data as input,
and facilitates character transliterations on identifiers (by default),
sequences or descriptions using the Perl B<tr///> character
transliteration operator. This facilitates character remapping, case
changes, character deletions, degapping, squashing of repeated
characters, and more. Special modes B<--strict>, B<--iupac>, and
B<--degap> automatically direct transliterations on sequences and
facilitate enforcement of sequence alphabets, remapping illegal
characters to 'N' or 'X' or a user-defined character.

Options specific to B<fastr>:
  B<-s>, B<--sequence>         transliterate sequences
  B<-d>, B<--description>       transliterate descriptions  
  B<-D>, B<--delete>            delete found characters not replaced
  B<-S>, B<--squash>            squash duplicate replaced characters 
  B<-n>, B<--no-replace>        squash/delete characters in searchlist 
  B<-c>, B<--complement>        complement searchlist as a character set

lib/FAST/Bio/Search/Result/CrossMatchResult.pm  view on Meta::CPAN

# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

FAST::Bio::Search::Result::CrossMatchResult - CrossMatch-specific subclass of FAST::Bio::Search::Result::GenericResult

=head1 SYNOPSIS

    # Working with iterations (CrossMatch results)

    $result->next_iteration();
    $result->num_iterations();
    $result->iteration();
    $result->iterations();

# See FAST::Bio::Search::Result::GenericResult for information about working with Results.

# See L<FAST::Bio::Search::Iteration::IterationI|FAST::Bio::Search::Iteration::IterationI>
# for details about working with iterations.

# TODO:
#     * Show how to configure a SearchIO stream so that it generates
#       CrossMatchResult objects.


=head1 DESCRIPTION

This object is a subclass of FAST::Bio::Search::Result::GenericResult
and provides some operations that facilitate working with CrossMatch

lib/FAST/Bio/Search/Result/CrossMatchResult.pm  view on Meta::CPAN

use base qw(FAST::Bio::Search::Result::GenericResult);

=head2 new

 Title   : new
 Usage   : my $obj = FAST::Bio::Search::Result::CrossMatchResult->new();
 Function: Builds a new FAST::Bio::Search::Result::CrossMatchResult object
 Returns : FAST::Bio::Search::Result::CrossMatchResult
 Args    : See FAST::Bio::Search::Result::GenericResult();
           The following parameters are specific to CrossMatchResult:
             -iterations  => array ref of FAST::Bio::Search::Iteration::IterationI objects
             -inclusion_threshold => e-value threshold for inclusion in the
                                     CrossMatch score matrix model (blastpgp)

=cut

sub new {
  my $class = shift;
  my @args  = @_;

  # The parent constructor runs first and receives the full argument list.
  my $self = $class->SUPER::new(@args);

  # Start with empty iteration bookkeeping.
  $self->{'_iterations'}      = [];
  $self->{'_iteration_index'} = 0;
  $self->{'_iteration_count'} = 0;

  # Pull out the two parameters that are specific to this subclass.
  my ( $iterations, $threshold ) =
      $self->_rearrange( [qw(ITERATIONS INCLUSION_THRESHOLD)], @args );

  # Set once here; new() never writes it again.
  $self->{'_inclusion_threshold'} = $threshold;

  # No -iterations argument: flag the object so hits(), next_hit() and
  # num_hits() delegate straight to the parent class.
  if ( !defined $iterations ) {
      $self->{'_no_iterations'} = 1;
      return $self;
  }

  $self->throw("Must define arrayref of Iterations when initializing a $class\n")
      unless ref($iterations) =~ /array/i;

  # add_iteration() validates each element and keeps the count in sync.
  foreach my $iteration ( @{$iterations} ) {
      $self->add_iteration($iteration);
  }

  return $self;
}


=head2 hits

This method overrides L<FAST::Bio::Search::Result::GenericResult::hits> to take 
into account the possibility of multiple iterations, as occurs in CrossMatch reports.

If there are multiple iterations, all 'new' hits for all iterations are returned.
These are the hits that did not occur in a previous iteration.

See Also: L<FAST::Bio::Search::Result::GenericResult::hits>

=cut

sub hits {
   my $self = shift;

   # Without iterations, the parent class owns the hit list.
   return $self->SUPER::hits if $self->{'_no_iterations'};

   # Concatenate the hits of every iteration, in iteration order.  The
   # result is kept in an array so that a scalar-context caller still
   # receives the number of hits.
   my @collected = map { $_->hits } $self->iterations;
   return @collected;
}

=head2 next_hit

This method overrides L<FAST::Bio::Search::Result::GenericResult::next_hit> to take 
into account the possibility of multiple iterations, as occurs in CrossMatch reports.

If there are multiple iterations, calling next_hit() traverses all of
the hits, old and new, for each iteration, calling next_hit() on each iteration.

See Also: L<FAST::Bio::Search::Iteration::GenericIteration::next_hit>

=cut

sub next_hit {
    my ($self, @args) = @_;

    # Without iterations, the parent class does the iterating.
    return $self->SUPER::next_hit(@args) if $self->{'_no_iterations'};

    # A defined '_last_hit' means the current iteration may still have
    # hits left; otherwise ask for the index of the next iteration.
    $self->{'_iter_index'} = $self->_next_iteration_index
        unless defined $self->{'_last_hit'};
    my $iter_index = $self->{'_iter_index'};

    # Ran past the last stored iteration: nothing more to return.
    return if $iter_index >= scalar @{$self->{'_iterations'}};

    my $hit = $self->{'_iterations'}->[$iter_index]->next_hit;
    $self->{'_last_hit'} = $hit;

    return $hit if defined $hit;

    # Current iteration is exhausted ('_last_hit' is now undef), so the
    # recursive call moves on to the following iteration.
    return $self->next_hit;
}


=head2 num_hits

This method overrides L<FAST::Bio::Search::Result::GenericResult::num_hits> to take 
into account the possibility of multiple iterations, as occurs in CrossMatch reports.

If there are multiple iterations, calling num_hits() returns the number of
'new' hits for each iteration. These are the hits that did not occur
in a previous iteration.

See Also: L<FAST::Bio::Search::Result::GenericResult::num_hits>

=cut

sub num_hits {
   my $self = shift;

   # Without iterations, the parent class knows the count.
   return $self->SUPER::num_hits if $self->{'_no_iterations'};

   $self->throw("Can't get Hits: data not collected.")
       unless defined $self->{'_iterations'};

   # hits() is called in scalar context here, so it yields a count.
   my $count = $self->hits;
   return $count;
}

=head2 add_iteration

 Title   : add_iteration
 Usage   : $report->add_iteration($iteration)
 Function: Adds an IterationI to the stored list of iterations
 Returns : Number of IterationI currently stored
 Args    : FAST::Bio::Search::Iteration::IterationI

=cut

# Append one iteration object to this result.
#
# Args    : $i - object implementing FAST::Bio::Search::Iteration::IterationI
# Returns : number of iterations stored after the call
# Throws  : via $self->throw() when $i is undef, not an object, or an
#           object of the wrong class; nothing is stored in that case.
sub add_iteration {
    my ($self,$i) = @_;

    # Loaded on demand so the block does not depend on a file-level import.
    require Scalar::Util;

    # Check blessed() first: calling ->isa on undef or on a plain reference
    # would die with a raw Perl error instead of the exception below.
    if( Scalar::Util::blessed($i)
        && $i->isa('FAST::Bio::Search::Iteration::IterationI') ) {
        push @{$self->{'_iterations'}}, $i;
        $self->{'_iteration_count'}++;
    } else {
        # Describe what was actually passed, even when it is not a reference.
        my $what = !defined $i ? 'undef'
                 : ref($i)     ? ref($i)
                 :               'non-reference value';
        $self->throw("Passed in a $what as an Iteration which is not a " .
                     "FAST::Bio::Search::Iteration::IterationI.");
    }
    return scalar @{$self->{'_iterations'}};
}


=head2 next_iteration

 Title   : next_iteration
 Usage   : while( $it = $result->next_iteration()) { ... }
 Function: Returns the next Iteration object, representing all hits
           found within a given CrossMatch iteration.
 Returns : a FAST::Bio::Search::Iteration::IterationI object or undef if there are no more.
 Args    : none

=cut

sub next_iteration {
    my $self = shift;

    # First call: copy the current iterations into a private queue.
    # Later calls only consume that copy.
    if ( !$self->{'_iter_queue_started'} ) {
        my @snapshot = $self->iterations();
        $self->{'_iter_queue'}         = \@snapshot;
        $self->{'_iter_queue_started'} = 1;
    }

    # shift yields undef once the queue is empty.
    my $queue = $self->{'_iter_queue'};
    return shift @{$queue};
}

=head2 iteration

 Usage     : $iteration = $blast->iteration( $number );
 Purpose   : Get an IterationI object for the specified iteration
             in the search result (CrossMatch).
 Returns   : FAST::Bio::Search::Iteration::IterationI object
 Throws    : FAST::Bio::Root::NoSuchThing exception if $number is not within 
             range of the number of iterations in this report.
 Argument  : integer (optional, if not specified get the last iteration)
             First iteration = 1

=cut

sub iteration {
    my ($self,$num) = @_;

    # No number given: default to the last stored iteration.
    if ( !defined $num ) {
        $num = scalar @{$self->{'_iterations'}};
    }

    # Iteration numbers are 1-based and bounded by the stored count.
    my $in_range = ( $num >= 1 and $num <= scalar $self->{'_iteration_count'} );
    if ( !$in_range ) {
        $self->throw(-class=>'FAST::Bio::Root::NoSuchThing',
                     -text=>"No such iteration number: $num. Valid range=1-$self->{'_iteration_count'}",
                     -value=>$num);
    }

    # Convert the 1-based number to a 0-based array index.
    my $index = $num - 1;
    return $self->{'_iterations'}->[$index];
}

=head2 num_iterations

 Usage     : $num_iterations = $blast->num_iterations; 
 Purpose   : Get the number of iterations in the search result (CrossMatch).
 Returns   : Total number of iterations in the report
 Argument  : none (read-only)

=cut

sub num_iterations {
    my ($self) = @_;

    # Read-only accessor for the stored iteration counter.
    return $self->{'_iteration_count'};
}

# Methods provided for consistency with BPpsilite.pm (now deprecated);
# these are now merely synonyms

=head2 number_of_iterations

Same as L</num_iterations>.

=cut

sub number_of_iterations {
    my ($self) = @_;

    # Synonym: forwards to num_iterations() on the same object.
    return $self->num_iterations;
}

=head2 round

Same as L</iteration>.

=cut

sub round {
    my ($self, @args) = @_;

    # Synonym: forwards every argument to iteration() unchanged.
    return $self->iteration(@args);
}


=head2 iterations

 Title   : iterations
 Usage   : my @iterations = $result->iterations
 Function: Returns the IterationI objects contained within this Result
 Returns : Array of L<FAST::Bio::Search::Iteration::IterationI> objects
 Args    : none

=cut

sub iterations {
    my ($self) = @_;

    # Copy the stored list when it is an array reference; otherwise the
    # result is empty.  An array is returned so that scalar-context
    # callers get a count.
    my $stored = $self->{'_iterations'};
    my @its = ref($stored) =~ /ARRAY/i ? @{$stored} : ();

    return @its;
}

=head2 no_hits_found

 Usage     : $nohits = $blast->no_hits_found( $iteration_number );
 Purpose   : Get boolean indicator indicating whether or not any hits
             were present in the report.

             This is NOT the same as determining the number of hits via
             the hits() method, which will return zero hits if there were no
             hits in the report or if all hits were filtered out during the parse.

             Thus, this method can be used to distinguish these possibilities
             for hitless reports generated when filtering.

 Returns   : Boolean
 Argument  : (optional) integer indicating the iteration number (CrossMatch)
             If iteration number is not specified and this is a CrossMatch result,
             then this method will return true only if all iterations had
             no hits found.

=cut

sub no_hits_found {
    my ($self, $round) = @_;

    my $result = 0;   # final return value of this method.
    # Watch the double negative! 
    # result = 0 means "yes hits were found"
    # result = 1 means "no hits were found" (for the indicated iteration or all iterations)

    # If a iteration was not specified and there were multiple iterations,
    # this method should return true only if all iterations had no hits found.
    if( not defined $round ) {
        if( $self->{'_iterations'} > 1) {
            $result = 1;
            foreach my $i( 1..$self->{'_iterations'} ) {
                if( not defined $self->{"_iteration_$i"}->{'_no_hits_found'} ) {
                    $result = 0;
                    last;
                }
            }
        }
        else {
            $result = $self->{"_iteration_1"}->{'_no_hits_found'};
        }
    }

lib/FAST/Bio/Search/Result/CrossMatchResult.pm  view on Meta::CPAN

           Since this is an in-memory implementation
 Returns : none
 Args    : none

=cut

# Reset every iterator kept by this result so that traversal starts over.
#
# Args    : none (any arguments are forwarded to the parent's rewind)
# Returns : none
sub rewind {
   my $self = shift;

   # Let the parent class reset whatever it tracks.
   $self->SUPER::rewind(@_);

   $self->{'_iteration_index'} = 0;

   # next_hit() reuses '_iter_index' for as long as '_last_hit' is defined.
   # Clearing both makes the next call ask for a fresh iteration index
   # instead of resuming at the iteration that was active before rewind.
   delete $self->{'_last_hit'};
   delete $self->{'_iter_index'};

   # next_iteration() builds its queue only while this flag is false.
   # Without the reset it would keep draining the old queue after rewind.
   $self->{'_iter_queue_started'} = 0;
   delete $self->{'_iter_queue'};

   # Each iteration has its own hit iterator to reset.
   foreach my $iteration ($self->iterations) {
       $iteration->rewind;
   }

   return;
}


=head2 inclusion_threshold

 Title   : inclusion_threshold
 Usage   : my $incl_thresh = $result->inclusion_threshold; (read-only)
 Function: Gets the e-value threshold for inclusion in the CrossMatch 

lib/FAST/Bio/Search/Result/ResultI.pm  view on Meta::CPAN


    my @hits = $self->hits();
    
    eval {@sorted_hits = sort $coderef @hits };

   if ($@) {
       $self->throw("Unable to sort hits: $@");
   }
   else {
       $self->{'_hits'} = \@sorted_hits;
       $self->{'_no_iterations'} = 1; # to bypass iteration checking in hits() method
       1;
   }
}

=head2 _default_sort_hits

  Title	: _default_sort_hits
  Usage	: Do not call directly.
  Function: Sort hits in descending order by score
  Args	: None

lib/FAST/Bio/SearchIO/IteratedSearchResultEventBuilder.pm  view on Meta::CPAN

 Function: Begins a result event cycle
 Returns : none 
 Args    : Type of Report

=cut

sub start_result {
   my $self = shift;

   # The parent builder handles the event first, with the same arguments.
   $self->SUPER::start_result(@_);

   # Reset the per-result iteration state: no iterations collected yet,
   # and no hit names remembered from a previous result.
   @{$self}{ '_iterations', '_iteration_count' }    = ( [], 0 );
   @{$self}{ '_old_hit_names', '_hit_names_below' } = ( undef, undef );

   return;
}

=head2 end_result

 Title   : end_result
 Usage   : my @results = $parser->end_result

lib/FAST/Bio/SearchIO/IteratedSearchResultEventBuilder.pm  view on Meta::CPAN

            $data->{"RESULT-query_accession"}= $acc;
        }
        delete $data->{'runid'};
    }
    my %args = map { my $v = $data->{$_}; s/RESULT//; ($_ => $v); } 
               grep { /^RESULT/ } keys %{$data};
    
    $args{'-algorithm'} =  uc( $args{'-algorithm_name'} || 
                               $data->{'RESULT-algorithm_name'} || $type);

    $args{'-iterations'} = $self->{'_iterations'};

    my $result = $self->factory('result')->create_object(%args);
    $result->hit_factory($self->factory('hit'));
    $self->{'_iterations'} = [];
    return $result;
}


# Title   : _add_hit (private function for internal use only)
# Purpose : Applies hit filtering and calls _store_hit if it passes filtering.
# Argument: FAST::Bio::Search::Hit::HitI object 

sub _add_hit {
    my ($self, $hit) = @_;

lib/FAST/Bio/SearchIO/IteratedSearchResultEventBuilder.pm  view on Meta::CPAN


sub _store_hit {
    my ($self, $hit, $hit_name, $hit_signif) = @_;

    my $ithresh = $self->{'_inclusion_threshold'};
    
    # This is the assumption leading to Bug 1986. The assumption here is that
    # the hit name is unique (and thus new), therefore any subsequent encounters
    # with a hit containing the same name are filed as old hits. This isn't
    # always true (see the bug report for a few examples). Adding an explicit
    # check for the presence of iterations, adding to new hits otherwise.
    
    if (exists $self->{'_old_hit_names'}->{$hit_name}
        && scalar @{$self->{_iterations}}) {
        if (exists $self->{'_hit_names_below'}->{$hit_name}) {
            push @{$self->{'_oldhits_below'}}, $hit;
        } elsif ($hit_signif <= $ithresh) {
            push @{$self->{'_oldhits_newly_below'}}, $hit;
        } else {
            push @{$self->{'_oldhits_not_below'}}, $hit;
        }
    } else {
        if ($hit_signif <= $ithresh) {
            push @{$self->{'_newhits_below'}}, $hit;

lib/FAST/Bio/SearchIO/IteratedSearchResultEventBuilder.pm  view on Meta::CPAN


    $args{'-number'} = $self->{'_iteration_count'};
    $args{'-oldhits_below'} = $self->{'_oldhits_below'};
    $args{'-oldhits_newly_below'} = $self->{'_oldhits_newly_below'};
    $args{'-oldhits_not_below'} = $self->{'_oldhits_not_below'};
    $args{'-newhits_below'} = $self->{'_newhits_below'};
    $args{'-newhits_not_below'} = $self->{'_newhits_not_below'};
    $args{'-hit_factory'} = $self->factory('hit');

    my $it = $self->factory('iteration')->create_object(%args);
    push @{$self->{'_iterations'}}, $it;
    return $it;
}

=head2 max_significance

 Usage     : $obj->max_significance();
 Purpose   : Set/Get the P or Expect value used as significance screening cutoff.
             This is the value of the -signif parameter supplied to new().
             Hits with P or E-value above this are skipped.
 Returns   : Scientific notation number with this format: 1.0e-05.

lib/FAST/Bio/SearchIO/XML/BlastHandler.pm  view on Meta::CPAN

                'Hsp_pattern-from'     => 1,#'patternend',
                'Hsp_pattern-to'       => 1,#'patternstart',
                'Hsp_density'          => 1,#'hspdensity',
                'Iteration_message'    => 1,
                'Hit_hsps'             => 1,
                'BlastOutput_param'    => 1,
                'Iteration_hits'       => 1,
                'Statistics'           => 1,
                'Parameters'           => 1,
                'BlastOutput'          => 1,
                'BlastOutput_iterations' => 1,     
                   );

=head2 SAX methods

=cut

=head2 start_document

 Title   : start_document
 Usage   : $parser->start_document;

lib/FAST/Bio/SearchIO/XML/PsiBlastHandler.pm  view on Meta::CPAN

                'Hsp_pattern-from'     => 1,#'patternend',
                'Hsp_pattern-to'       => 1,#'patternstart',
                'Hsp_density'          => 1,#'hspdensity',
                'Iteration_message'    => 1,
                'Hit_hsps'             => 1,
                'BlastOutput_param'    => 1,
                'Iteration_hits'       => 1,
                'Statistics'           => 1,
                'Parameters'           => 1,
                'BlastOutput'          => 1,
                'BlastOutput_iterations' => 1,
                   );

=head2 SAX methods

=cut

=head2 start_document

 Title   : start_document
 Usage   : $parser->start_document;

lib/FAST/Bio/SearchIO/blast.pm  view on Meta::CPAN

           -best        => boolean. Only process the best hit of each report;
                           default = false.

=cut

sub _initialize {
    my ( $self, @args ) = @_;
    $self->SUPER::_initialize(@args);

    # Blast reports require a specialized version of the SREB due to the
    # possibility of iterations (PSI-BLAST). Forwarding all arguments to it. An
    # issue here is that we want to set new default object factories if none are
    # supplied.

    my $handler = FAST::Bio::SearchIO::IteratedSearchResultEventBuilder->new(@args);
    $self->attach_EventHandler($handler);
    
    # 2006-04-26 move this to the attach_handler function in this module so we
    # can really reset the handler 
    # Optimization: caching
    # the EventHandler since it is used a lot during the parse.

lib/FAST/Bio/SearchIO/blast.pm  view on Meta::CPAN

            $self->element(
                {
                    'Name' => 'BlastOutput_rid',
                    'Data' => $rid
                }
            );
        }
        # added Windows workaround for bug 1985
        elsif (/^(Searching|Results from round)/) { 
            next unless $1 =~ /Results from round/; 
            $self->debug("blast.pm: Possible psi blast iterations found...\n");
            
            $self->in_element('hsp')
              && $self->end_element( { 'Name' => 'Hsp' } );
            $self->in_element('hit')
              && $self->end_element( { 'Name' => 'Hit' } );
            if ( defined $seeniteration ) {
                $self->within_element('iteration')
                  && $self->end_element( { 'Name' => 'Iteration' } );
                $self->start_element( { 'Name' => 'Iteration' } );
            }

lib/FAST/Bio/SearchIO/blastxml.pm  view on Meta::CPAN

    
    local $/ = "\n";
    local $_;
    $self->{'_blastdata'} = '';
    
    my ($sawxmlheader, $okaytoprocess);
    
    my $mode = 'header';

    my $tail = << 'XML_END';
  </BlastOutput_iterations>
</BlastOutput>
XML_END

    # no buffering needed (famous last words...)
    my $fh = $self->_fh;
    
    #chop up XML into edible bits for the parser
    while( defined( my $line = <$fh>) ) {
        next if $line =~ m{^\s*</BlastOutput_iterations>}xmso || $line =~ m{^</BlastOutput>}xmso;
        if( $line =~ m{^RPS-BLAST}i ) {
            $self->{'_type'} = 'RPS-BLAST';
            next;
        } elsif ($line =~ m{^<\?xml\sversion="1.0"}xms) {# <?xml version="1.0"?> & <?xml version="1.0" encoding="UTF-8"?>
            delete $self->{'_header'} if exists $self->{'_header'};
            $sawxmlheader++;
            $mode = 'header';
        } elsif ($line =~ m{^\s*<Iteration>}xmso) {
            if (!$sawxmlheader) {
                if (defined $tfh) {

lib/FAST/Bio/SearchIO/cross_match.pm  view on Meta::CPAN

# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

FAST::Bio::SearchIO::cross_match - CrossMatch-specific subclass of FAST::Bio::SearchIO

=head1 SYNOPSIS

    # Working with iterations (CrossMatch results)
    my $searchIO = FAST::Bio::SearchIO->new( -format => 'cross_match',
                            -file   => "$file.screen.out" );
    while(my $r = $searchIO->next_result) {
      while(my $hit = $r->next_hit) {
	while(my $hsp = $hit->next_hsp) {
           #Do the processing here.
        }
      }
    }
# See FAST::Bio::SearchIO for information about working with Results.

lib/FAST/Bio/SeqIO/agave.pm  view on Meta::CPAN

Method(s) that this method calls : _helper_store_attribute_list ,
_process_bio_sequence

=cut

sub _process_fragment_orientation {


    my ($self, $line, $data_structure) = @_;

    # counter to determine the number of iterations within this while loop.
    my $count = 0;

    # One or more <fragment_orientation>
    while ($$line =~ /<fragment_orientation\s?(.*?)\s?>/) {

        my $fragment_orientation;
        $self->_helper_store_attribute_list($1, \$fragment_orientation);
        $$line = $self->_readline;

        # One <bio_sequence>

lib/FAST/Bio/SeqIO/agave.pm  view on Meta::CPAN


sub _process_annotations {

    my ($self, $line) = @_;
    # ( seq_feature | gene | comp_result )+

    my $annotations;

    $$line = $self->_readline;

    my $count = 0;              # counter to keep track of number of iterations in the loop.

    # One or more of these:
    while ($$line =~ /<(seq_feature|gene|comp_result)\s?(.*?)\s?>/) {

        if ($$line =~ /<seq_feature\s?(.*?)\s?>/) {

            my $seq_feature = $self->_process_seq_feature($line, $1);
            push @{$annotations->{'seq_feature'}}, $seq_feature;

        } elsif ($$line =~ /<gene\s?(.*?)\s?>/) {

lib/FAST/List/Gen.pm  view on Meta::CPAN

            $source  = $src->can('FETCH');
            $size    = $src->fsize;
            $mutable = $src->mutable;
            $src->tail_size($size) if $mutable;
        }
        curse {
            FETCH => sub {
                my $i = $_[1];
                while ($i > $#list) {
                    $iter++ >= $size
                        and croak "too many iterations requested: ".
                                  "$iter. index $i out of bounds [0 .. @{[$size - 1]}]";
                    local *_ = $from   ? $list[-1] :
                               $source ? \$source->(undef, scalar @list) :
                               \scalar @list;
                    eval {push @list, map {ref eq 'FAST::List::Gen::Thunk' ? \$$_->() : \$_} $code->(); 1}
                      or catch_done and do {
                        if (ref $@) {
                          push @list, map {ref eq 'FAST::List::Gen::Thunk' ? \$$_->() : \$_} @{$@};
                          $size = @list;
                          $$_ = $size for @tails;

lib/FAST/List/Gen.pm  view on Meta::CPAN

        if (isagen $size) {
            $source  = tied(@$size)->can('FETCH');
            $mutable = $size->is_mutable;
            $size    = $size->size;
        }
        curse {
            FETCH => sub {
                my $i = $_[1];
                $i < $pos and croak "non-monotone access of iterate multi stream, idx($i) < pos($pos)";
                while ($i >= $pos) {
                     $pos >= $size and croak "too many iterations requested: ".
                                            "$pos. index $i out of bounds [0 .. @{[$size - 1]}]";
                    if ($i == $pos and @last) {
                        $pos++;
                        last
                    }
                    if (@last) {
                        shift @last;
                        $pos++;
                        next;
                    }



( run in 1.710 second using v1.01-cache-2.11-cpan-71847e10f99 )