BioPerl

 view release on metacpan or  search on metacpan

BUGS  view on Meta::CPAN

2686    WU-BLAST XML support
        (partially implemented, but issues remain)
2691    Bio::Microarray::Tools::ReseqChip depends on CPAN module Statistics::Frequency
        (related to bug 2332 above)
2696    global verbosity does not propagate to new objects post-set
        (requires more specific implementation details)
2700    Refactor Build.PL
        (some of the behind-the-scenes stuff is a little klunky)
2702    Scripts recopied upon each call to './Build test'
        (minor bug)
2703    Bio::Tools::GuessSeqFormat guesses SELEX as PHYLIP
        (minor bug that mistakes format)
2707    Bio::Tools::Run::StandAloneBlast does not quote shell metacharacters in
        filenames, but Bio::SearchIO::blast does
        (bug within StandAloneBlast)
2715    LocatableSeq symbols are globally set
        (bug related to sequence symbol issues; rarely surfaces but needs addressing)


Bioperl 1.5.2
=============

Bio/Align/AlignI.pm  view on Meta::CPAN

 Title     : consensus_iupac
 Usage     : $str = $ali->consensus_iupac()
 Function  :

             Makes a consensus using IUPAC ambiguity codes from DNA
             and RNA. The output is in upper case except when gaps in
             a column force output to be in lower case.

             Note that if your alignment sequences contain a lot of
             IUPAC ambiquity codes you often have to manually set
             alphabet.  Bio::PrimarySeq::_guess_type thinks they
             indicate a protein sequence.

 Returns   : consensus string
 Argument  : none
 Throws    : on protein sequences


=cut

sub consensus_iupac {

Bio/AlignIO.pm  view on Meta::CPAN

    # object?
    if( $class =~ /Bio::AlignIO::(\S+)/ ) {
	my ($self) = $class->SUPER::new(@args);
	$self->_initialize(@args);
	return $self;
    } else {

	my %param = @args;
	@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
	my $format = $param{'-format'} ||
	    $class->_guess_format( $param{-file} || $ARGV[0] );
        unless ($format) {
            if ($param{-file}) {
                $format = Bio::Tools::GuessSeqFormat->new(-file => $param{-file}||$ARGV[0] )->guess;
            }
            elsif ($param{-fh}) {
                $format = Bio::Tools::GuessSeqFormat->new(-fh => $param{-fh}||$ARGV[0] )->guess;
            }
        }
	$format = "\L$format";	# normalize capitalization to lower case
        $class->throw("Unknown format given or could not determine it [$format]")
            unless $format;

	return unless( $class->_load_format_module($format) );
	return "Bio::AlignIO::$format"->new(@args);
    }
}

Bio/AlignIO.pm  view on Meta::CPAN

 Returns : 1 for success and 0 for error
 Args    : Bio::Seq object

=cut

sub write_aln {
    my ($self,$aln) = @_;
    $self->throw("Sorry, you cannot write to a generic Bio::AlignIO object.");
}

=head2 _guess_format

 Title   : _guess_format
 Usage   : $obj->_guess_format($filename)
 Function:
 Example :
 Returns : guessed format of filename (lower case)
 Args    :

=cut

sub _guess_format {
my $class = shift;
   return unless $_ = shift;
   return 'clustalw'    if /\.aln$/i;
   return 'emboss'      if /\.(water|needle)$/i;
   return 'metafasta'   if /\.metafasta$/;
   return 'fasta'       if /\.(fasta|fast|seq|fa|fsa|nt|aa)$/i;
   return 'maf'         if /\.maf/i;
   return 'mega'        if /\.(meg|mega)$/i;
   return 'meme'        if /\.meme$/i;
   return 'msf'         if /\.(msf|pileup|gcg)$/i;

Bio/AlignIO/stockholm.pm  view on Meta::CPAN


=head2 new

 Title   : new
 Usage   : my $alignio = Bio::AlignIO->new(-format => 'stockholm'
					  -file   => '>file');
 Function: Initialize a new L<Bio::AlignIO::stockholm> reader or writer
 Returns : L<Bio::AlignIO> object
 Args    : -line_length :  length of the line for the alignment block
           -alphabet    :  symbol alphabet to set the sequences to.  If not set,
                           the parser will try to guess based on the alignment
                           accession (if present), defaulting to 'dna'.
           -spaces      :  (optional, def = 1) boolean to add a space in between
                           the "# STOCKHOLM 1.0" header and the annotation and
                           the annotation and the alignment.

=cut

sub _initialize {
    my ( $self, @args ) = @_;
    $self->SUPER::_initialize(@args);

Bio/Annotation/AnnotationFactory.pm  view on Meta::CPAN

use base qw(Bio::Root::Root Bio::Factory::ObjectFactoryI);

=head2 new

 Title   : new
 Usage   : my $obj = Bio::Annotation::AnnotationFactory->new();
 Function: Builds a new Bio::Annotation::AnnotationFactory object 
 Returns : Bio::Annotation::AnnotationFactory
 Args    : -type => string, name of a L<Bio::AnnotationI> derived class.

If type is not set the module guesses it based on arguments passed to
method L<create_object>.

=cut

sub new {
    my($class,@args) = @_;

    my $self = $class->SUPER::new(@args);
  
    my ($type) = $self->_rearrange([qw(TYPE)], @args);

Bio/Annotation/AnnotationFactory.pm  view on Meta::CPAN

 Args    : initialization parameters specific to the type of annotation
           object we want.

=cut

sub create_object {
   my ($self,@args) = @_;

   my $type = $self->type; 
   if(! $type) {
       # we need to guess this
       $type = $self->_guess_type(@args);
       if(! $type) {
       $self->throw("No annotation type set and unable to guess.");
       }
       # load dynamically if it hasn't been loaded yet
       if(! $self->{'_loaded_types'}->{$type}) {
       eval {
           $self->_load_module($type);
           $self->{'_loaded_types'}->{$type} = 1;
       };
       if($@) {
           $self->throw("Bio::AnnotationI implementation $type ".
                "failed to load: ".$@);

Bio/Annotation/AnnotationFactory.pm  view on Meta::CPAN

        $self->throw("'$type' does not implement Bio::AnnotationI. ".
                 "Too bad.");
        }
        $self->{'_loaded_types'}->{$type} = 1;
    }
    return $self->{'type'} = $type;
    }
    return $self->{'type'};
}

=head2 _guess_type

 Title   : _guess_type
 Usage   :
 Function: Guesses the right type of L<Bio::AnnotationI> implementation
           based on initialization parameters for the prospective
           object.
 Example :
 Returns : the type (a string, the module name)
 Args    : initialization parameters to be passed to the prospective
           cluster object


=cut

sub _guess_type{
    my ($self,@args) = @_;
    my $type;

    # we can only guess from a certain number of arguments
    my ($val, $db, $text, $name, $authors, $start, $tree, $node) =
    $self->_rearrange([qw(VALUE
                  DATABASE
                  TEXT
                  NAME
                  AUTHORS
                  START
                  TREE_OBJ
                  NODE
                  )], @args);

Bio/Annotation/TagTree.pm  view on Meta::CPAN


1) an array reference corresponding to the data structure for Data::Stag;

2) a text string in 'xml', 'itext', 'spxr', or 'indent' format. The default
format is 'xml'; this can be changed using tagformat() prior to using value() or
by passing in the proper format using '-tagformat' upon instantiation;

3) another Bio::Annotation::TagTree or Data::Stag node instance.  In both cases
a deep copy (duplicate) of the instance is generated.

Beyond checking for an array reference no format guessing occurs (so, for
roundtrip tests ensure that the IO formats correspond). For now, we recommend
when using text input to set tagformat() to one of these formats prior to data
loading to ensure the proper Data::Stag parser is selected. After data loading,
the tagformat() can be changed to change the text string format returned by
value(). (this may be rectified in the future)

This Annotation type is fully BioSQL compatible and could be considered a
temporary replacement for nested Bio::Annotation::Collections, at least until
BioSQL and bioperl-db can support nested annotation collections.

Bio/Annotation/TagTree.pm  view on Meta::CPAN

                eval { $self->{db} = Data::Stag->nodify($value) };
            }
            else {

                # assuming this is blessed; passing on to node() and copy
                $self->node( $value, 'copy' );
            }
        }
        else {

            # not trying to guess here for now; we go by the tagformat() setting
            my $h = Data::Stag->getformathandler($format);
            eval { $self->{db} = Data::Stag->from( $format . 'str', $value ) };
        }
        $self->throw("Data::Stag error:\n$@") if $@;
    }

    # get mode?
    # How do we return a data structure?
    # for now, we use the output (if there is a Data::Stag node present)
    # may need to run an eval {} to catch Data::Stag output errors

Bio/Assembly/Contig.pm  view on Meta::CPAN

 Title     : consensus_iupac
 Usage     : $str = $contig->consensus_iupac()
 Function  :

             Makes a consensus using IUPAC ambiguity codes from DNA
             and RNA. The output is in upper case except when gaps in
             a column force output to be in lower case.

             Note that if your alignment sequences contain a lot of
             IUPAC ambiquity codes you often have to manually set
             alphabet.  Bio::PrimarySeq::_guess_type thinks they
             indicate a protein sequence.

 Returns   : consensus string
 Argument  : none
 Throws    : on protein sequences


=cut

sub consensus_iupac {

Bio/Assembly/IO.pm  view on Meta::CPAN

        return $self;
    } else {

        my %param = @args;
        @param{ map { lc $_ } keys %param } = values %param; # lowercase keys

        $class->throw("Need at least a file name to proceed!")
            unless (defined $param{'-file'} || defined $ARGV[0]);

        my $format = $param{'-format'} ||
        $class->_guess_format( $param{-file} || $ARGV[0] );
        $format = "\L$format";	# normalize capitalization to lower case

        if ($format =~ /-/) {
            ($format, my $variant) = split('-', $format, 2);
            push @args, (-variant => $variant);
        }

        return unless( $class->_load_format_module($format) );
        return "Bio::Assembly::IO::$format"->new(@args);
    }

Bio/Assembly/IO.pm  view on Meta::CPAN

  if ( $@ ) {
    print STDERR <<END;
$self: could not load $format - for more details on supported formats please see the Assembly::IO docs
Exception $@
END
  ;
  }
  return $ok;
}

=head2 _guess_format

 Title   : _guess_format
 Usage   : $obj->_guess_format($filename)
 Function: guess format based on file suffix
 Example :
 Returns : guessed format of filename (lower case)
 Args    :
 Notes   : formats that _filehandle() will guess includes
           ace, phrap and tigr at the moment

=cut

sub _guess_format {
   my $class = shift;
   my $arg   = shift;

   return unless defined($arg);
   return 'ace'    if ($arg =~ /\.ace/i);
   return 'phrap'  if ($arg =~ /\.phrap/i);
   return 'tigr'   if ($arg =~ /\.tigr/i);
   return 'maq'    if ($arg =~ /\.maq/i);
   return 'sam'    if ($arg =~ /\.[bs]am/i);
   return 'bowtie' if ($arg =~ /\.bowtie/i);

Bio/Assembly/IO/bowtie.pm  view on Meta::CPAN

    my ($file, $index, $no_head, $no_sq) = $self->_rearrange([qw(FILE INDEX NO_HEAD NO_SQ)], @args);
    $file =~ s/^<//;
    $self->{'_no_head'} = $no_head;
    $self->{'_no_sq'} = $no_sq;

    # get the sequence so Bio::DB::Sam can work with it
    my $refdb;
    my $inspector;
    if (-e $index && -r _ ) {
        $refdb = ($index =~ m/\.gz[^.]*$/) ? $self->_uncompress($index) : $index;
        my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$refdb);
        $self->throw("'$index' is not a fasta file.")
            unless $guesser->guess =~ m/^fasta$/;
    } elsif ($HAVE_BOWTIE) {
        $inspector = Bio::Tools::Run::Bowtie->new( -command => 'inspect' );
        $refdb = $inspector->run($index);
    } else {
        $self->throw("Bio::Tools::Run::Bowtie is not available - cannot extract refdb from index.");
    }

    my $bam_file = $self->_make_bam($self->_bowtie_to_sam($file, $refdb));
    my $sam = Bio::Assembly::IO->new( -file => "<$bam_file", -refdb => $refdb , -format => 'sam' );

Bio/Assembly/IO/bowtie.pm  view on Meta::CPAN

        unless ( -e $file && -r _ );

    if ($file =~ m/\.gz[^.]*$/) {
        $file = $self->_uncompress($file);
        $self->close;
        open my $fh, '<', $file or $self->throw("Could not read file '$file': $!");
        $self->file($file);
        $self->_fh($fh);
    }

    my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$file);
    $self->throw("'$file' is not a bowtie formatted file.") unless $guesser->guess =~ m/^bowtie$/;

    my %SQ;
    my $mapq = 255;
    my $in_pair;
    my @mate_line;
    my $mlen;

    # create temp file for working
    my ($sam_tmp_h, $sam_tmp_f) = $self->tempfile( -dir => $self->{'_tempdir'}, -suffix => '.sam' );
    

Bio/Cluster/ClusterFactory.pm  view on Meta::CPAN

# POD documentation - main docs before the code

=head1 NAME

Bio::Cluster::ClusterFactory - Instantiates a new Bio::ClusterI (or derived class) through a factory

=head1 SYNOPSIS

    use Bio::Cluster::ClusterFactory;
    # if you don't provide a default type, the factory will try
    # some guesswork based on display_id and namespace
    my $factory = Bio::Cluster::ClusterFactory->new(-type => 'Bio::Cluster::UniGene');
    my $clu = $factory->create_object(-description => 'NAT',
                                      -display_id  => 'Hs.2');


=head1 DESCRIPTION

This object will build L<Bio::ClusterI> objects generically.

=head1 FEEDBACK

Bio/Cluster/ClusterFactory.pm  view on Meta::CPAN


use base qw(Bio::Factory::ObjectFactory);

=head2 new

 Title   : new
 Usage   : my $obj = Bio::Cluster::ClusterFactory->new();
 Function: Builds a new Bio::Cluster::ClusterFactory object 
 Returns : Bio::Cluster::ClusterFactory
 Args    : -type => string, name of a ClusterI derived class.
                    If not provided, the factory will have to guess
                    from ID and namespace, which may or may not be
                    successful.

=cut

sub new {
    my($class,@args) = @_;

    my $self = $class->SUPER::new(@args);

Bio/Cluster/ClusterFactory.pm  view on Meta::CPAN

           -description => description of the cluster
           -members     => arrayref, members of the cluster

=cut

sub create_object {
   my ($self,@args) = @_;

   my $type = $self->type();
   if(! $type) {
       # we need to guess this
       $type = $self->_guess_type(@args);
       $self->throw("No cluster type set and unable to guess.") unless $type;
       $self->type($type);
   }
   return $type->new(-verbose => $self->verbose, @args);
}

=head2 _guess_type

 Title   : _guess_type
 Usage   :
 Function: Guesses the right type of L<Bio::ClusterI> implementation
           based on initialization parameters for the prospective
           object.
 Example :
 Returns : the type (a string, the module name)
 Args    : initialization parameters to be passed to the prospective
           cluster object


=cut

sub _guess_type{
    my ($self,@args) = @_;
    my $type;

    # we can only guess from a certain number of arguments
    my ($dispid, $ns, $members) =
	$self->_rearrange([qw(DISPLAY_ID
			      NAMESPACE
			      MEMBERS
			      )], @args);
    # Unigene namespace or ID?
    if($ns && (lc($ns) eq "unigene")) {
	$type = 'Bio::Cluster::UniGene';
    } elsif($dispid && ($dispid =~ /^Hs\.[0-9]/)) {
	$type = 'Bio::Cluster::UniGene';

Bio/ClusterIO.pm  view on Meta::CPAN

    # object?
    if( $class =~ /Bio::ClusterIO::(\S+)/ ) {
	my ($self) = $class->SUPER::new(@args);	
	$self->_initialize(@args);
	return $self;
    } else { 

	my %param = @args;
	@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
	my $format = $param{'-format'} || 
	    $class->_guess_format( $param{-file} || $ARGV[0] );
	$format = "\L$format";	# normalize capitalization to lower case

	return unless( $class->_load_format_module($format) );
	return "Bio::ClusterIO::$format"->new(@args);
    }
}

=head2 format

 Title   : format

Bio/ClusterIO.pm  view on Meta::CPAN

  if ( $@ ) {
    print STDERR <<END;
$self: could not load $format - for more details on supported formats please see the ClusterIO docs
Exception $@
END
  ;
  }
  return $ok;
}

=head2 _guess_format

 Title   : _guess_format
 Usage   : $obj->_guess_format($filename)
 Function: guess format based on file suffix
 Example :
 Returns : guessed format of filename (lower case)
 Args    :
 Notes   : formats that _filehandle() will guess include unigene and dbsnp

=cut

sub _guess_format {
   my $class = shift;
   return unless $_ = shift;
   return 'unigene'   if /\.(data)$/i;
   return 'dbsnp'     if /\.(xml)$/i;
}

sub DESTROY {
    my $self = shift;

    $self->close();

Bio/DB/Fasta.pm  view on Meta::CPAN

=head1 DESCRIPTION

Bio::DB::Fasta provides indexed access to a single Fasta file, several files,
or a directory of files. It provides persistent random access to each sequence
entry (either as a Bio::PrimarySeqI-compliant object or a string), and to
subsequences within each entry, allowing you to retrieve portions of very large
sequences without bringing the entire sequence into memory. Bio::DB::Fasta is
based on Bio::DB::IndexedBase. See this module's documentation for details.

The Fasta files may contain any combination of nucleotide and protein sequences;
during indexing the module guesses the molecular type. Entries may have any line
length up to 65,536 characters, and different line lengths are allowed in the
same file.  However, within a sequence entry, all lines must be the same length
except for the last. An error will be thrown if this is not the case.

The module uses /^E<gt>(\S+)/ to extract the primary ID of each sequence
from the Fasta header. See -makeid in Bio::DB::IndexedBase to pass a callback
routine to reversibly modify this primary ID, e.g. if you wish to extract a
specific portion of the gi|gb|abc|xyz GenBank IDs.

=head1 DATABASE CREATION AND INDEXING

Bio/DB/Fasta.pm  view on Meta::CPAN

                        "length except the last. Line above #$. '$fap' is $l2_len".
                        " != $l3_len chars.");
                }
                if ($blank_lines) {
                    # Blank lines not allowed in entry
                    $self->throw("Blank lines can only precede header lines, ".
                        "found preceding line #$.");
                }
            }
            $linelen  ||= length $line;
            $alphabet ||= $self->_guess_alphabet($line);
            $seq_lines++;
        }
        $last_line = $line;
    }

    # Process last entry
    $self->_check_linelength($linelen);
    my $pos = tell $fh;
    if (@ids) {
        my $strlen   = $pos - $offset;

Bio/DB/Flat/BinarySearch.pm  view on Meta::CPAN

        $self->record_size($record_width);
    }
    $format ||= DEFAULT_FORMAT;
    $self->format($format);
    $self->write_flag($write_flag);

    if ( $self->write_flag && !$primary_namespace ) {
        (
            $primary_namespace, $primary_pattern,
            $start_pattern,     $secondary_patterns
        ) = $self->_guess_patterns( $self->format );
    }

    $self->primary_pattern($primary_pattern);
    $self->primary_namespace($primary_namespace);
    $self->start_pattern($start_pattern);
    $self->secondary_patterns($secondary_patterns);

    return $self;
}

Bio/DB/Flat/BinarySearch.pm  view on Meta::CPAN

    # "length $_" will report 4 although the line is 5 bytes in length.
    # We assume that all lines have the same line separator and only read current line.
    my $init_pos   = tell($fh);
    my $curr_line  = <$fh>;
    my $pos_diff   = tell($fh) - $init_pos;
    my $correction = $pos_diff - length $curr_line;
    seek $fh, $init_pos, 0; # Rewind position to proceed to read the file

    while (<$fh>) {
        $last_one = $_;
        $self->{alphabet} ||= $self->guess_alphabet($_);
        if ( $_ =~ /$start_pattern/ ) {
            if ( $done == 0 ) {
                $id = $new_primary_entry;
                $self->{alphabet} ||= $self->guess_alphabet($_);

                my $tmplen = ( tell $fh ) - length($_) - $correction;

                $length = $tmplen - $pos;

                unless ( defined($id) ) {
                    $self->throw("No id defined for sequence");
                }
                unless ( defined($fileid) ) {
                    $self->throw("No fileid defined for file $file");

Bio/DB/Flat/BinarySearch.pm  view on Meta::CPAN

    );

    $index->build_index(@files);

    return $index;
}

# EVERYTHING THAT FOLLOWS THIS
# is an awful hack - in reality Michele's code needs to be rewritten
# to use Bio::SeqIO, but I have too little time to do this -- LS
sub guess_alphabet {
    my $self = shift;
    my $line = shift;

    my $format = $self->format;
    return 'protein' if $format eq 'swissprot';

    if ( $format eq 'genbank' ) {
        return unless $line =~ /^LOCUS/;
        return 'dna' if $line =~ /\s+\d+\s+bp/i;
        return 'protein';

Bio/DB/Flat/BinarySearch.pm  view on Meta::CPAN

        return unless $line =~ /^ID/;
        return 'dna' if $line =~ / DNA;/i;
        return 'rna' if $line =~ / RNA;/i;
        return 'protein';
    }

    return;
}

# return (namespace,primary_pattern,start_pattern,secondary_pattern)
sub _guess_patterns {
    my $self   = shift;
    my $format = shift;
    if ( $format =~ /swiss(prot)?/i ) {
        return ( 'ID', "^ID   (\\S+)", "^ID   (\\S+)",
            { ACC => "^AC   (\\S+);" } );
    }

    if ($format =~ /embl/i) {
        return ('ID',
            "^ID   (\\S+[^; ])",

Bio/DB/GFF/Adaptor/biofetch.pm  view on Meta::CPAN

			gname  => $acc,
			tstart => undef,
			tstop  => undef,
			attributes  => [[Note => $seq->desc],@aliases],
		       }
		      );
  # now load each feature in turn
  my ($transcript_version,$mRNA_version) = (0,0);
  for my $feat ($seq->all_SeqFeatures) {
    my $attributes = $self->get_attributes($feat);
    my $name       = $self->guess_name($attributes);

    my $location = $feat->location;
    my @segments = map {[$_->start,$_->end,$_->seq_id]}
      $location->can('sub_Location') ? $location->sub_Location : $location;

# this changed CDS to coding, but that is the wrong thing to do, since
# CDS is in SOFA and coding is not
#    my $type     =   $feat->primary_tag eq 'CDS'   ? 'coding'
#                   : $feat->primary_tag;
    my $type=  $feat->primary_tag;

Bio/DB/GFF/Adaptor/biofetch.pm  view on Meta::CPAN

  my @tags = $seq->all_tags or return;
  my @result;
  foreach my $tag (@tags) {
    foreach my $value ($seq->each_tag_value($tag)) {
      push @result,[$tag=>$value];
    }
  }
  \@result;
}

sub guess_name {
  my $self = shift;
  my $attributes = shift;
# remove this fix when Lincoln fixes it properly
  return ["Misc" => "Misc"] unless ($attributes);  # these are arbitrary, and possibly destructive defaults
  my @ordered_attributes = sort {($self->_preferred_tags->{$a->[0]} || 0) <=> ($self->_preferred_tags->{$b->[0]} || 0)} @$attributes;
  my $best = pop @ordered_attributes;
  @$attributes = @ordered_attributes;
  return $best;
}

Bio/DB/GFF/Adaptor/biofetch_oracle.pm  view on Meta::CPAN

			gclass => $self->refclass,
			gname  => $acc,
			tstart => undef,
			tstop  => undef,
			attributes  => [[Note => $seq->desc],@aliases],
		       }
		      );
  # now load each feature in turn
  for my $feat ($seq->all_SeqFeatures) {
    my $attributes = $self->get_attributes($feat);
    my $name       = $self->guess_name($attributes);

    my $location = $feat->location;
    my @segments = map {[$_->start,$_->end,$_->seq_id]}
      $location->can('sub_Location') ? $location->sub_Location : $location;

    my $type     =  $feat->primary_tag eq 'CDS' ? 'mRNA'  : $feat->primary_tag;
    my $parttype =  $feat->primary_tag eq 'gene' ? 'exon' : $feat->primary_tag;

    if ($feat->primary_tag =~ /^(gene|CDS)$/) {
      $self->load_gff_line( {

Bio/DB/GFF/Adaptor/biofetch_oracle.pm  view on Meta::CPAN

  my @tags = $seq->all_tags or return;
  my @result;
  foreach my $tag (@tags) {
    foreach my $value ($seq->each_tag_value($tag)) {
      push @result,[$tag=>$value];
    }
  }
  \@result;
}

sub guess_name {
  my $self = shift;
  my $attributes = shift;
# remove this fix when Lincoln fixes it properly
  return ["Misc" => "Misc"] unless ($attributes);  # these are arbitrary, and possibly destructive defaults
  my @ordered_attributes = sort {($self->_preferred_tags->{$a->[0]} || 0) <=> ($self->_preferred_tags->{$b->[0]} || 0)} @$attributes;
  my $best = pop @ordered_attributes;
  @$attributes = @ordered_attributes;
  return $best;
}

Bio/DB/GFF/Segment.pm  view on Meta::CPAN

 Title   : alphabet
 Usage   : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
 Function: Returns the type of sequence being one of
           'dna', 'rna' or 'protein'. This is case sensitive.

           This is not called <type> because this would cause
           upgrade problems from the 0.5 and earlier Seq objects.

 Returns : a string either 'dna','rna','protein'. NB - the object must
           make a call of the type - if there is no type specified it
           has to guess.
 Args    : none
 Status  : Virtual


=cut

sub alphabet{
    return 'dna'; # no way this will be anything other than dna!
}

Bio/DB/IndexedBase.pm  view on Meta::CPAN

    if ($start > $stop) {
        # Change the strand
        ($start, $stop) = ($stop, $start);
        $strand *= -1;
    }

    return $id, $start, $stop, $strand;
}


sub _guess_alphabet {
    # Determine the molecular type of the given sequence string:
    #    'dna', 'rna', 'protein' or '' (unknown/empty)
    my ($self, $string) = @_;
    # Handle IUPAC residues like PrimarySeq does
    my $alphabet = Bio::PrimarySeq::_guess_alphabet_from_string($self, $string, 1);
    return $alphabet eq 'dna' ? DNA
           : $alphabet eq 'rna' ? RNA
           : $alphabet eq 'protein' ? PROTEIN
           : NA;
}


sub _makeid {
    # Process the header line by applying any transformation given in -makeid
    my ($self, $header_line) = @_;

Bio/DB/SeqI.pm  view on Meta::CPAN

   $self->throw("Object did not provide a get_all_ids method");
}


=head2 get_Seq_by_primary_id

 Title   : get_Seq_by_primary_id
 Usage   : $seq = $db->get_Seq_by_primary_id($primary_id_string);
 Function: Gets a Bio::Seq object by the primary id. The primary
           id in these cases has to come from $db->get_all_primary_ids.
           There is no other way to get (or guess) the primary_ids
           in a database.

           The other possibility is to get Bio::PrimarySeqI objects
           via the get_PrimarySeq_stream and the primary_id field
           on these objects are specified as the ids to use here.
 Returns : A Bio::Seq object
 Args    : accession number (as a string)
 Throws  : "acc does not exist" exception

=cut

Bio/DB/Universal.pm  view on Meta::CPAN


    $embl = Bio::Index::EMBL->new( -filename => '/some/index/filename/locally/stored');
    $uni->use_database('embl',$embl);

    # treat it like a normal database. Recognises strings
    # like gb|XXXXXX and embl:YYYYYY

    $seq1 = $uni->get_Seq_by_id("embl:HSHNRNPA");
    $seq2 = $uni->get_Seq_by_acc("gb|A000012");

    # with no separator, tries to guess database. In this case the
    # _ is considered to be indicative of swissprot
    $seq3 = $uni->get_Seq_by_id('ROA1_HUMAN');

=head1 DESCRIPTION

Artificial database that delegates to specific databases, with a
"smart" (well, smartish) guessing routine for what the ids. No doubt
the smart routine can be made smarter.

The hope is that you can make this database and just throw ids at it -
for most easy cases it will sort you out. Personally, I would be
making sure I knew where each id came from and putting it into its own
database first - but this is a quick and dirty solution.

By default this connects to web orientated databases, with all the
reliability and network bandwidth costs this implies. However you can
subsistute your own local databases - they could be Bio::Index

Bio/DB/Universal.pm  view on Meta::CPAN

 Example :
 Returns : 
 Args    :


=cut

sub get_Seq_by_id{
   my ($self,$str) = @_;

   my ($tag,$id) = $self->guess_id($str);

   return $self->{'db_hash'}->{$tag}->get_Seq_by_id($id);
}


=head2 get_Seq_by_acc

 Title   : get_Seq_by_acc
 Usage   :
 Function:
 Example :
 Returns : 
 Args    :


=cut

sub get_Seq_by_acc {
   my ($self,$str) = @_;

   my ($tag,$id) = $self->guess_id($str);

   return $self->{'db_hash'}->{$tag}->get_Seq_by_acc($id);
}



=head2 guess_id

 Title   : guess_id
 Usage   :
 Function:
 Example :
 Returns : 
 Args    :


=cut

sub guess_id{
   my ($self,$str) = @_;
   
   if( $str =~ /(\S+)[:|\/;](\w+)/ ) {
       my $tag;
       my $db = $1;
       my $id = $2;
       if( $db =~ /gb/i || $db =~ /genbank/i || $db =~ /ncbi/i ) {
	   $tag = 'genbank';
       } elsif ( $db =~ /embl/i || $db =~ /emblbank/ || $db =~ /^em/i ) {
	   $tag = 'embl';
       } elsif ( $db =~ /swiss/i || $db =~ /^sw/i || $db =~ /sptr/ ) {
	   $tag = 'swiss';
       } else {
	   # throw for the moment
	   $self->throw("Could not guess database type $db from $str");
       }
       return ($tag,$id);

   } else {
       my $tag;
       # auto-guess from just the id
       if( $str =~ /_/ ) {
	   $tag = 'swiss';
       } elsif ( $str =~ /^[QPR]\w+\d$/ ) {
	   $tag = 'swiss';
       } elsif ( $str =~ /[A-Z]\d+/ ) {
	   $tag = 'genbank';
       } else {
	   # default genbank...
	   $tag = 'genbank';
       }

Bio/DB/UpdateableSeqI.pm  view on Meta::CPAN



=cut

=head2 get_Seq_by_primary_id

 Title   : get_Seq_by_primary_id
 Usage   : $seq = $db->get_Seq_by_primary_id($primary_id_string);
 Function: Gets a Bio::Seq object by the primary id. The primary
           id in these cases has to come from $db->get_all_primary_ids.
           There is no other way to get (or guess) the primary_ids
           in a database.

           The other possibility is to get Bio::PrimarySeqI objects
           via the get_PrimarySeq_stream and the primary_id field
           on these objects are specified as the ids to use here.
 Returns : A Bio::Seq object
 Args    : accession number (as a string)
 Throws  : "acc does not exist" exception


Bio/Index/AbstractSeq.pm  view on Meta::CPAN

   return values %bytepos;
}


=head2 get_Seq_by_primary_id

 Title   : get_Seq_by_primary_id
 Usage   : $seq = $db->get_Seq_by_primary_id($primary_id_string);
 Function: Gets a Bio::Seq object by the primary id. The primary
           id in these cases has to come from $db->get_all_primary_ids.
           There is no other way to get (or guess) the primary_ids
           in a database.

           The other possibility is to get Bio::PrimarySeqI objects
           via the get_PrimarySeq_stream and the primary_id field
           on these objects are specified as the ids to use here.
 Returns : A Bio::Seq object
 Args    : primary id (as a string)
 Throws  : "acc does not exist" exception


Bio/Map/GeneMap.pm  view on Meta::CPAN

 Usage   : $string = $obj->seq()
 Function: Get/set the sequence as a string of letters. When getting, If the
           GeneMap object didn't have sequence attached directly to it for the
           region requested, the map's gene's database will be asked for the
           sequence, and failing that, the map's gene's positions will be asked
           for their sequences. Areas for which no sequence could be found will
           be filled with Ns, unless no sequence was found anywhere, in which
           case undef is returned.
 Returns : string
 Args    : Optionally on set the new value (a string). An optional second
           argument presets the alphabet (otherwise it will be guessed).

=cut

sub seq {
    my ($self, @args) = @_;
    my $seq = $self->SUPER::seq(@args);
    my $expected_length = $self->length;
    if (! $seq || CORE::length($seq) < $expected_length) {
        my @have = split('', $seq || '');
        my @result;

Bio/Map/GenePosition.pm  view on Meta::CPAN


=head2 seq

 Title   : seq
 Usage   : my $string = $position->seq();
 Function: Get/set the sequence as a string of letters. If no sequence is
           manually set by you, the position's map will be asked for the
           sequence, and if available, that will be returned.
 Returns : scalar
 Args    : Optionally on set the new value (a string). An optional second
           argument presets the alphabet (otherwise it will be guessed).

=cut

sub seq {
    # $shortcut is internal-use only by GeneMap
    my ($self, $str, $alpha, $shortcut) = @_;
    
    my $seq = $self->SUPER::seq($str, $alpha);
    
    if ($seq) {

Bio/Map/PositionWithSequence.pm  view on Meta::CPAN

    return $self;
}

=head2 seq

 Title   : seq
 Usage   : my $string = $obj->seq();
 Function: Get/set the sequence as a string of letters.
 Returns : scalar
 Args    : Optionally on set the new value (a string). An optional second
           argument presets the alphabet (otherwise it will be guessed).

=cut

sub seq {
    my ($self, $str, $alpha) = @_;
    
    # done like this because SUPER will set seq to undef if undef supplied,
    # but GeneMap wants to send undef, undef, 1 to decendants of this method
    
    my $seq;

Bio/MapIO.pm  view on Meta::CPAN

  # object?
  if( $class =~ /Bio::MapIO::(\S+)/ ) {
	my ($self) = $class->SUPER::new(@args);	
	$self->_initialize(@args);
	return $self;
    } else { 
	
	my %param = @args;
	@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
	my $format = $param{'-format'} || 
	    $class->_guess_format( $param{'-file'} || $ARGV[0] ) ||
		'mapmaker';
	$format = "\L$format";	# normalize capitalization to lower case

	# normalize capitalization
	return unless( $class->_load_format_module($format) );
	return "Bio::MapIO::$format"->new(@args);
    }

}

Bio/MapIO.pm  view on Meta::CPAN

Exception $@
For more information about the MapIO system please see the MapIO docs.
This includes ways of checking for formats at compile time, not run time
END
  ;
  }
  return $ok;
}


=head2 _guess_format

 Title   : _guess_format
 Usage   : $obj->_guess_format($filename)
 Function:
 Example :
 Returns : guessed format of filename (lower case)
 Args    :

=cut

sub _guess_format {
   my $class = shift;
   return unless $_ = shift;
   return 'mapmaker'   if /\.(map)$/i;
   return 'mapxml' if /\.(xml)$/i;
}

sub DESTROY {
    my $self = shift;

    $self->close();

Bio/Matrix/IO.pm  view on Meta::CPAN

    # object?
    if( $class =~ /Bio::Matrix::IO::(\S+)/ ) {
	my ($self) = $class->SUPER::new(@args);	
	$self->_initialize(@args);
	return $self;
    } else { 

	my %param = @args;
	@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
	my $format = $param{'-format'} || 
	    $class->_guess_format( $param{'-file'} || $ARGV[0] ) ||
		'scoring';
	$format = "\L$format";	# normalize capitalization to lower case

	# normalize capitalization
	return unless( $class->_load_format_module($format) );
	return "Bio::Matrix::IO::$format"->new(@args);
    }
}

=head2 newFh

Bio/Matrix/IO.pm  view on Meta::CPAN

For more information about the Matrix::IO system please see the
Matrix::IO docs.  This includes ways of checking for formats at
compile time, not run time
END
  ;
  }
  return $ok;
}


=head2 _guess_format

 Title   : _guess_format
 Usage   : $obj->_guess_format($filename)
 Returns : guessed format of filename (lower case)
 Args    : filename

=cut

sub _guess_format {
   my $class = shift;
   return unless $_ = shift;
   return 'scoring'   if /BLOSUM|PAM$/i;
   return 'phylip'   if /\.dist$/i;
}

sub DESTROY {
    my $self = shift;
    $self->close();
}

Bio/Matrix/PSM/IO.pm  view on Meta::CPAN

    # or do we want to call SUPER on an object if $caller is an
    # object?
    if( $class =~ /Bio::Matrix::PSM::IO(\S+)/ ) {
	$self = $class->SUPER::new(@args);
	$self->_initialize(@args);
	return $self;
    } else {
	my %param = @args;
	@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
	my $format = $param{'-format'} ||
	    $class->_guess_format( $param{'-file'} || $ARGV[0] ) ||
	    'scoring';
	$class->throw("$format format unrecognized or an argument error occurred\n.") if (!grep(/$format/,@Bio::Matrix::PSM::IO::PSMFORMATS));
	$format = "\L$format"; # normalize capitalization to lower case

	# normalize capitalization
	return unless( $class->_load_format_module($format) );
	return "Bio::Matrix::PSM::IO::$format"->new(@args);
    }
}

Bio/Matrix/PSM/IO.pm  view on Meta::CPAN

For more information about the Matrix::PSM::IO system please see the
Matrix::PSM::IO docs.  This includes ways of checking for formats at
compile time, not run time
END
  ;
  }
  return $ok;
}


=head2 _guess_format

 Title   : _guess_format
 Usage   : $obj->_guess_format($filename)
 Returns : guessed format of filename (lower case)
 Args    : filename

=cut

sub _guess_format {
    my $class = shift;
    return unless $_ = shift;
    return 'meme'   if /.meme$|meme.html$/i;
    return 'transfac'   if /\.dat$/i;
    return 'mast'   if /^mast\.|\.mast.html$|.mast$/i;
}

=head2 next_psm

 Title   : next_psm

Bio/Nexml/Factory.pm  view on Meta::CPAN

    
    my ($self, $aln, $taxa, @args) = @_;
    
    #most of the code below ripped from Bio::Phylo::Matrices::Matrix::new_from_bioperl()
    if ( $aln->isa('Bio::Align::AlignI') ) {
            $aln->unmatch;
            $aln->map_chars('\.','-');
            my @seqs = $aln->each_seq;
            my ( $type, $missing, $gap, $matchchar ); 
            if ( $seqs[0] ) {
                $type = $seqs[0]->alphabet || $seqs[0]->_guess_alphabet || 'dna';
            }
            else {
                $type = 'dna';
            }
            
            my $matrix = $fac->create_matrix( 
                '-type' => $type,
                '-special_symbols' => {
                    '-missing'   => $aln->missing_char || '?',
                    '-matchchar' => $aln->match_char   || '.',

Bio/Nexml/Factory.pm  view on Meta::CPAN

 Title   : create_bphylo_seq
 Usage   : my $bphylo_seq = $factory->create_bphylo_seq($bperl_seq);
 Function: Converts a L<Bio::Seq> object into Bio::Phylo::Matrices::Matrix object
 Returns : a Bio::Phylo::Matrices::Matrix object
 Args    : Bio::Seq object

=cut

sub create_bphylo_seq {
    my ($self, $seq, $taxa, @args) = @_;
    my $type    = $seq->alphabet || $seq->_guess_alphabet || 'dna';
    $type = uc($type);
    
    my $dat = create_bphylo_datum($seq, $taxa, '-type' => $type);  
        
    # copy seq string
    my $seqstring = $seq->seq;
    if ( $seqstring and $seqstring =~ /\S/ ) {
        eval { $dat->set_char( $seqstring ) };
        if ( $@ and UNIVERSAL::isa($@,'Bio::Phylo::Util::Exceptions::InvalidData') ) {
            $self->throw("\n\nThe BioPerl sequence object contains invalid data ($seqstring)\n");

Bio/Nexml/Factory.pm  view on Meta::CPAN

    my $class = 'Bio::Phylo::Matrices::Datum';
    my $feats;
    # want $seq type-check here? Allowable: is-a Bio::PrimarySeq, 
        #  Bio::LocatableSeq /maj
    if (@args % 2) { # odd
        $feats = shift @args;
        unless (ref($feats) eq 'ARRAY') {
        Bio::Root::Root->throw("Third argument must be array of SeqFeatures");
        }
    }
        my $type = $seq->alphabet || $seq->_guess_alphabet || 'dna';
        my $self = $class->new( '-type' => $type, @args );
        # copy seq string
        my $seqstring = $seq->seq;
        if ( $seqstring and $seqstring =~ /\S/ ) {
            eval { $self->set_char( $seqstring ) };
            if ( $@ and UNIVERSAL::isa($@,'Bio::Phylo::Util::Exceptions::InvalidData') ) {
                $self->throw("\n\nThe BioPerl sequence object contains invalid data ($seqstring)\n");
            }
        }      
        

Bio/Ontology/OntologyStore.pm  view on Meta::CPAN

    foreach my $ont (@_) {
	$self->throw(ref($ont)." does not implement Bio::Ontology::OntologyI")
	    unless $ont && ref($ont) && $ont->isa("Bio::Ontology::OntologyI");
	# remove it from both the id hash and the name hash
	delete $ont_store_by_id{$ont->identifier()};
	delete $ont_store_by_name{$ont->name()} if $ont->name();
    }
    return 1;
}

=head2 guess_ontology()

 Usage   : my $ontology = 
           Bio::Ontology::OntologyStore->guess_ontology('GO:0000001');
 Function: tries to guess which ontology a term identifier comes from, 
           loads it as necessary,
           and returns it as a Bio::Ontology::Ontology object.
 Example :
 Returns : a Bio::Ontology::Ontology object, or warns and returns undef
 Args    : an ontology term identifier in XXXX:DDDDDDD format.  
           Guessing is based on the XXXX string before the colon.

=cut

sub guess_ontology {
  my ($self,$id) = @_;

  my($prefix) = $id =~ /^(.+?):.+$/;

  my %prefix = (
                SO => 'Sequence Ontology',
                SOFA => 'Sequence Ontology Feature Annotation',
                GO => 'Gene Ontology',
               );

Bio/OntologyIO.pm  view on Meta::CPAN


sub _map_format {
    my $self = shift;
    my $format = shift;
    my $mod;

    if($format) {
        $mod = $format_driver_map{lc($format)};
        $mod = lc($format) unless $mod;
    } else {
        $self->throw("unable to guess ontology format, specify -format");
    }
    return $mod;
}

sub unescape {
  my( $self, $ref ) = @_;
  $ref =~ s/&lt\\;/\</g;
  $ref =~ s/&gt\\;/\>/g;
  $ref =~ s/&pct\\;/\%/g;
  $ref =~ s/\\n/\n/g;



( run in 0.984 second using v1.01-cache-2.11-cpan-748bfb374f4 )