view release on metacpan or search on metacpan
2686 WU-BLAST XML support
(partially implemented, but issues remain)
2691 Bio::Microarray::Tools::ReseqChip depends on CPAN module Statistics::Frequency
(related to bug 2332 above)
2696 global verbosity does not propagate to new objects post-set
(requires more specific implementation details)
2700 Refactor Build.PL
(some of the behind-the-scenes stuff is a little klunky)
2702 Scripts recopied upon each call to './Build test'
(minor bug)
2703 Bio::Tools::GuessSeqFormat guesses SELEX as PHYLIP
(minor bug that mistakes format)
2707 Bio::Tools::Run::StandAloneBlast does not quote shell metacharacters in
filenames, but Bio::SearchIO::blast does
(bug within StandAloneBlast)
2715 LocatableSeq symbols are globally set
(bug related to sequence symbol issues; rarely surfaces but needs addressing)
Bioperl 1.5.2
=============
Bio/Align/AlignI.pm view on Meta::CPAN
Title : consensus_iupac
Usage : $str = $ali->consensus_iupac()
Function :
Makes a consensus using IUPAC ambiguity codes from DNA
and RNA. The output is in upper case except when gaps in
a column force output to be in lower case.
Note that if your alignment sequences contain a lot of
IUPAC ambiquity codes you often have to manually set
alphabet. Bio::PrimarySeq::_guess_type thinks they
indicate a protein sequence.
Returns : consensus string
Argument : none
Throws : on protein sequences
=cut
sub consensus_iupac {
Bio/AlignIO.pm view on Meta::CPAN
# object?
if( $class =~ /Bio::AlignIO::(\S+)/ ) {
my ($self) = $class->SUPER::new(@args);
$self->_initialize(@args);
return $self;
} else {
my %param = @args;
@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
my $format = $param{'-format'} ||
$class->_guess_format( $param{-file} || $ARGV[0] );
unless ($format) {
if ($param{-file}) {
$format = Bio::Tools::GuessSeqFormat->new(-file => $param{-file}||$ARGV[0] )->guess;
}
elsif ($param{-fh}) {
$format = Bio::Tools::GuessSeqFormat->new(-fh => $param{-fh}||$ARGV[0] )->guess;
}
}
$format = "\L$format"; # normalize capitalization to lower case
$class->throw("Unknown format given or could not determine it [$format]")
unless $format;
return unless( $class->_load_format_module($format) );
return "Bio::AlignIO::$format"->new(@args);
}
}
Bio/AlignIO.pm view on Meta::CPAN
Returns : 1 for success and 0 for error
Args : Bio::Seq object
=cut
sub write_aln {
my ($self,$aln) = @_;
$self->throw("Sorry, you cannot write to a generic Bio::AlignIO object.");
}
=head2 _guess_format
Title : _guess_format
Usage : $obj->_guess_format($filename)
Function:
Example :
Returns : guessed format of filename (lower case)
Args :
=cut
sub _guess_format {
my $class = shift;
return unless $_ = shift;
return 'clustalw' if /\.aln$/i;
return 'emboss' if /\.(water|needle)$/i;
return 'metafasta' if /\.metafasta$/;
return 'fasta' if /\.(fasta|fast|seq|fa|fsa|nt|aa)$/i;
return 'maf' if /\.maf/i;
return 'mega' if /\.(meg|mega)$/i;
return 'meme' if /\.meme$/i;
return 'msf' if /\.(msf|pileup|gcg)$/i;
Bio/AlignIO/stockholm.pm view on Meta::CPAN
=head2 new
Title : new
Usage : my $alignio = Bio::AlignIO->new(-format => 'stockholm'
-file => '>file');
Function: Initialize a new L<Bio::AlignIO::stockholm> reader or writer
Returns : L<Bio::AlignIO> object
Args : -line_length : length of the line for the alignment block
-alphabet : symbol alphabet to set the sequences to. If not set,
the parser will try to guess based on the alignment
accession (if present), defaulting to 'dna'.
-spaces : (optional, def = 1) boolean to add a space in between
the "# STOCKHOLM 1.0" header and the annotation and
the annotation and the alignment.
=cut
sub _initialize {
my ( $self, @args ) = @_;
$self->SUPER::_initialize(@args);
Bio/Annotation/AnnotationFactory.pm view on Meta::CPAN
use base qw(Bio::Root::Root Bio::Factory::ObjectFactoryI);
=head2 new
Title : new
Usage : my $obj = Bio::Annotation::AnnotationFactory->new();
Function: Builds a new Bio::Annotation::AnnotationFactory object
Returns : Bio::Annotation::AnnotationFactory
Args : -type => string, name of a L<Bio::AnnotationI> derived class.
If type is not set the module guesses it based on arguments passed to
method L<create_object>.
=cut
sub new {
my($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($type) = $self->_rearrange([qw(TYPE)], @args);
Bio/Annotation/AnnotationFactory.pm view on Meta::CPAN
Args : initialization parameters specific to the type of annotation
object we want.
=cut
sub create_object {
my ($self,@args) = @_;
my $type = $self->type;
if(! $type) {
# we need to guess this
$type = $self->_guess_type(@args);
if(! $type) {
$self->throw("No annotation type set and unable to guess.");
}
# load dynamically if it hasn't been loaded yet
if(! $self->{'_loaded_types'}->{$type}) {
eval {
$self->_load_module($type);
$self->{'_loaded_types'}->{$type} = 1;
};
if($@) {
$self->throw("Bio::AnnotationI implementation $type ".
"failed to load: ".$@);
Bio/Annotation/AnnotationFactory.pm view on Meta::CPAN
$self->throw("'$type' does not implement Bio::AnnotationI. ".
"Too bad.");
}
$self->{'_loaded_types'}->{$type} = 1;
}
return $self->{'type'} = $type;
}
return $self->{'type'};
}
=head2 _guess_type
Title : _guess_type
Usage :
Function: Guesses the right type of L<Bio::AnnotationI> implementation
based on initialization parameters for the prospective
object.
Example :
Returns : the type (a string, the module name)
Args : initialization parameters to be passed to the prospective
cluster object
=cut
sub _guess_type{
my ($self,@args) = @_;
my $type;
# we can only guess from a certain number of arguments
my ($val, $db, $text, $name, $authors, $start, $tree, $node) =
$self->_rearrange([qw(VALUE
DATABASE
TEXT
NAME
AUTHORS
START
TREE_OBJ
NODE
)], @args);
Bio/Annotation/TagTree.pm view on Meta::CPAN
1) an array reference corresponding to the data structure for Data::Stag;
2) a text string in 'xml', 'itext', 'spxr', or 'indent' format. The default
format is 'xml'; this can be changed using tagformat() prior to using value() or
by passing in the proper format using '-tagformat' upon instantiation;
3) another Bio::Annotation::TagTree or Data::Stag node instance. In both cases
a deep copy (duplicate) of the instance is generated.
Beyond checking for an array reference no format guessing occurs (so, for
roundtrip tests ensure that the IO formats correspond). For now, we recommend
when using text input to set tagformat() to one of these formats prior to data
loading to ensure the proper Data::Stag parser is selected. After data loading,
the tagformat() can be changed to change the text string format returned by
value(). (this may be rectified in the future)
This Annotation type is fully BioSQL compatible and could be considered a
temporary replacement for nested Bio::Annotation::Collections, at least until
BioSQL and bioperl-db can support nested annotation collections.
Bio/Annotation/TagTree.pm view on Meta::CPAN
eval { $self->{db} = Data::Stag->nodify($value) };
}
else {
# assuming this is blessed; passing on to node() and copy
$self->node( $value, 'copy' );
}
}
else {
# not trying to guess here for now; we go by the tagformat() setting
my $h = Data::Stag->getformathandler($format);
eval { $self->{db} = Data::Stag->from( $format . 'str', $value ) };
}
$self->throw("Data::Stag error:\n$@") if $@;
}
# get mode?
# How do we return a data structure?
# for now, we use the output (if there is a Data::Stag node present)
# may need to run an eval {} to catch Data::Stag output errors
Bio/Assembly/Contig.pm view on Meta::CPAN
Title : consensus_iupac
Usage : $str = $contig->consensus_iupac()
Function :
Makes a consensus using IUPAC ambiguity codes from DNA
and RNA. The output is in upper case except when gaps in
a column force output to be in lower case.
Note that if your alignment sequences contain a lot of
IUPAC ambiquity codes you often have to manually set
alphabet. Bio::PrimarySeq::_guess_type thinks they
indicate a protein sequence.
Returns : consensus string
Argument : none
Throws : on protein sequences
=cut
sub consensus_iupac {
Bio/Assembly/IO.pm view on Meta::CPAN
return $self;
} else {
my %param = @args;
@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
$class->throw("Need at least a file name to proceed!")
unless (defined $param{'-file'} || defined $ARGV[0]);
my $format = $param{'-format'} ||
$class->_guess_format( $param{-file} || $ARGV[0] );
$format = "\L$format"; # normalize capitalization to lower case
if ($format =~ /-/) {
($format, my $variant) = split('-', $format, 2);
push @args, (-variant => $variant);
}
return unless( $class->_load_format_module($format) );
return "Bio::Assembly::IO::$format"->new(@args);
}
Bio/Assembly/IO.pm view on Meta::CPAN
if ( $@ ) {
print STDERR <<END;
$self: could not load $format - for more details on supported formats please see the Assembly::IO docs
Exception $@
END
;
}
return $ok;
}
=head2 _guess_format
Title : _guess_format
Usage : $obj->_guess_format($filename)
Function: guess format based on file suffix
Example :
Returns : guessed format of filename (lower case)
Args :
Notes : formats that _filehandle() will guess includes
ace, phrap and tigr at the moment
=cut
sub _guess_format {
my $class = shift;
my $arg = shift;
return unless defined($arg);
return 'ace' if ($arg =~ /\.ace/i);
return 'phrap' if ($arg =~ /\.phrap/i);
return 'tigr' if ($arg =~ /\.tigr/i);
return 'maq' if ($arg =~ /\.maq/i);
return 'sam' if ($arg =~ /\.[bs]am/i);
return 'bowtie' if ($arg =~ /\.bowtie/i);
Bio/Assembly/IO/bowtie.pm view on Meta::CPAN
my ($file, $index, $no_head, $no_sq) = $self->_rearrange([qw(FILE INDEX NO_HEAD NO_SQ)], @args);
$file =~ s/^<//;
$self->{'_no_head'} = $no_head;
$self->{'_no_sq'} = $no_sq;
# get the sequence so Bio::DB::Sam can work with it
my $refdb;
my $inspector;
if (-e $index && -r _ ) {
$refdb = ($index =~ m/\.gz[^.]*$/) ? $self->_uncompress($index) : $index;
my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$refdb);
$self->throw("'$index' is not a fasta file.")
unless $guesser->guess =~ m/^fasta$/;
} elsif ($HAVE_BOWTIE) {
$inspector = Bio::Tools::Run::Bowtie->new( -command => 'inspect' );
$refdb = $inspector->run($index);
} else {
$self->throw("Bio::Tools::Run::Bowtie is not available - cannot extract refdb from index.");
}
my $bam_file = $self->_make_bam($self->_bowtie_to_sam($file, $refdb));
my $sam = Bio::Assembly::IO->new( -file => "<$bam_file", -refdb => $refdb , -format => 'sam' );
Bio/Assembly/IO/bowtie.pm view on Meta::CPAN
unless ( -e $file && -r _ );
if ($file =~ m/\.gz[^.]*$/) {
$file = $self->_uncompress($file);
$self->close;
open my $fh, '<', $file or $self->throw("Could not read file '$file': $!");
$self->file($file);
$self->_fh($fh);
}
my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$file);
$self->throw("'$file' is not a bowtie formatted file.") unless $guesser->guess =~ m/^bowtie$/;
my %SQ;
my $mapq = 255;
my $in_pair;
my @mate_line;
my $mlen;
# create temp file for working
my ($sam_tmp_h, $sam_tmp_f) = $self->tempfile( -dir => $self->{'_tempdir'}, -suffix => '.sam' );
Bio/Cluster/ClusterFactory.pm view on Meta::CPAN
# POD documentation - main docs before the code
=head1 NAME
Bio::Cluster::ClusterFactory - Instantiates a new Bio::ClusterI (or derived class) through a factory
=head1 SYNOPSIS
use Bio::Cluster::ClusterFactory;
# if you don't provide a default type, the factory will try
# some guesswork based on display_id and namespace
my $factory = Bio::Cluster::ClusterFactory->new(-type => 'Bio::Cluster::UniGene');
my $clu = $factory->create_object(-description => 'NAT',
-display_id => 'Hs.2');
=head1 DESCRIPTION
This object will build L<Bio::ClusterI> objects generically.
=head1 FEEDBACK
Bio/Cluster/ClusterFactory.pm view on Meta::CPAN
use base qw(Bio::Factory::ObjectFactory);
=head2 new
Title : new
Usage : my $obj = Bio::Cluster::ClusterFactory->new();
Function: Builds a new Bio::Cluster::ClusterFactory object
Returns : Bio::Cluster::ClusterFactory
Args : -type => string, name of a ClusterI derived class.
If not provided, the factory will have to guess
from ID and namespace, which may or may not be
successful.
=cut
sub new {
my($class,@args) = @_;
my $self = $class->SUPER::new(@args);
Bio/Cluster/ClusterFactory.pm view on Meta::CPAN
-description => description of the cluster
-members => arrayref, members of the cluster
=cut
sub create_object {
my ($self,@args) = @_;
my $type = $self->type();
if(! $type) {
# we need to guess this
$type = $self->_guess_type(@args);
$self->throw("No cluster type set and unable to guess.") unless $type;
$self->type($type);
}
return $type->new(-verbose => $self->verbose, @args);
}
=head2 _guess_type
Title : _guess_type
Usage :
Function: Guesses the right type of L<Bio::ClusterI> implementation
based on initialization parameters for the prospective
object.
Example :
Returns : the type (a string, the module name)
Args : initialization parameters to be passed to the prospective
cluster object
=cut
sub _guess_type{
my ($self,@args) = @_;
my $type;
# we can only guess from a certain number of arguments
my ($dispid, $ns, $members) =
$self->_rearrange([qw(DISPLAY_ID
NAMESPACE
MEMBERS
)], @args);
# Unigene namespace or ID?
if($ns && (lc($ns) eq "unigene")) {
$type = 'Bio::Cluster::UniGene';
} elsif($dispid && ($dispid =~ /^Hs\.[0-9]/)) {
$type = 'Bio::Cluster::UniGene';
Bio/ClusterIO.pm view on Meta::CPAN
# object?
if( $class =~ /Bio::ClusterIO::(\S+)/ ) {
my ($self) = $class->SUPER::new(@args);
$self->_initialize(@args);
return $self;
} else {
my %param = @args;
@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
my $format = $param{'-format'} ||
$class->_guess_format( $param{-file} || $ARGV[0] );
$format = "\L$format"; # normalize capitalization to lower case
return unless( $class->_load_format_module($format) );
return "Bio::ClusterIO::$format"->new(@args);
}
}
=head2 format
Title : format
Bio/ClusterIO.pm view on Meta::CPAN
if ( $@ ) {
print STDERR <<END;
$self: could not load $format - for more details on supported formats please see the ClusterIO docs
Exception $@
END
;
}
return $ok;
}
=head2 _guess_format
Title : _guess_format
Usage : $obj->_guess_format($filename)
Function: guess format based on file suffix
Example :
Returns : guessed format of filename (lower case)
Args :
Notes : formats that _filehandle() will guess include unigene and dbsnp
=cut
sub _guess_format {
my $class = shift;
return unless $_ = shift;
return 'unigene' if /\.(data)$/i;
return 'dbsnp' if /\.(xml)$/i;
}
sub DESTROY {
my $self = shift;
$self->close();
Bio/DB/Fasta.pm view on Meta::CPAN
=head1 DESCRIPTION
Bio::DB::Fasta provides indexed access to a single Fasta file, several files,
or a directory of files. It provides persistent random access to each sequence
entry (either as a Bio::PrimarySeqI-compliant object or a string), and to
subsequences within each entry, allowing you to retrieve portions of very large
sequences without bringing the entire sequence into memory. Bio::DB::Fasta is
based on Bio::DB::IndexedBase. See this module's documentation for details.
The Fasta files may contain any combination of nucleotide and protein sequences;
during indexing the module guesses the molecular type. Entries may have any line
length up to 65,536 characters, and different line lengths are allowed in the
same file. However, within a sequence entry, all lines must be the same length
except for the last. An error will be thrown if this is not the case.
The module uses /^E<gt>(\S+)/ to extract the primary ID of each sequence
from the Fasta header. See -makeid in Bio::DB::IndexedBase to pass a callback
routine to reversibly modify this primary ID, e.g. if you wish to extract a
specific portion of the gi|gb|abc|xyz GenBank IDs.
=head1 DATABASE CREATION AND INDEXING
Bio/DB/Fasta.pm view on Meta::CPAN
"length except the last. Line above #$. '$fap' is $l2_len".
" != $l3_len chars.");
}
if ($blank_lines) {
# Blank lines not allowed in entry
$self->throw("Blank lines can only precede header lines, ".
"found preceding line #$.");
}
}
$linelen ||= length $line;
$alphabet ||= $self->_guess_alphabet($line);
$seq_lines++;
}
$last_line = $line;
}
# Process last entry
$self->_check_linelength($linelen);
my $pos = tell $fh;
if (@ids) {
my $strlen = $pos - $offset;
Bio/DB/Flat/BinarySearch.pm view on Meta::CPAN
$self->record_size($record_width);
}
$format ||= DEFAULT_FORMAT;
$self->format($format);
$self->write_flag($write_flag);
if ( $self->write_flag && !$primary_namespace ) {
(
$primary_namespace, $primary_pattern,
$start_pattern, $secondary_patterns
) = $self->_guess_patterns( $self->format );
}
$self->primary_pattern($primary_pattern);
$self->primary_namespace($primary_namespace);
$self->start_pattern($start_pattern);
$self->secondary_patterns($secondary_patterns);
return $self;
}
Bio/DB/Flat/BinarySearch.pm view on Meta::CPAN
# "length $_" will report 4 although the line is 5 bytes in length.
# We assume that all lines have the same line separator and only read current line.
my $init_pos = tell($fh);
my $curr_line = <$fh>;
my $pos_diff = tell($fh) - $init_pos;
my $correction = $pos_diff - length $curr_line;
seek $fh, $init_pos, 0; # Rewind position to proceed to read the file
while (<$fh>) {
$last_one = $_;
$self->{alphabet} ||= $self->guess_alphabet($_);
if ( $_ =~ /$start_pattern/ ) {
if ( $done == 0 ) {
$id = $new_primary_entry;
$self->{alphabet} ||= $self->guess_alphabet($_);
my $tmplen = ( tell $fh ) - length($_) - $correction;
$length = $tmplen - $pos;
unless ( defined($id) ) {
$self->throw("No id defined for sequence");
}
unless ( defined($fileid) ) {
$self->throw("No fileid defined for file $file");
Bio/DB/Flat/BinarySearch.pm view on Meta::CPAN
);
$index->build_index(@files);
return $index;
}
# EVERYTHING THAT FOLLOWS THIS
# is an awful hack - in reality Michele's code needs to be rewritten
# to use Bio::SeqIO, but I have too little time to do this -- LS
sub guess_alphabet {
my $self = shift;
my $line = shift;
my $format = $self->format;
return 'protein' if $format eq 'swissprot';
if ( $format eq 'genbank' ) {
return unless $line =~ /^LOCUS/;
return 'dna' if $line =~ /\s+\d+\s+bp/i;
return 'protein';
Bio/DB/Flat/BinarySearch.pm view on Meta::CPAN
return unless $line =~ /^ID/;
return 'dna' if $line =~ / DNA;/i;
return 'rna' if $line =~ / RNA;/i;
return 'protein';
}
return;
}
# return (namespace,primary_pattern,start_pattern,secondary_pattern)
sub _guess_patterns {
my $self = shift;
my $format = shift;
if ( $format =~ /swiss(prot)?/i ) {
return ( 'ID', "^ID (\\S+)", "^ID (\\S+)",
{ ACC => "^AC (\\S+);" } );
}
if ($format =~ /embl/i) {
return ('ID',
"^ID (\\S+[^; ])",
Bio/DB/GFF/Adaptor/biofetch.pm view on Meta::CPAN
gname => $acc,
tstart => undef,
tstop => undef,
attributes => [[Note => $seq->desc],@aliases],
}
);
# now load each feature in turn
my ($transcript_version,$mRNA_version) = (0,0);
for my $feat ($seq->all_SeqFeatures) {
my $attributes = $self->get_attributes($feat);
my $name = $self->guess_name($attributes);
my $location = $feat->location;
my @segments = map {[$_->start,$_->end,$_->seq_id]}
$location->can('sub_Location') ? $location->sub_Location : $location;
# this changed CDS to coding, but that is the wrong thing to do, since
# CDS is in SOFA and coding is not
# my $type = $feat->primary_tag eq 'CDS' ? 'coding'
# : $feat->primary_tag;
my $type= $feat->primary_tag;
Bio/DB/GFF/Adaptor/biofetch.pm view on Meta::CPAN
my @tags = $seq->all_tags or return;
my @result;
foreach my $tag (@tags) {
foreach my $value ($seq->each_tag_value($tag)) {
push @result,[$tag=>$value];
}
}
\@result;
}
sub guess_name {
my $self = shift;
my $attributes = shift;
# remove this fix when Lincoln fixes it properly
return ["Misc" => "Misc"] unless ($attributes); # these are arbitrary, and possibly destructive defaults
my @ordered_attributes = sort {($self->_preferred_tags->{$a->[0]} || 0) <=> ($self->_preferred_tags->{$b->[0]} || 0)} @$attributes;
my $best = pop @ordered_attributes;
@$attributes = @ordered_attributes;
return $best;
}
Bio/DB/GFF/Adaptor/biofetch_oracle.pm view on Meta::CPAN
gclass => $self->refclass,
gname => $acc,
tstart => undef,
tstop => undef,
attributes => [[Note => $seq->desc],@aliases],
}
);
# now load each feature in turn
for my $feat ($seq->all_SeqFeatures) {
my $attributes = $self->get_attributes($feat);
my $name = $self->guess_name($attributes);
my $location = $feat->location;
my @segments = map {[$_->start,$_->end,$_->seq_id]}
$location->can('sub_Location') ? $location->sub_Location : $location;
my $type = $feat->primary_tag eq 'CDS' ? 'mRNA' : $feat->primary_tag;
my $parttype = $feat->primary_tag eq 'gene' ? 'exon' : $feat->primary_tag;
if ($feat->primary_tag =~ /^(gene|CDS)$/) {
$self->load_gff_line( {
Bio/DB/GFF/Adaptor/biofetch_oracle.pm view on Meta::CPAN
my @tags = $seq->all_tags or return;
my @result;
foreach my $tag (@tags) {
foreach my $value ($seq->each_tag_value($tag)) {
push @result,[$tag=>$value];
}
}
\@result;
}
sub guess_name {
my $self = shift;
my $attributes = shift;
# remove this fix when Lincoln fixes it properly
return ["Misc" => "Misc"] unless ($attributes); # these are arbitrary, and possibly destructive defaults
my @ordered_attributes = sort {($self->_preferred_tags->{$a->[0]} || 0) <=> ($self->_preferred_tags->{$b->[0]} || 0)} @$attributes;
my $best = pop @ordered_attributes;
@$attributes = @ordered_attributes;
return $best;
}
Bio/DB/GFF/Segment.pm view on Meta::CPAN
Title : alphabet
Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
Function: Returns the type of sequence being one of
'dna', 'rna' or 'protein'. This is case sensitive.
This is not called <type> because this would cause
upgrade problems from the 0.5 and earlier Seq objects.
Returns : a string either 'dna','rna','protein'. NB - the object must
make a call of the type - if there is no type specified it
has to guess.
Args : none
Status : Virtual
=cut
sub alphabet{
return 'dna'; # no way this will be anything other than dna!
}
Bio/DB/IndexedBase.pm view on Meta::CPAN
if ($start > $stop) {
# Change the strand
($start, $stop) = ($stop, $start);
$strand *= -1;
}
return $id, $start, $stop, $strand;
}
sub _guess_alphabet {
# Determine the molecular type of the given sequence string:
# 'dna', 'rna', 'protein' or '' (unknown/empty)
my ($self, $string) = @_;
# Handle IUPAC residues like PrimarySeq does
my $alphabet = Bio::PrimarySeq::_guess_alphabet_from_string($self, $string, 1);
return $alphabet eq 'dna' ? DNA
: $alphabet eq 'rna' ? RNA
: $alphabet eq 'protein' ? PROTEIN
: NA;
}
sub _makeid {
# Process the header line by applying any transformation given in -makeid
my ($self, $header_line) = @_;
Bio/DB/SeqI.pm view on Meta::CPAN
$self->throw("Object did not provide a get_all_ids method");
}
=head2 get_Seq_by_primary_id
Title : get_Seq_by_primary_id
Usage : $seq = $db->get_Seq_by_primary_id($primary_id_string);
Function: Gets a Bio::Seq object by the primary id. The primary
id in these cases has to come from $db->get_all_primary_ids.
There is no other way to get (or guess) the primary_ids
in a database.
The other possibility is to get Bio::PrimarySeqI objects
via the get_PrimarySeq_stream and the primary_id field
on these objects are specified as the ids to use here.
Returns : A Bio::Seq object
Args : accession number (as a string)
Throws : "acc does not exist" exception
=cut
Bio/DB/Universal.pm view on Meta::CPAN
$embl = Bio::Index::EMBL->new( -filename => '/some/index/filename/locally/stored');
$uni->use_database('embl',$embl);
# treat it like a normal database. Recognises strings
# like gb|XXXXXX and embl:YYYYYY
$seq1 = $uni->get_Seq_by_id("embl:HSHNRNPA");
$seq2 = $uni->get_Seq_by_acc("gb|A000012");
# with no separator, tries to guess database. In this case the
# _ is considered to be indicative of swissprot
$seq3 = $uni->get_Seq_by_id('ROA1_HUMAN');
=head1 DESCRIPTION
Artificial database that delegates to specific databases, with a
"smart" (well, smartish) guessing routine for what the ids. No doubt
the smart routine can be made smarter.
The hope is that you can make this database and just throw ids at it -
for most easy cases it will sort you out. Personally, I would be
making sure I knew where each id came from and putting it into its own
database first - but this is a quick and dirty solution.
By default this connects to web orientated databases, with all the
reliability and network bandwidth costs this implies. However you can
subsistute your own local databases - they could be Bio::Index
Bio/DB/Universal.pm view on Meta::CPAN
Example :
Returns :
Args :
=cut
sub get_Seq_by_id{
my ($self,$str) = @_;
my ($tag,$id) = $self->guess_id($str);
return $self->{'db_hash'}->{$tag}->get_Seq_by_id($id);
}
=head2 get_Seq_by_acc
Title : get_Seq_by_acc
Usage :
Function:
Example :
Returns :
Args :
=cut
sub get_Seq_by_acc {
my ($self,$str) = @_;
my ($tag,$id) = $self->guess_id($str);
return $self->{'db_hash'}->{$tag}->get_Seq_by_acc($id);
}
=head2 guess_id
Title : guess_id
Usage :
Function:
Example :
Returns :
Args :
=cut
sub guess_id{
my ($self,$str) = @_;
if( $str =~ /(\S+)[:|\/;](\w+)/ ) {
my $tag;
my $db = $1;
my $id = $2;
if( $db =~ /gb/i || $db =~ /genbank/i || $db =~ /ncbi/i ) {
$tag = 'genbank';
} elsif ( $db =~ /embl/i || $db =~ /emblbank/ || $db =~ /^em/i ) {
$tag = 'embl';
} elsif ( $db =~ /swiss/i || $db =~ /^sw/i || $db =~ /sptr/ ) {
$tag = 'swiss';
} else {
# throw for the moment
$self->throw("Could not guess database type $db from $str");
}
return ($tag,$id);
} else {
my $tag;
# auto-guess from just the id
if( $str =~ /_/ ) {
$tag = 'swiss';
} elsif ( $str =~ /^[QPR]\w+\d$/ ) {
$tag = 'swiss';
} elsif ( $str =~ /[A-Z]\d+/ ) {
$tag = 'genbank';
} else {
# default genbank...
$tag = 'genbank';
}
Bio/DB/UpdateableSeqI.pm view on Meta::CPAN
=cut
=head2 get_Seq_by_primary_id
Title : get_Seq_by_primary_id
Usage : $seq = $db->get_Seq_by_primary_id($primary_id_string);
Function: Gets a Bio::Seq object by the primary id. The primary
id in these cases has to come from $db->get_all_primary_ids.
There is no other way to get (or guess) the primary_ids
in a database.
The other possibility is to get Bio::PrimarySeqI objects
via the get_PrimarySeq_stream and the primary_id field
on these objects are specified as the ids to use here.
Returns : A Bio::Seq object
Args : accession number (as a string)
Throws : "acc does not exist" exception
Bio/Index/AbstractSeq.pm view on Meta::CPAN
return values %bytepos;
}
=head2 get_Seq_by_primary_id
Title : get_Seq_by_primary_id
Usage : $seq = $db->get_Seq_by_primary_id($primary_id_string);
Function: Gets a Bio::Seq object by the primary id. The primary
id in these cases has to come from $db->get_all_primary_ids.
There is no other way to get (or guess) the primary_ids
in a database.
The other possibility is to get Bio::PrimarySeqI objects
via the get_PrimarySeq_stream and the primary_id field
on these objects are specified as the ids to use here.
Returns : A Bio::Seq object
Args : primary id (as a string)
Throws : "acc does not exist" exception
Bio/Map/GeneMap.pm view on Meta::CPAN
Usage : $string = $obj->seq()
Function: Get/set the sequence as a string of letters. When getting, If the
GeneMap object didn't have sequence attached directly to it for the
region requested, the map's gene's database will be asked for the
sequence, and failing that, the map's gene's positions will be asked
for their sequences. Areas for which no sequence could be found will
be filled with Ns, unless no sequence was found anywhere, in which
case undef is returned.
Returns : string
Args : Optionally on set the new value (a string). An optional second
argument presets the alphabet (otherwise it will be guessed).
=cut
sub seq {
my ($self, @args) = @_;
my $seq = $self->SUPER::seq(@args);
my $expected_length = $self->length;
if (! $seq || CORE::length($seq) < $expected_length) {
my @have = split('', $seq || '');
my @result;
Bio/Map/GenePosition.pm view on Meta::CPAN
=head2 seq
Title : seq
Usage : my $string = $position->seq();
Function: Get/set the sequence as a string of letters. If no sequence is
manually set by you, the position's map will be asked for the
sequence, and if available, that will be returned.
Returns : scalar
Args : Optionally on set the new value (a string). An optional second
argument presets the alphabet (otherwise it will be guessed).
=cut
sub seq {
# $shortcut is internal-use only by GeneMap
my ($self, $str, $alpha, $shortcut) = @_;
my $seq = $self->SUPER::seq($str, $alpha);
if ($seq) {
Bio/Map/PositionWithSequence.pm view on Meta::CPAN
return $self;
}
=head2 seq
Title : seq
Usage : my $string = $obj->seq();
Function: Get/set the sequence as a string of letters.
Returns : scalar
Args : Optionally on set the new value (a string). An optional second
argument presets the alphabet (otherwise it will be guessed).
=cut
sub seq {
my ($self, $str, $alpha) = @_;
# done like this because SUPER will set seq to undef if undef supplied,
# but GeneMap wants to send undef, undef, 1 to decendants of this method
my $seq;
Bio/MapIO.pm view on Meta::CPAN
# object?
if( $class =~ /Bio::MapIO::(\S+)/ ) {
my ($self) = $class->SUPER::new(@args);
$self->_initialize(@args);
return $self;
} else {
my %param = @args;
@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
my $format = $param{'-format'} ||
$class->_guess_format( $param{'-file'} || $ARGV[0] ) ||
'mapmaker';
$format = "\L$format"; # normalize capitalization to lower case
# normalize capitalization
return unless( $class->_load_format_module($format) );
return "Bio::MapIO::$format"->new(@args);
}
}
Bio/MapIO.pm view on Meta::CPAN
Exception $@
For more information about the MapIO system please see the MapIO docs.
This includes ways of checking for formats at compile time, not run time
END
;
}
return $ok;
}
=head2 _guess_format
Title : _guess_format
Usage : $obj->_guess_format($filename)
Function:
Example :
Returns : guessed format of filename (lower case)
Args :
=cut
sub _guess_format {
my $class = shift;
return unless $_ = shift;
return 'mapmaker' if /\.(map)$/i;
return 'mapxml' if /\.(xml)$/i;
}
sub DESTROY {
my $self = shift;
$self->close();
Bio/Matrix/IO.pm view on Meta::CPAN
# object?
if( $class =~ /Bio::Matrix::IO::(\S+)/ ) {
my ($self) = $class->SUPER::new(@args);
$self->_initialize(@args);
return $self;
} else {
my %param = @args;
@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
my $format = $param{'-format'} ||
$class->_guess_format( $param{'-file'} || $ARGV[0] ) ||
'scoring';
$format = "\L$format"; # normalize capitalization to lower case
# normalize capitalization
return unless( $class->_load_format_module($format) );
return "Bio::Matrix::IO::$format"->new(@args);
}
}
=head2 newFh
Bio/Matrix/IO.pm view on Meta::CPAN
For more information about the Matrix::IO system please see the
Matrix::IO docs. This includes ways of checking for formats at
compile time, not run time
END
;
}
return $ok;
}
=head2 _guess_format
Title : _guess_format
Usage : $obj->_guess_format($filename)
Returns : guessed format of filename (lower case)
Args : filename
=cut
sub _guess_format {
my $class = shift;
return unless $_ = shift;
return 'scoring' if /BLOSUM|PAM$/i;
return 'phylip' if /\.dist$/i;
}
sub DESTROY {
my $self = shift;
$self->close();
}
Bio/Matrix/PSM/IO.pm view on Meta::CPAN
# or do we want to call SUPER on an object if $caller is an
# object?
if( $class =~ /Bio::Matrix::PSM::IO(\S+)/ ) {
$self = $class->SUPER::new(@args);
$self->_initialize(@args);
return $self;
} else {
my %param = @args;
@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
my $format = $param{'-format'} ||
$class->_guess_format( $param{'-file'} || $ARGV[0] ) ||
'scoring';
$class->throw("$format format unrecognized or an argument error occurred\n.") if (!grep(/$format/,@Bio::Matrix::PSM::IO::PSMFORMATS));
$format = "\L$format"; # normalize capitalization to lower case
# normalize capitalization
return unless( $class->_load_format_module($format) );
return "Bio::Matrix::PSM::IO::$format"->new(@args);
}
}
Bio/Matrix/PSM/IO.pm view on Meta::CPAN
For more information about the Matrix::PSM::IO system please see the
Matrix::PSM::IO docs. This includes ways of checking for formats at
compile time, not run time
END
;
}
return $ok;
}
=head2 _guess_format
Title : _guess_format
Usage : $obj->_guess_format($filename)
Returns : guessed format of filename (lower case)
Args : filename
=cut
sub _guess_format {
my $class = shift;
return unless $_ = shift;
return 'meme' if /.meme$|meme.html$/i;
return 'transfac' if /\.dat$/i;
return 'mast' if /^mast\.|\.mast.html$|.mast$/i;
}
=head2 next_psm
Title : next_psm
Bio/Nexml/Factory.pm view on Meta::CPAN
my ($self, $aln, $taxa, @args) = @_;
#most of the code below ripped from Bio::Phylo::Matrices::Matrix::new_from_bioperl()
if ( $aln->isa('Bio::Align::AlignI') ) {
$aln->unmatch;
$aln->map_chars('\.','-');
my @seqs = $aln->each_seq;
my ( $type, $missing, $gap, $matchchar );
if ( $seqs[0] ) {
$type = $seqs[0]->alphabet || $seqs[0]->_guess_alphabet || 'dna';
}
else {
$type = 'dna';
}
my $matrix = $fac->create_matrix(
'-type' => $type,
'-special_symbols' => {
'-missing' => $aln->missing_char || '?',
'-matchchar' => $aln->match_char || '.',
Bio/Nexml/Factory.pm view on Meta::CPAN
Title : create_bphylo_seq
Usage : my $bphylo_seq = $factory->create_bphylo_seq($bperl_seq);
Function: Converts a L<Bio::Seq> object into Bio::Phylo::Matrices::Matrix object
Returns : a Bio::Phylo::Matrices::Matrix object
Args : Bio::Seq object
=cut
sub create_bphylo_seq {
my ($self, $seq, $taxa, @args) = @_;
my $type = $seq->alphabet || $seq->_guess_alphabet || 'dna';
$type = uc($type);
my $dat = create_bphylo_datum($seq, $taxa, '-type' => $type);
# copy seq string
my $seqstring = $seq->seq;
if ( $seqstring and $seqstring =~ /\S/ ) {
eval { $dat->set_char( $seqstring ) };
if ( $@ and UNIVERSAL::isa($@,'Bio::Phylo::Util::Exceptions::InvalidData') ) {
$self->throw("\n\nThe BioPerl sequence object contains invalid data ($seqstring)\n");
Bio/Nexml/Factory.pm view on Meta::CPAN
my $class = 'Bio::Phylo::Matrices::Datum';
my $feats;
# want $seq type-check here? Allowable: is-a Bio::PrimarySeq,
# Bio::LocatableSeq /maj
if (@args % 2) { # odd
$feats = shift @args;
unless (ref($feats) eq 'ARRAY') {
Bio::Root::Root->throw("Third argument must be array of SeqFeatures");
}
}
my $type = $seq->alphabet || $seq->_guess_alphabet || 'dna';
my $self = $class->new( '-type' => $type, @args );
# copy seq string
my $seqstring = $seq->seq;
if ( $seqstring and $seqstring =~ /\S/ ) {
eval { $self->set_char( $seqstring ) };
if ( $@ and UNIVERSAL::isa($@,'Bio::Phylo::Util::Exceptions::InvalidData') ) {
$self->throw("\n\nThe BioPerl sequence object contains invalid data ($seqstring)\n");
}
}
Bio/Ontology/OntologyStore.pm view on Meta::CPAN
foreach my $ont (@_) {
$self->throw(ref($ont)." does not implement Bio::Ontology::OntologyI")
unless $ont && ref($ont) && $ont->isa("Bio::Ontology::OntologyI");
# remove it from both the id hash and the name hash
delete $ont_store_by_id{$ont->identifier()};
delete $ont_store_by_name{$ont->name()} if $ont->name();
}
return 1;
}
=head2 guess_ontology()
Usage : my $ontology =
Bio::Ontology::OntologyStore->guess_ontology('GO:0000001');
Function: tries to guess which ontology a term identifier comes from,
loads it as necessary,
and returns it as a Bio::Ontology::Ontology object.
Example :
Returns : a Bio::Ontology::Ontology object, or warns and returns undef
Args : an ontology term identifier in XXXX:DDDDDDD format.
Guessing is based on the XXXX string before the colon.
=cut
sub guess_ontology {
my ($self,$id) = @_;
my($prefix) = $id =~ /^(.+?):.+$/;
my %prefix = (
SO => 'Sequence Ontology',
SOFA => 'Sequence Ontology Feature Annotation',
GO => 'Gene Ontology',
);
Bio/OntologyIO.pm view on Meta::CPAN
sub _map_format {
my $self = shift;
my $format = shift;
my $mod;
if($format) {
$mod = $format_driver_map{lc($format)};
$mod = lc($format) unless $mod;
} else {
$self->throw("unable to guess ontology format, specify -format");
}
return $mod;
}
sub unescape {
my( $self, $ref ) = @_;
$ref =~ s/<\\;/\</g;
$ref =~ s/>\\;/\>/g;
$ref =~ s/&pct\\;/\%/g;
$ref =~ s/\\n/\n/g;