Bio-DB-SeqFeature
view release on metacpan or search on metacpan
lib/Bio/DB/SeqFeature/Store/berkeleydb.pm view on Meta::CPAN
or $self->throw("Couldn't find 'types' index file");
my $primary_tag = $obj->primary_tag;
my $source_tag = $obj->source_tag || '';
return unless defined $primary_tag;
$primary_tag .= ":$source_tag";
my $key = lc $primary_tag;
$self->update_or_delete($delete,$db,$key,$id);
}
# Note: this indexing scheme is space-inefficient because it stores the
# denormalized sequence ID followed by the bin in XXXXXX zero-leading
# format. It should be replaced with a binary numeric encoding and the
# BTREE {compare} attribute changed accordingly.
sub _update_location_index {
my $self = shift;
my ($obj,$id,$delete) = @_;
my $db = $self->index_db('locations')
or $self->throw("Couldn't find 'locations' index file");
my $seq_id = $obj->seq_id || '';
my $start = $obj->start || '';
my $end = $obj->end || '';
my $strand = $obj->strand;
my $bin_min = int $start/BINSIZE;
my $bin_max = int $end/BINSIZE;
for (my $bin = $bin_min; $bin <= $bin_max; $bin++ ) {
my $key = sprintf("%s.%06d",lc($seq_id),$bin);
$self->update_or_delete($delete,$db,$key,pack("i4",$id,$start,$end,$strand));
}
}
sub _update_attribute_index {
my $self = shift;
my ($obj,$id,$delete) = @_;
my $db = $self->index_db('attributes')
or $self->throw("Couldn't find 'attributes' index file");
for my $tag ($obj->get_all_tags) {
for my $value ($obj->get_tag_values($tag)) {
my $key = "${tag}:${value}";
$self->update_or_delete($delete,$db,$key,$id);
}
}
}
sub _update_note_index {
my $self = shift;
my ($obj,$id,$delete) = @_;
return if $delete; # we don't know how to do this
my $fh = $self->notes_db;
my @notes = $obj->get_tag_values('Note') if $obj->has_tag('Note');
print $fh $_,"\t",pack("u*",$id) or $self->throw("An error occurred while updating note index: $!")
foreach @notes;
}
sub update_or_delete {
my $self = shift;
my ($delete,$db,$key,$id) = @_;
if ($delete) {
tied(%$db)->del_dup($key,$id);
} else {
$db->{$key} = $id;
}
}
# these methods return pointers to....
# the database that stores serialized objects
sub db {
my $self = shift;
my $d = $self->setting('db');
$self->setting(db=>shift) if @_;
$d;
}
sub parentage_db {
my $self = shift;
my $d = $self->setting('parentage_db');
$self->setting(parentage_db=>shift) if @_;
$d;
}
# the Bio::DB::Fasta object
sub dna_db {
my $self = shift;
my $d = $self->setting('dna_db');
$self->setting(dna_db=>shift) if @_;
$d;
}
# the specialized notes database
sub notes_db {
my $self = shift;
my $d = $self->setting('notes_db');
$self->setting(notes_db=>shift) if @_;
$d;
}
# the is_indexed_db
sub is_indexed_db {
my $self = shift;
my $d = $self->setting('is_indexed_db');
$self->setting(is_indexed_db=>shift) if @_;
$d;
}
# The indicated index berkeley db
sub index_db {
my $self = shift;
my $index_name = shift;
my $d = $self->setting($index_name);
$self->setting($index_name=>shift) if @_;
$d;
}
sub _mtime {
lib/Bio/DB/SeqFeature/Store/berkeleydb.pm view on Meta::CPAN
sub search_notes {
my $self = shift;
my ($search_string,$limit) = @_;
$search_string =~ tr/*?//d;
my @results;
my @words = map {quotemeta($_)} $search_string =~ /(\w+)/g;
my $search = join '|',@words;
my (%found,$found);
my $note_index = $self->notes_db;
seek($note_index,0,0); # back to start
while (<$note_index>) {
next unless /$search/;
chomp;
my ($note,$uu) = split "\t";
$found{unpack("u*",$uu)}++;
last if $limit && ++$found >= $limit;
}
my (@features, @matches);
for my $idx (keys %found) {
my $feature = $self->fetch($idx) or next;
my @values = $feature->get_tag_values('Note') if $feature->has_tag('Note');
my $value = "@values";
my $hits;
$hits++ while $value =~ /($search)/ig; # count the number of times we were hit
push @matches,$hits;
push @features,$feature;
}
for (my $i=0; $i<@matches; $i++) {
my $feature = $features[$i];
my $matches = $matches[$i];
my $relevance = 10 * $matches;
my $note;
$note = join ' ',$feature->get_tag_values('Note') if $feature->has_tag('Note');
push @results,[$feature->display_name,$note,$relevance];
}
return @results;
}
sub glob_match {
my $self = shift;
my $term = shift;
return unless $term =~ /([^*?]*)(?:^|[^\\])?[*?]/;
my $stem = $1;
$term =~ s/(^|[^\\])([+\[\]^{}\$|\(\).])/$1\\$2/g;
$term =~ s/(^|[^\\])\*/$1.*/g;
$term =~ s/(^|[^\\])\?/$1./g;
return ($stem,$term);
}
sub update_filter {
my $self = shift;
my ($filter,$results) = @_;
return unless @$results;
if (%$filter) {
my @filtered = grep {$filter->{$_}} @$results;
%$filter = map {$_=>1} @filtered;
} else {
%$filter = map {$_=>1} @$results;
}
}
sub types {
my $self = shift;
eval "require Bio::DB::GFF::Typename"
unless Bio::DB::GFF::Typename->can('new');
my $index = $self->index_db('types');
my $db = tied(%$index);
return map {Bio::DB::GFF::Typename->new($_)} keys %$index;
}
# this is ugly
sub _insert_sequence {
my $self = shift;
my ($seqid,$seq,$offset) = @_;
my $dna_fh = $self->private_fasta_file or return;
if ($offset == 0) { # start of the sequence
print $dna_fh ">$seqid\n";
}
print $dna_fh $seq,"\n";
}
sub _fetch_sequence {
my $self = shift;
my ($seqid,$start,$end) = @_;
my $db = $self->dna_db or return;
return $db->seq($seqid,$start,$end);
}
sub private_fasta_file {
my $self = shift;
return $self->{fasta_fh} if exists $self->{fasta_fh};
$self->{fasta_file} = $self->_qualify("sequence.fa");
return $self->{fasta_fh} = IO::File->new($self->{fasta_file},">");
}
sub finish_bulk_update {
my $self = shift;
if (my $fh = $self->{fasta_fh}) {
$fh->close;
$self->{fasta_db} = Bio::DB::Fasta::Subdir->new($self->{fasta_file});
}
}
sub db_version {
my $self = shift;
my $db = $self->db;
return $db->{'.version'} || 1.00;
( run in 0.693 second using v1.01-cache-2.11-cpan-39bf76dae61 )