Bio-DB-SeqFeature

 view release on metacpan or  search on metacpan

lib/Bio/DB/SeqFeature/Store/berkeleydb.pm  view on Meta::CPAN

    or $self->throw("Couldn't find 'types' index file");
  my $primary_tag = $obj->primary_tag;
  my $source_tag  = $obj->source_tag || '';
  return unless defined $primary_tag;

  $primary_tag    .= ":$source_tag";
  my $key          = lc $primary_tag;
  $self->update_or_delete($delete,$db,$key,$id);
}

# Note: this indexing scheme is space-inefficient because it stores the
# denormalized sequence ID followed by the bin in XXXXXX zero-leading
# format. It should be replaced with a binary numeric encoding and the
# BTREE {compare} attribute changed accordingly.
sub _update_location_index {
  my $self = shift;
  my ($obj,$id,$delete) = @_;
  my $db = $self->index_db('locations')
    or $self->throw("Couldn't find 'locations' index file");

  my $seq_id      = $obj->seq_id || '';
  my $start       = $obj->start  || '';
  my $end         = $obj->end    || '';
  my $strand      = $obj->strand;
  my $bin_min     = int $start/BINSIZE;
  my $bin_max     = int $end/BINSIZE;

  for (my $bin = $bin_min; $bin <= $bin_max; $bin++ ) {
    my $key = sprintf("%s.%06d",lc($seq_id),$bin);
    $self->update_or_delete($delete,$db,$key,pack("i4",$id,$start,$end,$strand));
  }
}

sub _update_attribute_index {
  my $self      = shift;
  my ($obj,$id,$delete) = @_;
  my $db = $self->index_db('attributes')
    or $self->throw("Couldn't find 'attributes' index file");

  for my $tag ($obj->get_all_tags) {
    for my $value ($obj->get_tag_values($tag)) {
      my $key = "${tag}:${value}";
      $self->update_or_delete($delete,$db,$key,$id);
    }
  }
}

sub _update_note_index {
  my $self = shift;
  my ($obj,$id,$delete) = @_;
  return if $delete; # we don't know how to do this

  my $fh = $self->notes_db;
  my @notes = $obj->get_tag_values('Note') if $obj->has_tag('Note');


  print $fh $_,"\t",pack("u*",$id) or $self->throw("An error occurred while updating note index: $!")
    foreach @notes;
}

sub update_or_delete {
  my $self = shift;
  my ($delete,$db,$key,$id) = @_;
  if ($delete) {
    tied(%$db)->del_dup($key,$id);
  } else {
    $db->{$key} = $id;
  }
}

# these methods return pointers to....
# the database that stores serialized objects
sub db {
  my $self = shift;
  my $d = $self->setting('db');
  $self->setting(db=>shift) if @_;
  $d;
}

sub parentage_db {
  my $self = shift;
  my $d = $self->setting('parentage_db');
  $self->setting(parentage_db=>shift) if @_;
  $d;
}

# the Bio::DB::Fasta object
sub dna_db {
  my $self = shift;
  my $d = $self->setting('dna_db');
  $self->setting(dna_db=>shift) if @_;
  $d;
}

# the specialized notes database
sub notes_db {
  my $self = shift;
  my $d = $self->setting('notes_db');
  $self->setting(notes_db=>shift) if @_;
  $d;
}

# the is_indexed_db 
sub is_indexed_db {
  my $self = shift;
  my $d = $self->setting('is_indexed_db');
  $self->setting(is_indexed_db=>shift) if @_;
  $d;
}

# The indicated index berkeley db
sub index_db {
  my $self = shift;
  my $index_name = shift;
  my $d = $self->setting($index_name);
  $self->setting($index_name=>shift) if @_;
  $d;
}


sub _mtime {

lib/Bio/DB/SeqFeature/Store/berkeleydb.pm  view on Meta::CPAN


sub search_notes {
  my $self = shift;
  my ($search_string,$limit) = @_;

  $search_string =~ tr/*?//d;

  my @results;

  my @words = map {quotemeta($_)} $search_string =~ /(\w+)/g;
  my $search = join '|',@words;

  my (%found,$found);
  my $note_index = $self->notes_db;
  seek($note_index,0,0);  # back to start
  while (<$note_index>) {
    next unless /$search/;
    chomp;
    my ($note,$uu) = split "\t";
    $found{unpack("u*",$uu)}++;
    last if $limit && ++$found >= $limit;
  }

  my (@features, @matches);
  for my $idx (keys %found) {
    my $feature    = $self->fetch($idx) or next;
    my @values     = $feature->get_tag_values('Note') if $feature->has_tag('Note');
    my $value      = "@values";

    my $hits;
    $hits++ while $value =~ /($search)/ig;  # count the number of times we were hit
    push @matches,$hits;
    push @features,$feature;
  }

  for (my $i=0; $i<@matches; $i++)  {
    my $feature = $features[$i];
    my $matches = $matches[$i];

    my $relevance = 10 * $matches;
    my $note;
    $note   = join ' ',$feature->get_tag_values('Note') if $feature->has_tag('Note');
    push @results,[$feature->display_name,$note,$relevance];
  }

  return @results;
}

sub glob_match {
  my $self = shift;
  my $term = shift;
  return unless $term =~ /([^*?]*)(?:^|[^\\])?[*?]/;
  my $stem = $1;
  $term =~ s/(^|[^\\])([+\[\]^{}\$|\(\).])/$1\\$2/g;
  $term =~ s/(^|[^\\])\*/$1.*/g;
  $term =~ s/(^|[^\\])\?/$1./g;
  return ($stem,$term);
}


sub update_filter {
  my $self = shift;
  my ($filter,$results) = @_;
  return unless @$results;

  if (%$filter) {
    my @filtered = grep {$filter->{$_}} @$results;
    %$filter     = map {$_=>1} @filtered;
  } else {
    %$filter     = map {$_=>1} @$results;
  }

}

sub types {
    my $self = shift;
    eval "require Bio::DB::GFF::Typename" 
	unless Bio::DB::GFF::Typename->can('new');

    my $index = $self->index_db('types');
    my $db    = tied(%$index);
    return map {Bio::DB::GFF::Typename->new($_)} keys %$index;
}

# this is ugly
sub _insert_sequence {
  my $self = shift;
  my ($seqid,$seq,$offset) = @_;
  my $dna_fh = $self->private_fasta_file or return;
  if ($offset == 0) { # start of the sequence
    print $dna_fh ">$seqid\n";
  }
  print $dna_fh $seq,"\n";
}

sub _fetch_sequence {
  my $self = shift;
  my ($seqid,$start,$end) = @_;
  my $db = $self->dna_db or return;
  return $db->seq($seqid,$start,$end);
}

sub private_fasta_file {
  my $self = shift;
  return $self->{fasta_fh} if exists $self->{fasta_fh};
  $self->{fasta_file}   = $self->_qualify("sequence.fa");
  return $self->{fasta_fh} = IO::File->new($self->{fasta_file},">");
}

sub finish_bulk_update {
  my $self = shift;
  if (my $fh = $self->{fasta_fh}) {
    $fh->close;
    $self->{fasta_db} = Bio::DB::Fasta::Subdir->new($self->{fasta_file});
  }
}

sub db_version {
    my $self = shift;
    my $db   = $self->db;
    return $db->{'.version'} || 1.00;



( run in 0.693 second using v1.01-cache-2.11-cpan-39bf76dae61 )