Bio-DB-SeqFeature

 view release on metacpan or  search on metacpan

lib/Bio/DB/SeqFeature/Store/DBI/mysql.pm  view on Meta::CPAN

  my $id = $self->_locationid($seqid);
  my $sequence = $self->_sequence_table;
  my $sth = $self->_prepare(<<END);
REPLACE INTO $sequence (id,offset,sequence) VALUES (?,?,?)
END
  $sth->execute($id,$offset,$seq) or $self->throw($sth->errstr);
}

###
# This subroutine flags the given primary ID for later reindexing
#
sub flag_for_indexing {
  my $self = shift;
  my $id   = shift;
  my $needs_updating = $self->_update_table;
  my $sth = $self->_prepare("REPLACE INTO $needs_updating VALUES (?)");
  $sth->execute($id) or $self->throw($self->dbh->errstr);
}

###
# Update indexes for given object
#
sub _update_indexes {
  my $self = shift;
  my $obj  = shift;
  defined (my $id   = $obj->primary_id) or return;

  if ($self->{bulk_update_in_progress}) {
    $self->_dump_update_name_index($obj,$id);
    $self->_dump_update_attribute_index($obj,$id);
  } else {
    $self->_update_name_index($obj,$id);
    $self->_update_attribute_index($obj,$id);
  }
}

sub _update_name_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $name = $self->_name_table;
  my $primary_id = $obj->primary_id;

  $self->_delete_index($name,$id);
  my ($names,$aliases) = $self->feature_names($obj);

  my $sth = $self->_prepare("INSERT INTO $name (id,name,display_name) VALUES (?,?,?)");

  $sth->execute($id,$_,1) or $self->throw($sth->errstr)   foreach @$names;
  $sth->execute($id,$_,0) or $self->throw($sth->errstr) foreach @$aliases;
  $sth->finish;
}

sub _update_attribute_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $attribute = $self->_attribute_table;
  $self->_delete_index($attribute,$id);

  my $sth = $self->_prepare("INSERT INTO $attribute (id,attribute_id,attribute_value) VALUES (?,?,?)");
  for my $tag ($obj->get_all_tags) {
    my $tagid = $self->_attributeid($tag);
    for my $value ($obj->get_tag_values($tag)) {
      $sth->execute($id,$tagid,$value) or $self->throw($sth->errstr);
    }
  }
  $sth->finish;
}

sub _genericid {
  my $self = shift;
  my ($table,$namefield,$name,$add_if_missing) = @_;
  my $qualified_table = $self->_qualify($table);
  my $sth = $self->_prepare(<<END);
SELECT id FROM $qualified_table WHERE $namefield=?
END
  $sth->execute($name) or die $sth->errstr;
  my ($id) = $sth->fetchrow_array;
  $sth->finish;
  return $id if defined $id;
  return     unless $add_if_missing;

  $sth = $self->_prepare(<<END);
INSERT INTO $qualified_table ($namefield) VALUES (?)
END
  $sth->execute($name) or die $sth->errstr;
  my $dbh = $self->dbh;
  return $dbh->{mysql_insertid};
}

sub _typeid {
  shift->_genericid('typelist','tag',shift,1);
}
sub _locationid {
  shift->_genericid('locationlist','seqname',shift,1);
}
sub _locationid_nocreate {
  shift->_genericid('locationlist','seqname',shift,0);
}
sub _attributeid {
  shift->_genericid('attributelist','tag',shift,1);
}

sub _get_location_and_bin {
  my $self = shift;
  my $feature = shift;
  my $seqid   = $self->_locationid($feature->seq_id||'');
  my $start   = $feature->start;
  my $end     = $feature->end;
  my $strand  = $feature->strand || 0;
  my ($tier,$bin) = $self->get_bin($start,$end);
  return ($seqid,$start,$end,$strand,$tier,$bin);
}

sub get_bin {
  my $self = shift;
  my ($start,$end) = @_;
  my $binsize = MIN_BIN;
  my ($bin_start,$bin_end,$tier);
  $tier = 0;
  while (1) {
    $bin_start = int $start/$binsize;
    $bin_end   = int $end/$binsize;
    last if $bin_start == $bin_end;

lib/Bio/DB/SeqFeature/Store/DBI/mysql.pm  view on Meta::CPAN

  my $store_fh = $self->dump_filehandle('feature');
  my $dbh      = $self->dbh;

  my $autoindex = $self->autoindex;

  for my $obj (@_) {
    my $id       = $self->next_id;
    my ($seqid,$start,$end,$strand,$tier,$bin) = $indexed ? $self->_get_location_and_bin($obj) : (undef)x6;
    my $primary_tag = $obj->primary_tag;
    my $source_tag  = $obj->source_tag || '';
    $primary_tag    .= ":$source_tag";
    my $typeid   = $self->_typeid($primary_tag,1);

    print $store_fh join("\t",$id,$typeid,$seqid,$start,$end,$strand,$tier,$bin,$indexed,$dbh->quote($self->freeze($obj))),"\n";
    $obj->primary_id($id);
    $self->_update_indexes($obj) if $indexed && $autoindex;
    $count++;
  }

  # remember whether we are have ever stored a non-indexed feature
  unless ($indexed or $self->{indexed_flag}++) {
    $self->subfeatures_are_indexed(0);
  }
  $count;
}

sub _dump_add_SeqFeature {
  my $self     = shift;
  my $parent   = shift;
  my @children = @_;

  my $dbh = $self->dbh;
  my $fh = $self->dump_filehandle('parent2child');
  my $parent_id = (ref $parent ? $parent->primary_id : $parent) 
    or $self->throw("$parent should have a primary_id");
  my $count = 0;

  for my $child_id (@children) {
    print $fh join("\t",$parent_id,$child_id),"\n";
    $count++;
  }
  $count;
}

sub _dump_update_name_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $fh      = $self->dump_filehandle('name');
  my $dbh     = $self->dbh;
  my ($names,$aliases) = $self->feature_names($obj);
  print $fh join("\t",$id,$dbh->quote($_),1),"\n" foreach @$names;
  print $fh join("\t",$id,$dbh->quote($_),0),"\n" foreach @$aliases;
}

sub _dump_update_attribute_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $fh        = $self->dump_filehandle('attribute');
  my $dbh       = $self->dbh;
  for my $tag ($obj->all_tags) {
    my $tagid = $self->_attributeid($tag);
    for my $value ($obj->each_tag_value($tag)) {
      print $fh join("\t",$id,$tagid,$dbh->quote($value)),"\n";
    }
  }
}

sub coverage_array {
    my $self = shift;
    my ($seq_name,$start,$end,$types,$bins) = 
	rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],
		   ['TYPES','TYPE','PRIMARY_TAG'],'BINS'],@_);

    $bins  ||= 1000;
    $start ||= 1;
    unless ($end) {
	my $segment = $self->segment($seq_name) or $self->throw("unknown seq_id $seq_name");
	$end        = $segment->end;
    }

    my $binsize = ($end-$start+1)/$bins;
    my $seqid   = $self->_locationid_nocreate($seq_name) || 0;

    return [] unless $seqid;

    # where each bin starts
    my @his_bin_array = map {$start + $binsize * $_}       (0..$bins-1);
    my @sum_bin_array = map {int(($_-1)/SUMMARY_BIN_SIZE)} @his_bin_array;

    my $interval_stats    = $self->_interval_stats_table;
    
    my ($sth,@a);
    if ($types) {
	# pick up the type ids
	my ($from,$where,$group);
	($from,$where,$group,@a) = $self->_types_sql($types,'b');
	$where =~ s/.+AND//s;
	$sth = $self->_prepare(<<END);
SELECT id,tag FROM $from
WHERE  $where
END
;
    } else {
	$sth = $self->_prepare(<<END);
SELECT id,tag FROM typelist
END
    }
    my (@t,$report_tag);
    $sth->execute(@a);
    while (my ($t,$tag) = $sth->fetchrow_array) {
	$report_tag ||= $tag;
	push @t,$t;
    }

    my %bins;
    my $sql = <<END;
SELECT bin,cum_count
  FROM $interval_stats
  WHERE typeid=?
    AND seqid=? AND bin >= ?
  LIMIT 1
END
;



( run in 2.129 seconds using v1.01-cache-2.11-cpan-97f6503c9c8 )