Algorithm-SpatialIndex

 view release on metacpan or  search on metacpan

lib/Algorithm/SpatialIndex/Storage/DBI.pm  view on Meta::CPAN

package Algorithm::SpatialIndex::Storage::DBI;
use 5.008001;
use strict;
use warnings;
use Carp qw(croak);

our $VERSION = '0.02';

use parent 'Algorithm::SpatialIndex::Storage';
use constant DEBUG => 0;

=head1 NAME

Algorithm::SpatialIndex::Storage::DBI - DBI storage backend

=head1 SYNOPSIS

  use Algorithm::SpatialIndex;
  my $dbh = ...;
  my $idx = Algorithm::SpatialIndex->new(
    storage      => 'DBI',
    dbh_rw       => $dbh,
    dbh_ro       => $dbh, # defaults to dbh_rw
    table_prefix => 'si_',
  );

=head1 DESCRIPTION

B<WARNING: WHILE IT SEEMS TO WORK FOR ME, THIS STORAGE BACKEND IS HIGHLY
EXPERIMENTAL AND IN A PROOF-OF-CONCEPT STATE.> Unsurprisingly, it is also
20x slower when using SQLite as the storage engine then when using the
memory storage backend. Has only been tested with SQLite but has
mysql-specific and SQLite specific code paths as well as a general
SQL code path which is less careful about race conditions.

Inherits from L<Algorithm::SpatialIndex::Storage>.

This storage backend is persistent.

No implementation of schema migration yet, so expect to have to
reinitialize the index after a module upgrade!

=head1 ACCESSORS

=cut


use constant NODE_ID_TYPE => 'INTEGER';
use constant ITEM_ID_TYPE => 'INTEGER';

use Class::XSAccessor {
  getters => [qw(
    dbh_rw
    table_prefix

    no_of_coords
    coord_types
    node_coord_create_sql
    node_coord_select_sql
    node_coord_insert_sql

    no_of_subnodes
    subnodes_create_sql
    subnodes_select_sql
    subnodes_insert_sql

    bucket_size
    item_coord_types

    config

    dbms_name
    is_mysql
    is_sqlite

  )],
};

=head2 table_prefix

Returns the prefix of the table names.

=head2 coord_types

Returns an array reference containing the coordinate type strings.

=head2 item_coord_types

Returns an array reference containing the item coordinate type strings.

=head2 node_coord_create_sql

Returns the precomputed SQL fragment of the node coordinate
columns (C<CREATE TABLE> syntax).

=head2 no_of_subnodes

Returns the no. of subnodes per node.

=head2 subnodes_create_sql

Returns the precomputed SQL fragment of the subnode id
columns (C<CREATE TABLE> syntax).

=head2 config

Returns the hash reference of configuration options
read from the config table.

=head2 dbh_rw

Returns the read/write database handle.

=head2 dbh_ro

Returns the read-only database handle. Falls back
to the read/write handle if not defined.

=cut

sub dbh_ro {
  my $self = shift;
  if (defined $self->{dbh_ro}) {
    return $self->{dbh_ro};

lib/Algorithm/SpatialIndex/Storage/DBI.pm  view on Meta::CPAN


=head1 OTHER METHODS

=head2 init

Reads the options from the database for previously existing indexes.
Creates tables and writes default configuration for those that didn't
exist before.

Doesn't do any schema migration at this point.

=cut

sub init {
  my $self = shift;

  my $opt = $self->{opt};
  $self->{dbh_rw} = $opt->{dbh_rw};
  $self->{dbh_ro} = $opt->{dbh_ro};
  my $table_prefix = defined($opt->{table_prefix})
                     ? $opt->{table_prefix} : 'spatialindex';
  $self->{table_prefix} = $table_prefix;

  # Dear SQL. Please go away. Thank you.
  $self->{dbms_name} = $self->dbh_ro->get_info(17) if not defined $self->{dbms_name};
  $self->{is_mysql}  = 0;
  $self->{is_sqlite} = 0;

  my $option_table_name = $table_prefix . '_options';
  my $node_table_name   = $table_prefix . '_nodes';

  if ($self->{dbms_name} =~ /mysql/i) {
    $self->{is_mysql} = 1;
    $self->{_write_config_sql} = [
      qq{
        INSERT INTO $option_table_name
        SET id=?, value=?
        ON DUPLICATE KEY UPDATE id=?, value=?
      }, 0, 1, 0, 1
    ];
  }
  elsif ($self->{dbms_name} =~ /sqlite/i) {
    $self->{is_sqlite} = 1;
    $self->{_write_config_sql} = [qq{INSERT OR REPLACE INTO $option_table_name (id, value) VALUES(?, ?)}, 0, 1 ];
  }
  else {
    $self->{_write_config_sql} = sub {
      my $dbh = shift;
      eval {
        $dbh->do(qq{INSERT INTO $option_table_name (id, value) VALUES(?, ?)}, {}, $_[0], $_[1]);
        $dbh->do(qq{UPDATE $option_table_name SET id=?, value=?}, {}, $_[0], $_[1]);
        1;
      };
    };
  }

  my $config_existed = $self->_read_config_table;
  $self->{no_of_coords} = scalar(@{$self->coord_types});
  $self->_coord_types_to_sql($self->coord_types);
  $self->_subnodes_sql($self->no_of_subnodes);
  $self->{_fetch_node_sql} = qq(SELECT id, $self->{node_coord_select_sql}, $self->{subnodes_select_sql} FROM ${table_prefix}_nodes WHERE id=?);
  my $qlist = '?,' x ($self->no_of_subnodes + @{$self->coord_types});
  $qlist =~ s/,$//;
  $self->{_write_new_node_sql} = qq{INSERT INTO $node_table_name (}
                                 . $self->node_coord_select_sql . ', '
                                 . $self->subnodes_select_sql
                                 . qq{) VALUES($qlist)};
  $self->{_write_node_sql} = qq{UPDATE $node_table_name SET id=?, }
                             . $self->node_coord_insert_sql . ', '
                             . $self->subnodes_insert_sql
                             . ' WHERE id=?';
  $self->_bucket_sql; # init sql for bucket operations

  $self->_init_tables();
  $self->_write_config() if not $config_existed;
}

=head2 _read_config_table

Reads the configuration table.
Returns whether this succeeded or not.
In case of failure, this initializes some of the
configuration options from other sources.

=cut

sub _read_config_table {
  my $self = shift;
  my $dbh = $self->dbh_ro;
  my $table_prefix = $self->table_prefix;

  my $find_sth = $dbh->table_info('%', '%', "${table_prefix}_options", 'TABLE');
  my $opt;
  my $success;
  if ($find_sth->fetchrow_arrayref()) {
    my $sql = qq#
          SELECT id, value
          FROM ${table_prefix}_options
        #;
    $success = eval {
      $opt = $dbh->selectall_hashref($sql, 'id');
      my $err = $dbh->errstr;
      die $err if $err;
      1;
    };
  }
  $opt ||= {};
  $opt->{$_} = $opt->{$_}{value} for keys %$opt;
  $self->{config} = $opt;

  if (defined $opt->{coord_types}) {
    $self->{coord_types} = [split / /, $opt->{coord_types}];
  }
  else {
    $self->{coord_types} = [$self->index->strategy->coord_types];
    $opt->{coord_types} = join ' ', @{$self->{coord_types}};
  }

  if (defined $opt->{item_coord_types}) {
    $self->{item_coord_types} = [split / /, $opt->{item_coord_types}];
  }
  else {
    $self->{item_coord_types} = [$self->index->strategy->item_coord_types];
    $opt->{item_coord_types} = join ' ', @{$self->{item_coord_types}};
  }

  $opt->{no_of_subnodes} ||= $self->index->strategy->no_of_subnodes;
  $self->{no_of_subnodes} = $opt->{no_of_subnodes};

  $opt->{bucket_size} ||= $self->index->strategy->bucket_size;
  $self->{bucket_size} = $opt->{bucket_size};

  return $success;
}

=head2 _init_tables

Creates the index's tables.

=cut

sub _init_tables {
  my $self = shift;

  my $dbh = $self->dbh_rw;

  my $table_prefix = $self->table_prefix;
  my $sql_opt = qq(
    CREATE TABLE IF NOT EXISTS ${table_prefix}_options (
      id VARCHAR(255) PRIMARY KEY,
      value VARCHAR(1023)
    )
  );
  warn $sql_opt if DEBUG;
  $dbh->do($sql_opt);

  my $node_id_type = NODE_ID_TYPE;
  my $coord_sql = $self->node_coord_create_sql;
  my $subnodes_sql = $self->subnodes_create_sql;
  my $sql =  qq(
    CREATE TABLE IF NOT EXISTS ${table_prefix}_nodes (

lib/Algorithm/SpatialIndex/Storage/DBI.pm  view on Meta::CPAN

  my $sth;
  if (not defined $id) {
    $sth = $dbh->prepare_cached($self->{_write_new_node_sql});
    my $coords = $node->coords;
    my $snids = $node->subnode_ids;
    my @args = (
      @$coords,
      ((undef) x ($self->no_of_coords - @$coords)),
      @$snids,
      ((undef) x ($self->no_of_subnodes - @$snids))
    );
    $sth->execute(@args);
    $id = $dbh->last_insert_id('', '', '', ''); # FIXME NOT PORTABLE LIKE THAT
    $node->id($id);
  }
  else {
    $sth = $dbh->prepare_cached($self->{_write_node_sql});
    $sth->execute($id, @{$node->coords}, @{$node->subnode_ids}, $id);
  }
  $sth->finish();
  return $id;
}

sub get_option {
  my $self = shift;
  return $self->{config}->{shift()}; # We assume this data changes RARELY
}

sub set_option {
  my $self  = shift;
  my $key   = shift;
  my $value = shift;

  $self->{config}->{$key} = $value;
  $self->_write_config(); # FIXME wasteful
}

sub store_bucket {
  my $self   = shift;
  my $bucket = shift;
  my $dbh = $self->dbh_rw;
  my $id = $bucket->node_id;
  my $sql = $self->{buckets_insert_sql};
  my $is_sub = ref($sql) eq 'CODE';
  if (!$is_sub) {
    my $sth = $dbh->prepare_cached($sql->[0]);
    my $d = [$id, map {@$_} @{$bucket->items}];
    $sth->execute(map $d->[$_], @{$sql}[1..$#$sql]);
    my $err = $sth->errstr; die $err if $err;
    $sth->finish;
  }
  else {
    $sql->($id, map {@$_} @{$bucket->items});
  }
}

sub fetch_bucket {
  my $self    = shift;
  my $node_id = shift;
  my $dbh = $self->dbh_ro;
  my $selsql = $self->{buckets_select_sql};
# This throws SEGV in the driver
  #my $sth = $dbh->prepare_cached($selsql);
  #$sth->execute($node_id) or die $dbh->errstr;
  #my $row = $sth->fetchrow_arrayref;
  #$sth->finish;
  my $rows = $dbh->selectall_arrayref($selsql, {}, $node_id);
  my $row = $rows->[0];
  return undef if not defined $row;
  my $items = [];
  my $n = scalar(@{$self->item_coord_types}) + 1;
  while (@$row > 1) {
    my $item = [splice(@$row, 1, $n)];
    next if not defined $item->[0];
    push @$items, $item;
  }
  my $bucket = $self->bucket_class->new(node_id => $node_id, items => $items);
  return $bucket;
}

sub delete_bucket {
  my $self    = shift;
  my $node_id = shift;
  $node_id = $node_id->node_id if ref($node_id);
  my $tname = $self->table_prefix . '_buckets';
  $self->dbh_rw->do(qq{DELETE FROM $tname WHERE node_id=?}, {}, $node_id);
  return();
}


=head2 _coord_types_to_sql

Given an array ref containing coordinate type strings
(cf. L<Algorithm::SpatialIndex::Strategy>),
stores the SQL fragments for C<SELECT>
and C<CREATE TABLE> for the node coordinates.

The coordinates will be called C<c$i> where C<$i>
starts at 0.

=cut

sub _coord_types_to_sql {
  my $self = shift;
  my $types = shift;

  my %types = (
    float    => 'FLOAT',
    double   => 'DOUBLE',
    integer  => 'INTEGER',
    unsigned => 'INTEGER UNSIGNED',
  );
  my $create_sql = '';
  my $select_sql = '';
  my $insert_sql = '';
  my $i = 0;
  foreach my $type (@$types) {
    my $sql_type = $types{lc($type)};
    die "Invalid coord type '$type'" if not defined $sql_type;
    $create_sql .= "  c$i $sql_type, ";
    $select_sql .= "  c$i, ";
    $insert_sql .= " c$i=?, ";
    $i++;
  }
  $create_sql =~ s/, \z//;
  $select_sql =~ s/, \z//;
  $insert_sql =~ s/, \z//;
  $self->{node_coord_create_sql} = $create_sql;
  $self->{node_coord_select_sql} = $select_sql;
  $self->{node_coord_insert_sql} = $insert_sql;
}

=head2 _subnodes_sql

Given the number of subnodes per node,
creates a string of column specifications
for interpolation into a C<CREATE TABLE>
and one for interpolation into a C<SELECT>.
Saves those strings into the object.

The columns are named C<sn$i> with C<$i>
starting at 0.

=cut

sub _subnodes_sql {
  my $self = shift;
  my $no_subnodes = shift;
  my $create_sql = '';
  my $select_sql = '';
  my $insert_sql = '';
  my $i = 0;
  my $node_id_type = NODE_ID_TYPE;
  foreach my $i (0..$no_subnodes-1) {
    $create_sql .= "  sn$i $node_id_type, ";
    $select_sql .= "  sn$i, ";
    $insert_sql .= " sn$i=?, ";
    $i++;
  }
  $create_sql =~ s/, \z//;
  $select_sql =~ s/, \z//;
  $insert_sql =~ s/, \z//;
  $self->{subnodes_create_sql} = $create_sql;
  $self->{subnodes_select_sql} = $select_sql;
  $self->{subnodes_insert_sql} = $insert_sql;
}

sub _bucket_sql {
  my $self = shift;
  my $bsize = $self->bucket_size;
  my $tname = $self->table_prefix . '_buckets';

  my %types = (
    float    => 'FLOAT',
    double   => 'DOUBLE',
    integer  => 'INTEGER',
    unsigned => 'INTEGER UNSIGNED',
  );
  my $item_coord_types = [map $types{$_}, @{$self->item_coord_types}];

  # i0 INTEGER, i0c0 DOUBLE, i0c1 DOUBLE, ...
  $self->{buckets_create_sql} = qq{CREATE TABLE IF NOT EXISTS $tname ( node_id INTEGER PRIMARY KEY, }
                                . join(
                                  ', ',
                                  map {
                                    my $i = $_;
                                    my $c = 0;
                                    ("i$i INTEGER", map "i${i}c".$c++." $_", @$item_coord_types)
                                  } 0..$bsize-1
                                )
                                . ')';
  $self->{buckets_select_sql} = qq{SELECT * FROM $tname WHERE node_id=?};

  my $insert_id_list = join(
    ', ',
    map {
      my $i = $_;
      "i$i", map "i${i}c$_", 0..$#$item_coord_types
    } 0..$bsize-1
  );
  my $nentries = 1 + $bsize * (1+@$item_coord_types);
  #my $idlist = join(', ', map "i$_" 0..$bsize-1);
  my $qlist  = '?,' x $nentries;
  $qlist =~ s/,$//;
  if ($self->is_mysql) {
    $self->{buckets_insert_sql} = [
      qq{
        INSERT INTO $tname
        VALUES ($qlist)
        ON DUPLICATE KEY UPDATE $insert_id_list
      }, 0..$nentries-1
    ];
  }
  elsif ($self->is_sqlite) {
    $self->{buckets_insert_sql} = [qq{INSERT OR REPLACE INTO $tname VALUES($qlist)}, 0..$nentries-1 ];
  }
  else {
    my $insert_sql = qq{INSERT INTO $tname VALUES(?, $qlist)};
    my $update_sql = qq{UPDATE $tname SET id=?, $insert_id_list};
    $self->{buckets_insert_sql} = sub {
      my $dbh = shift;
      eval {
        $dbh->do($insert_sql, {}, @_, (undef) x ($nentries-@_));
        $dbh->do($update_sql, {}, @_, (undef) x ($nentries-@_));
        1;
      };
    };
  }
  #use Data::Dumper;
  #warn Dumper $self->{buckets_insert_sql};
}

1;
__END__

=head1 AUTHOR

Steffen Mueller, E<lt>smueller@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2010, 2011 by Steffen Mueller

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.1 or,
at your option, any later version of Perl 5 you may have available.

=cut



( run in 0.721 second using v1.01-cache-2.11-cpan-4991d5b9bd9 )