Algorithm-SkipList
view release on metacpan or search on metacpan
lib/Algorithm/SkipList.pm view on Meta::CPAN
}
my $level = $node->level;
for (my $i=0; $i<$level; $i++) {
$update_ref->[$i]->header()->[$i] = $node->header()->[$i];
}
# There's probably a smarter way to handle the last insert and
# last key values, but this is the fastest, easiest, safest and
# most consistent way.
$self->{LASTINSRT} = undef;
$self->reset;
$self->{SIZE} --;
$self->_adjust_level_threshold;
# We shouldn't need to "undef $node" here. The Garbage Collector
# should hanldle that (especially if there's a finger that points
# to it somewhere).
# Note: It doesn't seem to be a wise idea to return a search
# finger for deletions without further analysis
$value;
} else {
carp "key not found", if (warnings::enabled);
return;
}
}
sub exists {
my ($self, $key, $finger) = @_;
(($self->_search($key, $finger))[2] == 0);
}
sub find_with_finger {
my ($self, $key, $finger) = @_;
my ($x, $update_ref, $cmp) = $self->_search_with_finger($key, $finger);
($cmp == 0) ? (
(wantarray) ? ($x->value, $update_ref) : $x->value
) : undef;
}
sub find {
my ($self, $key, $finger) = @_;
my ($node, $update_ref, $cmp) = $self->_search($key, $finger);
($cmp == 0) ? $node->value : undef;
}
sub _first_node { # actually this is the second node
my $self = shift;
my $list = $self->list;
my $node = $list->header()->[0];
}
sub last_key {
my ($self, $node, $index) = @_;
if (@_ > 1) {
$self->{LASTKEY} = [ $node, $index ];
my $check = $index || 0;
if (($check < 0) || ($check >= $self->size)) {
carp "index out of bounds", if (warnings::enabled);
}
}
else {
unless ($self->{LASTKEY}) {
$self->{LASTKEY} = [ $self->_first_node, 0 ];
}
($node, $index) = @{ $self->{LASTKEY} };
}
if ($node) {
return (wantarray) ?
( $node->key, [ $node ], $node->value, $index ) : $node->key;
} else {
return;
}
}
sub first_key {
my $self = shift;
my $node = $self->_first_node;
if ($node) {
return $self->last_key( $node, 0);
}
else {
carp "no _first_node", if (warnings::enabled);
return;
}
}
sub next_key {
my ($self, $last_key, $finger) = @_;
my ($node, $cmp, $value, $index);
if (defined $last_key) {
($node, $finger, $cmp) = $self->_search_with_finger($last_key, $finger);
if ($cmp) {
carp "cannot find last_key", if (warnings::enabled);
return;
}
}
else {
lib/Algorithm/SkipList.pm view on Meta::CPAN
$index++;
}
if ($node->key_cmp($key) == 0) {
$self->last_key( $node, $index );
return $index;
} else {
return;
}
}
sub _debug {
my $self = shift;
my $list = $self->list;
while ($list) {
print STDERR
$list->key||'undef', "=", $list->value||'undef'," ", $list,"\n";
for(my $i=0; $i<$list->level; $i++) {
print STDERR " ", $i," ", $list->header()->[$i]
|| 'undef', "\n";
}
# print STDERR " P ", $list->prev() || 'undef', "\n";
print STDERR "\n";
$list = $list->header()->[0];
}
}
=head1 NAME
Algorithm::SkipList - Perl implementation of skip lists
=head1 REQUIREMENTS
The following non-standard modules are used:
enum
=head1 SYNOPSIS
my $list = new Algorithm::SkipList();
$list->insert( 'key1', 'value' );
$list->insert( 'key2', 'another value' );
$value = $list->find('key2');
$list->delete('key1');
=head1 DESCRIPTION
This is an implementation of I<skip lists> in Perl.
Skip lists are similar to linked lists, except that they have random
links at various I<levels> that allow searches to skip over sections
of the list, like so:
4 +---------------------------> +----------------------> +
| | |
3 +------------> +------------> +-------> +-------> +--> +
| | | | | |
2 +-------> +--> +-------> +--> +--> +--> +-------> +--> +
| | | | | | | | |
1 +--> +--> +--> +--> +--> +--> +--> +--> +--> +--> +--> +
A B C D E F G H I J NIL
A search would start at the top level: if the link to the right
exceeds the target key, then it descends a level.
Skip lists generally perform as well as balanced trees for searching
but do not have the overhead with respect to inserting new items. See
the included file C<Benchmark.txt> for a comparison of performance
with other Perl modules.
For more information on skip lists, see the L</"SEE ALSO"> section below.
Only alphanumeric keys are supported "out of the box". To use numeric
or other types of keys, see L</"Customizing the Node Class"> below.
=head2 Methods
A detailed description of the methods used is below.
=over
=item new
$list = new Algorithm::SkipList();
Creates a new skip list.
If you need to use a different L<node class|/"Node Methods"> for using
customized L<comparison|/"key_cmp"> routines, you will need to specify a
different class:
$list = new Algorithm::SkipList( node_class => 'MyNodeClass' );
See the L</"Customizing the Node Class"> section below.
Specialized internal parameters may be configured:
$list = new Algorithm::SkipList( max_level => 32 );
Defines a different maximum list level.
The initial list (see the L</"list"> method) will be a
L<random|/"_new_node_level"> number of levels, and will increase over
time if inserted nodes have higher levels, up until L</max_level>
levels. See L</max_level> for more information on this parameter.
You can also control the probability used to determine level sizes for
each node by setting the L<P|/"p"> and k values:
$list = new Algorithm::SkipList( p => 0.25, k => 1 );
See L<P|/p> for more information on this parameter.
You can enable duplicate keys by using the following:
$list = new Algorithm::SkipList( duplicates => 1 );
This is an experimental feature. See the L</KNOWN ISSUES> section
below.
=item insert
$list->insert( $key, $value );
Inserts a new node into the list.
You may also use a L<search finger|/"About Search Fingers"> with insert,
provided that the finger is for a key that occurs earlier in the list:
$list->insert( $key, $value, $finger );
Using fingers for inserts is I<not> recommended since there is a risk
of producing corrupted lists.
=item exists
if ($list->exists( $key )) { ... }
Returns true if there exists a node associated with the key, false
otherwise.
This may also be used with L<search fingers|/"About Search Fingers">:
if ($list->exists( $key, $finger )) { ... }
=item find_with_finger
$value = $list->find_with_finger( $key );
Searches for the node associated with the key, and returns the value. If
the key cannot be found, returns C<undef>.
L<Search fingers|/"About Search Fingers"> may also be used:
$value = $list->find_with_finger( $key, $finger );
To obtain the search finger for a key, call L</find_with_finger> in a
list context:
($value, $finger) = $list->find_with_finger( $key );
=item find
$value = $list->find( $key );
$value = $list->find( $key, $finger );
Searches for the node associated with the key, and returns the value. If
the key cannot be found, returns C<undef>.
This method is slightly faster than L</find_with_finger> since it does
not return a search finger when called in list context.
If you are searching for duplicate keys, you must use
L</find_with_finger> or L</find_duplicates>.
=item find_duplicates
lib/Algorithm/SkipList.pm view on Meta::CPAN
=item values
@values = $list->values;
Returns a list of values (corresponding to the keys returned by the
L</keys> method).
This is an autoloading method.
=back
=head2 Internal Methods
Internal methods are documented below. These are intended for
developer use only. These may change in future versions.
=over
=item _search_with_finger
($node, $finger, $cmp) = $list->_search_with_finger( $key );
Searches for the node with a key. If the key is found, that node is
returned along with a L</"header">. If the key is not found, the previous
node from where the node would be if it existed is returned.
Note that the value of C<$cmp>
$cmp = $node->key_cmp( $key )
is returned because it is already determined by L</_search>.
Search fingers may also be specified:
($node, $finger, $cmp) = $list->_search_with_finger( $key, $finger );
Note that the L</"header"> is actually a
L<search finger|/"About Search Fingers">.
=item _search
($node, $finger, $cmp) = $list->_search( $key, [$finger] );
Same as L</_search_with_finger>, only that a search finger is not returned.
(Actually, an initial "dummy" finger is returned.)
This is useful for searches where a finger is not needed. The speed
of searching is improved.
=item k
$k = $list->k;
Returns the I<k> value.
$list->k( $k );
Sets the I<k> value.
Higher values will on the average have less pointers per node, but
take longer for searches. See the section on the L<P|/p> value.
=item p
$plevel = $list->p;
Returns the I<P> value.
$list->p( $plevel );
Changes the value of I<P>. Lower values will on the average have less
pointers per node, but will take longer for searches.
The probability that a particular node will have a forward pointer at
level I<i> is: I<p**(i+k-1)>.
For more information, consult the references below in the
L</"SEE ALSO"> section.
=item max_level
$max = $list->max_level;
Returns the maximum level that L</_new_node_level> can generate.
eval {
$list->max_level( $level );
};
Changes the maximum level. If level is less than L</MIN_LEVEL>, or
greater than L</MAX_LEVEL> or the current list L</level>, this will fail
(hence the need for setting it in an C<eval> block).
The value defaults to L</MAX_LEVEL>, which is 32. There is usually no
need to change this value, since the maximum level that a new node
will have will not be greater than it actually needs, up until 2^32
nodes. (The current version of this module is not designed to handle
lists larger than 2^32 nodes.)
Decreasing the maximum level to less than is needed will likely
degrade performance.
=item _new_node_level
$level = $list->_new_node_level;
This is an internal function for generating a random level for new nodes.
Levels are determined by the L<P|/"p"> value. The probability that a
node will have 1 level is I<P>; the probability that a node will have
2 levels is I<P^2>; the probability that a node will have 3 levels is
I<P^3>, et cetera.
The value will never be greater than L</max_level>.
Note: in earlier versions it was called C<_random_level>.
=item list
$node = $list->list;
Returns the initial node in the list, which is a
C<Algorithm::SkipList::Node> (See L<below|/"Node Methods">.)
The key and value for this node are undefined.
=item _first_node
$node = $list->_first_node;
Returns the first node with a key (the second node) in a list. This
is used by the L</first_key>, L</least>, L</append> and L</merge>
methods.
=item _greatest_node
$node = $list->_greatest_node;
Returns the last node in the list. This is used by the L</append> and
L</greatest> methods.
=item _node_class
$node_class_name = $list->_node_class;
Returns the name of the node class used. By default this is the
C<Algorithm::SkipList::Node>, which is discussed below.
=item _build_distribution
$list->_build_distribution;
Rebuilds the probability distribution array C<{P_LEVELS}> upon calls
to L</_set_p> and L</_set_k>.
=item _set_node_class
=item _set_max_level
=item _set_p
=item _set_k
These methods are used during initialization of the object.
=item _debug
$list->_debug;
Used for debugging skip lists by developer. The output of this
function is subject to change.
=back
=head2 Node Methods
Methods for the L<Algorithm::SkipList::Node> object are documented in
that module. They are for internal use by the main
C<Algorithm::SkipList> module.
=head1 SPECIAL FEATURES
=head2 Tied Hashes
Hashes can be tied to C<Algorithm::SkipList> objects:
tie %hash, 'Algorithm::SkipList';
$hash{'foo'} = 'bar';
$list = tied %hash;
print $list->find('foo'); # returns bar
( run in 1.161 second using v1.01-cache-2.11-cpan-39bf76dae61 )