Algorithm-MinPerfHashTwoLevel
view release on metacpan or search on metacpan
lib/Algorithm/MinPerfHashTwoLevel.pm view on Meta::CPAN
$self->{source_hash}= $source_hash;
} else {
$source_hash= $self->{source_hash};
}
my $debug= $self->{debug} ||= 0;
# reuse the constructor seed.
$self->_seed($self->{constructor_seed});
# find the number of keys we have to deal with
my $max_tries= $self->{max_tries} || 10;
my @failed_seeds;
$self->{failed_seeds}= \@failed_seeds;
for my $counter ( 1 .. $max_tries ) {
my $seed= $self->get_seed; # ensure we have a seed set up (must be called before compute_xs)
my $state= $self->get_state; # ensure we have a state set up (must be called before compute_xs)
delete $self->{buckets};
printf "MPH2L compute attempt #%2d/%2d for hash with %6d keys - using seed: %s (state: %s)\n",
$counter,
$max_tries,
0+keys(%$source_hash),
unpack("H*",$seed),
unpack("H*",$state),
if $debug;
my $bad_idx= compute_xs($self)
or return $self->{buckets};
push @failed_seeds, $seed;
if ($counter < $max_tries) {
$self->re_seed();
}
}
if ( $max_tries == 1 ) {
die sprintf "Failed to compute minimal perfect hash using seed %s", unpack "H*", $failed_seeds[0];
} else {
Carp::confess(
sprintf "Failed to compute minimal perfect hash after %d tries. Seeds tried: %s",
$max_tries, join(" ", map { unpack "H*", $_ } @failed_seeds)
);
}
# NOT REACHED.
}
sub re_seed {
my $self= shift;
my $source= $self->get_state();
return $self->_seed(substr($source,0,STADTX_HASH_SEED_BYTES) ^ substr($source,STADTX_HASH_SEED_BYTES));
}
sub _seed {
my $self= shift;
if (@_) {
my $seed= shift;
Carp::confess(sprintf "Seed should be undef, or a string exactly %d bytes long, not %d bytes",
STADTX_HASH_SEED_BYTES,length($seed))
if defined($seed) and length($seed) != 16;
$self->{seed}= $seed;
delete $self->{state};
}
if ( !defined $self->{seed} ) {
#1234567812345678
$self->{seed}= "MinPerfHash2Levl";
delete $self->{state};
}
return $self->{seed};
}
sub set_seed {
my ($self,$value)= @_;
my $seed= $self->_seed($value);
return $self->{constructor_seed}= $seed;
}
sub get_seed {
my ($self)= @_;
return $self->_seed();
}
sub get_state {
my $self= shift;
return $self->{state} ||= seed_state($self->_seed());
}
sub get_failed_seeds {
my $self= shift;
return @{$self->{failed_seeds}||[]};
}
1;
__END__
=head1 NAME
Algorithm::MinPerfHashTwoLevel - construct a "two level" minimal perfect hash
=head1 SYNOPSIS
use Algorithm::MinPerfHashTwoLevel;
my $buckets= Algorithm::MinPerfHashTwoLevel->compute(\%source_hash);
=head1 DESCRIPTION
This module implements an algorithm to construct (relatively efficiently) a
minimal perfect hash using the "two level" algorithm. A perfect hash is one
which has no collisions for any keys, a minimal perfect hash has exactly the
same number of buckets as it has keys. The "two level" algorithm involves
computing two hash values for each key. The first is used as an initial lookup
into the bucket array to find a mask value which is used to modify the second
hash value to determine the actual bucket to read from. This module computes
the appropriate mask values.
In this implementation only one 64 bit hash value is computed, but the high
and low 32 bits are used as if there were two hash values. The hash function
used is SipHash 1-3. The full 64 bit hash is called h0, the high 32 bits are
called h1, and the low 32 bits are called h2.)
( run in 0.830 second using v1.01-cache-2.11-cpan-119454b85a5 )