Algorithm-MinPerfHashTwoLevel
view release on metacpan or search on metacpan
lib/Algorithm/MinPerfHashTwoLevel.pm view on Meta::CPAN
package Algorithm::MinPerfHashTwoLevel;
use strict;
use warnings;
our $VERSION = '0.16';
our $DEFAULT_VARIANT = 5;
use Exporter qw(import);
use Carp ();
no warnings "portable";
my %constant;
BEGIN {
%constant= (
MPH_F_FILTER_UNDEF => (1<<0),
MPH_F_DETERMINISTIC => (1<<1),
#MPH_F_NO_DEDUPE => (1<<2),
#MPH_F_VALIDATE => (1<<3),
MAX_VARIANT => 5,
MIN_VARIANT => 5,
STADTX_HASH_SEED_BYTES => 16,
STADTX_HASH_STATE_BYTES => 32,
);
}
use constant \%constant;
our %EXPORT_TAGS = (
'all' => [
'$DEFAULT_VARIANT',
'MAX_VARIANT',
'MIN_VARIANT',
qw(
seed_state
hash_with_state
), sort keys %constant
],
'flags' => [ sort grep /MPH_F_/, keys %constant ],
);
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT = qw();
require XSLoader;
XSLoader::load('Algorithm::MinPerfHashTwoLevel', $VERSION);
# Preloaded methods go here.
use Carp ();
sub new {
my ($class,%opts)= @_;
my $seed= delete($opts{seed});
delete($opts{state}) and warn "ignoring 'state' parameter";
my $o= bless \%opts, $class;
$o->set_seed($seed) if $seed;
$o->{variant}= $DEFAULT_VARIANT unless defined $o->{variant};
$o->{variant}= int(0+$o->{variant});
$o->{compute_flags} ||= 0;
$o->{compute_flags} += MPH_F_FILTER_UNDEF
if delete $o->{filter_undef};
$o->{compute_flags} += MPH_F_DETERMINISTIC
if delete $o->{deterministic} or delete $o->{canonical};
die "Unknown variant '$o->{variant}' in constructor new(), max known is "
. MAX_VARIANT . " default is " . $DEFAULT_VARIANT
if $o->{variant} > MAX_VARIANT;
die "Unknown variant '$o->{variant}' in constructor new(), min known is "
. MIN_VARIANT . " default is " . $DEFAULT_VARIANT
if $o->{variant} < MIN_VARIANT;
return $o;
}
sub compute {
my ($self, $source_hash)= @_;
if ($source_hash) {
$self->{source_hash}= $source_hash;
} else {
$source_hash= $self->{source_hash};
}
my $debug= $self->{debug} ||= 0;
# reuse the constructor seed.
$self->_seed($self->{constructor_seed});
# find the number of keys we have to deal with
my $max_tries= $self->{max_tries} || 10;
my @failed_seeds;
$self->{failed_seeds}= \@failed_seeds;
for my $counter ( 1 .. $max_tries ) {
my $seed= $self->get_seed; # ensure we have a seed set up (must be called before compute_xs)
my $state= $self->get_state; # ensure we have a state set up (must be called before compute_xs)
delete $self->{buckets};
printf "MPH2L compute attempt #%2d/%2d for hash with %6d keys - using seed: %s (state: %s)\n",
$counter,
$max_tries,
0+keys(%$source_hash),
unpack("H*",$seed),
unpack("H*",$state),
if $debug;
my $bad_idx= compute_xs($self)
or return $self->{buckets};
push @failed_seeds, $seed;
if ($counter < $max_tries) {
$self->re_seed();
}
}
( run in 0.489 second using v1.01-cache-2.11-cpan-62a16548d74 )