Algorithm-MinPerfHashTwoLevel
view release on metacpan or search on metacpan
lib/Algorithm/MinPerfHashTwoLevel.pm view on Meta::CPAN
return $o;
}
sub compute {
my ($self, $source_hash)= @_;
if ($source_hash) {
$self->{source_hash}= $source_hash;
} else {
$source_hash= $self->{source_hash};
}
my $debug= $self->{debug} ||= 0;
# reuse the constructor seed.
$self->_seed($self->{constructor_seed});
# find the number of keys we have to deal with
my $max_tries= $self->{max_tries} || 10;
my @failed_seeds;
$self->{failed_seeds}= \@failed_seeds;
for my $counter ( 1 .. $max_tries ) {
my $seed= $self->get_seed; # ensure we have a seed set up (must be called before compute_xs)
my $state= $self->get_state; # ensure we have a state set up (must be called before compute_xs)
delete $self->{buckets};
printf "MPH2L compute attempt #%2d/%2d for hash with %6d keys - using seed: %s (state: %s)\n",
$counter,
$max_tries,
0+keys(%$source_hash),
unpack("H*",$seed),
unpack("H*",$state),
if $debug;
my $bad_idx= compute_xs($self)
or return $self->{buckets};
push @failed_seeds, $seed;
if ($counter < $max_tries) {
$self->re_seed();
}
}
lib/Algorithm/MinPerfHashTwoLevel.pm view on Meta::CPAN
=head2 METHODS
=over 4
=item new
Construct a new Algorithm::MinPerfHashTwoLevel object. Optional arguments
which may be provided are 'source_hash' which is a hash reference to use
as the source for the minimal perfect hash, 'seed' which is expected to be
a 16 byte string, and 'debug' which is expected to be 0 or 1, as well
as variant, which may be 5 (use version v0.14 for variants before 5).
The default is 5.
=item compute
Compute the buckets for a two level minimal perfect hash. Either operates
on the 'source_hash' passed into the constructor, or requires one to passed
in as an argument.
Returns an array of hashes containing information about each bucket:
lib/Tie/Hash/MinPerfHashTwoLevel/OnDisk.pm view on Meta::CPAN
my ($class, %opts)= @_;
my $ofile= $opts{file}
or die "file is a mandatory option to make_file";
my $source_hash= $opts{source_hash}
or die "source_hash is a mandatory option to make_file";
$opts{comment}= "" unless defined $opts{comment};
$opts{variant}= $DEFAULT_VARIANT unless defined $opts{variant};
my $comment= $opts{comment}||"";
my $debug= $opts{debug} || 0;
my $variant= int($opts{variant});
my $deterministic;
$deterministic //= delete $opts{canonical};
$deterministic //= delete $opts{deterministic};
$deterministic //= 1;
#1234567812345678
$opts{seed} = "MinPerfHash2Levl"
if !defined($opts{seed}) and $deterministic;
lib/Tie/Hash/MinPerfHashTwoLevel/OnDisk.pm view on Meta::CPAN
if $variant > MAX_VARIANT;
die "Unknown variant '$variant', min known is "
. MIN_VARIANT . " default is " . $DEFAULT_VARIANT
if $variant < MIN_VARIANT;
die "comment cannot contain null"
if index($comment,"\0") >= 0;
my $seed= $opts{seed};
my $hasher= Algorithm::MinPerfHashTwoLevel->new(
debug => $debug,
seed => (ref $seed ? $$seed : $seed),
variant => $variant,
compute_flags => $compute_flags,
max_tries => $opts{max_tries},
);
my $buckets= $hasher->compute($source_hash);
my $buf_length= $hasher->{buf_length};
my $state= $hasher->{state};
my $buf= packed_xs($variant, $buf_length, $state, $comment, $compute_flags, @$buckets);
$$seed= $hasher->get_seed if ref $seed;
lib/Tie/Hash/MinPerfHashTwoLevel/OnDisk.pm view on Meta::CPAN
Tie::Hash::MinPerfHashTwoLevel::OnDisk - construct or tie a "two level" minimal perfect hash based on disk
=head1 SYNOPSIS
use Tie::Hash::MinPerfHashTwoLevel::OnDisk;
Tie::Hash::MinPerfHashTwoLevel::OnDisk->make_file(
file => $some_file,
source_hash => $some_hash,
comment => "this is a comment",
debug => 0,
);
my %hash;
tie %hash, "Tie::Hash::MinPerfHashTwoLevel::OnDisk", $some_file;
=head1 DESCRIPTION
This module allows one to either construct, or use a precomputed minimal
perfect hash on disk via tied interface. The disk image of the hash is
loaded by using mmap, which means that multiple processes may use the
lib/Tie/Hash/MinPerfHashTwoLevel/OnDisk.pm view on Meta::CPAN
Ignore keys with undef values during construction. This means that exists() checks
may differ between source and the constructed hash table, but avoids the need to
store such keys in the resulting file, saving space. Same as setting the
MPH_F_FILTER_UNDEF bit in compute_flags.
=item max_tries
The maximum number of attempts to make to find a solution for this keyset.
Defaults to 3.
=item debug
Enable debug during generation.
=item variant
Select which variant of construction algorithm and file format to produce.
When omitted the variant is determined by the global var
$Tie::Hash::MinPerfHashTwoLevel::DEFAULT_VARIANT
which itself defaults to the latest version. This is mostly for testing,
Older variants will be deprecated and removed eventually.
dXSTARG|5.006000||p
deb_curcv|||
deb_nocontext|||vn
deb_stack_all|||
deb_stack_n|||
debop||5.005000|
debprofdump||5.005000|
debprof|||
debstackptrs||5.007003|
debstack||5.007003|
debug_start_match|||
deb||5.007003|v
defelem_target|||
del_sv|||
delimcpy_no_escape|||n
delimcpy||5.004000|n
despatch_signals||5.007001|
destroy_matcher|||
die_nocontext|||vn
die_sv|5.013001||p
die_unwind|||
get_and_check_backslash_N_name|||
get_aux_mg|||
get_av|5.006000||p
get_c_backtrace_dump|||
get_c_backtrace|||
get_context||5.006000|n
get_cvn_flags|||
get_cvs|5.011000||p
get_cv|5.006000||p
get_db_sub|||
get_debug_opts|||
get_hash_seed|||
get_hv|5.006000||p
get_mstats|||
get_no_modify|||
get_num|||
get_op_descs||5.005000|
get_op_names||5.005000|
get_opargs|||
get_ppaddr||5.006000|
get_sv|5.006000||p
ibcmp_utf8||5.007003|
ibcmp|||
incline|||
incpush_if_exists|||
incpush_use_sep|||
incpush|||
ingroup|||
init_argv_symbols|||
init_constants|||
init_dbargs|||
init_debugger|||
init_global_struct|||
init_ids|||
init_interp|||
init_main_stash|||
init_named_cv|||
init_perllib|||
init_postdump_symbols|||
init_predump_symbols|||
init_stacks||5.005000|
init_tm||5.007002|
magic_clearhint|||
magic_clearisa|||
magic_clearpack|||
magic_clearsig|||
magic_copycallchecker|||
magic_dump||5.006000|
magic_existspack|||
magic_freearylen_p|||
magic_freeovrld|||
magic_getarylen|||
magic_getdebugvar|||
magic_getdefelem|||
magic_getnkeys|||
magic_getpack|||
magic_getpos|||
magic_getsig|||
magic_getsubstr|||
magic_gettaint|||
magic_getuvar|||
magic_getvec|||
magic_get|||
magic_methpack|||
magic_nextpack|||
magic_regdata_cnt|||
magic_regdatum_get|||
magic_regdatum_set|||
magic_scalarpack|||
magic_set_all_env|||
magic_setarylen|||
magic_setcollxfrm|||
magic_setdbline|||
magic_setdebugvar|||
magic_setdefelem|||
magic_setenv|||
magic_sethint|||
magic_setisa|||
magic_setlvref|||
magic_setmglob|||
magic_setnkeys|||
magic_setnonelem|||
magic_setpack|||
magic_setpos|||
restore_magic|||
restore_switched_locale|||
rninstr|||n
rpeep|||
rsignal_restore|||
rsignal_save|||
rsignal_state||5.004000|
rsignal||5.004000|
run_body|||
run_user_filter|||
runops_debug||5.005000|
runops_standard||5.005000|
rv2cv_op_cv||5.013006|
rvpv_dup|||
rxres_free|||
rxres_restore|||
rxres_save|||
safesyscalloc||5.006000|n
safesysfree||5.006000|n
safesysmalloc||5.006000|n
safesysrealloc||5.006000|n
set_numeric_standard||5.006000|
set_numeric_underlying|||
set_padlist|||n
set_regex_pv|||
setdefout|||
setfd_cloexec_for_nonsysfd|||
setfd_cloexec_or_inhexec_by_sysfdness|||
setfd_cloexec|||n
setfd_inhexec_for_sysfd|||
setfd_inhexec|||n
setlocale_debug_string|||n
share_hek_flags|||
share_hek||5.004000|
should_warn_nl|||n
si_dup|||
sighandler|||n
simplify_sort|||
skip_to_be_ignored_text|||
softref2xv|||
sortcv_stacked|||
sortcv_xsub|||
uiv_2buf|||n
unlnk|||
unpack_rec|||
unpack_str||5.007003|
unpackstring||5.008001|
unreferenced_to_tmp_stack|||
unshare_hek_or_pvn|||
unshare_hek|||
unsharepvn||5.003070|
unwind_handler_stack|||
update_debugger_info|||
upg_version||5.009005|
usage|||
utf16_textfilter|||
utf16_to_utf8_reversed||5.006001|
utf16_to_utf8||5.006001|
utf8_distance||5.006000|
utf8_hop_back|||n
utf8_hop_forward|||n
utf8_hop_safe|||n
utf8_hop||5.006000|n
t/Algo_fail.t view on Meta::CPAN
use strict;
use warnings;
use Test::More tests => 3;
use Data::Dumper; $Data::Dumper::Sortkeys=1; $Data::Dumper::Useqq=1;
BEGIN { use_ok('Algorithm::MinPerfHashTwoLevel') };
#########################
my $class= "Algorithm::MinPerfHashTwoLevel";
my $o= $class->new("seed"=>"1234567812345678",debug=>$ENV{TEST_VERBOSE});
my (%bad,$data);
%bad= ("x"=>"whatever","y"=>"z","p"=>"q");
for my $bad_tuple (
[ [], "value too long", qr/Error: Not expecting a reference value/ ],
[ "x" x 0x10000, "ref value", qr/Error: String in source hash is too long to store/ ]
) {
my ($val,$name,$like)= @$bad_tuple;
my $ok= eval {
t/Algo_v5.t view on Meta::CPAN
use Test::More tests => 7;
use Data::Dumper; $Data::Dumper::Sortkeys=1; $Data::Dumper::Useqq=1;
BEGIN { use_ok('Algorithm::MinPerfHashTwoLevel') };
use Algorithm::MinPerfHashTwoLevel qw(hash_with_state);
#########################
my $class= "Algorithm::MinPerfHashTwoLevel";
my $o= $class->new("seed"=>"1234567812345678",debug=>$ENV{TEST_VERBOSE},variant=>5);
my $state= $o->get_state;
my $state_as_hex= unpack "h*",$o->get_state;
is($state_as_hex,"4475044405b585b4c5d575a5454485c5050465a50515e445247574d4752525c4","state for seed is as expected");
for my $tuple (
[ "fnorble", 17300635747579776751 ],
[ "blah blah blah", 7551733764733872358 ],
[ "blah blaH blah", 15370689535882563083 ],
) {
my ($str,$want_hash)= @$tuple;
my $got_hash= hash_with_state($str,$state);
t/OnDisk.pl view on Meta::CPAN
my $seed_arg= $seed;
ok(1,"starting testset ($title)");
#diag "building file $test_file";
my $got_file;
my $this_comment= "this is a comment: $title";
my $eval_ok= eval {
$got_file= $class->make_file(
file => $test_file,
source_hash => $source_hash,
comment => $this_comment,
debug => $ENV{TEST_VERBOSE},
seed => \$seed_arg,
variant => $variant,
canonical => $canonical,
);
1;
};
my $error= !$eval_ok && $@;
is($error,"","should be no error ($title)");
ok($eval_ok,"make_file should not die ($title)");
if ($eval_ok) {
( run in 1.172 second using v1.01-cache-2.11-cpan-49f99fa48dc )