Algorithm-MinPerfHashTwoLevel

 view release on metacpan or  search on metacpan

lib/Algorithm/MinPerfHashTwoLevel.pm  view on Meta::CPAN

    return $o;
}

sub compute {
    my ($self, $source_hash)= @_;
    if ($source_hash) {
        $self->{source_hash}= $source_hash;
    } else {
        $source_hash= $self->{source_hash};
    }
    my $debug= $self->{debug} ||= 0;

    # reuse the constructor seed.
    $self->_seed($self->{constructor_seed});

    # find the number of keys we have to deal with
    my $max_tries= $self->{max_tries} || 10;
    my @failed_seeds;
    $self->{failed_seeds}= \@failed_seeds;

    for my $counter ( 1 .. $max_tries ) {
        my $seed= $self->get_seed; # ensure we have a seed set up (must be called before compute_xs)
        my $state= $self->get_state; # ensure we have a state set up (must be called before compute_xs)
        delete $self->{buckets};

        printf "MPH2L compute attempt #%2d/%2d for hash with %6d keys - using seed: %s (state: %s)\n",
            $counter,
            $max_tries,
            0+keys(%$source_hash),
            unpack("H*",$seed),
            unpack("H*",$state),
            if $debug;

        my $bad_idx= compute_xs($self)
            or return $self->{buckets};

        push @failed_seeds, $seed;
        if ($counter < $max_tries) {
            $self->re_seed();
        }
    }

lib/Algorithm/MinPerfHashTwoLevel.pm  view on Meta::CPAN


=head2 METHODS

=over 4

=item new

Construct a new Algorithm::MinPerfHashTwoLevel object. Optional arguments
which may be provided are 'source_hash' which is a hash reference to use
as the source for the minimal perfect hash, 'seed' which is expected to be
a 16 byte string, and 'debug' which is expected to be 0 or 1, as well
as variant, which may be 5 (use version v0.14 for variants before 5). 
The default is 5.

=item compute

Compute the buckets for a two level minimal perfect hash. Either operates
on the 'source_hash' passed into the constructor, or requires one to passed
in as an argument.

Returns an array of hashes containing information about each bucket:

lib/Tie/Hash/MinPerfHashTwoLevel/OnDisk.pm  view on Meta::CPAN

    my ($class, %opts)= @_;

    my $ofile= $opts{file} 
        or die "file is a mandatory option to make_file";
    my $source_hash= $opts{source_hash}
        or die "source_hash is a mandatory option to make_file";
    $opts{comment}= "" unless defined $opts{comment};
    $opts{variant}= $DEFAULT_VARIANT unless defined $opts{variant};
    
    my $comment= $opts{comment}||"";
    my $debug= $opts{debug} || 0;
    my $variant= int($opts{variant});
    my $deterministic;
    $deterministic //= delete $opts{canonical};
    $deterministic //= delete $opts{deterministic};
    $deterministic //= 1;

                    #1234567812345678
    $opts{seed} = "MinPerfHash2Levl"
        if !defined($opts{seed}) and $deterministic;

lib/Tie/Hash/MinPerfHashTwoLevel/OnDisk.pm  view on Meta::CPAN

        if $variant > MAX_VARIANT;
    die "Unknown variant '$variant', min known is "
        . MIN_VARIANT . " default is " . $DEFAULT_VARIANT
        if $variant < MIN_VARIANT;

    die "comment cannot contain null"
        if index($comment,"\0") >= 0;

    my $seed= $opts{seed};
    my $hasher= Algorithm::MinPerfHashTwoLevel->new(
        debug => $debug,
        seed => (ref $seed ? $$seed : $seed),
        variant => $variant,
        compute_flags => $compute_flags,
        max_tries => $opts{max_tries},
    );
    my $buckets= $hasher->compute($source_hash);
    my $buf_length= $hasher->{buf_length};
    my $state= $hasher->{state};
    my $buf= packed_xs($variant, $buf_length, $state, $comment, $compute_flags, @$buckets);
    $$seed= $hasher->get_seed if ref $seed;

lib/Tie/Hash/MinPerfHashTwoLevel/OnDisk.pm  view on Meta::CPAN

Tie::Hash::MinPerfHashTwoLevel::OnDisk - construct or tie a "two level" minimal perfect hash based on disk

=head1 SYNOPSIS

  use Tie::Hash::MinPerfHashTwoLevel::OnDisk;

  Tie::Hash::MinPerfHashTwoLevel::OnDisk->make_file(
    file => $some_file,
    source_hash => $some_hash,
    comment => "this is a comment",
    debug => 0,
  );

  my %hash;
  tie %hash, "Tie::Hash::MinPerfHashTwoLevel::OnDisk", $some_file;

=head1 DESCRIPTION

This module allows one to either construct, or use a precomputed minimal
perfect hash on disk via tied interface. The disk image of the hash is
loaded by using mmap, which means that multiple processes may use the

lib/Tie/Hash/MinPerfHashTwoLevel/OnDisk.pm  view on Meta::CPAN

Ignore keys with undef values during construction. This means that exists() checks
may differ between source and the constructed hash table, but avoids the need to
store such keys in the resulting file, saving space. Same as setting the
MPH_F_FILTER_UNDEF bit in compute_flags.

=item max_tries

The maximum number of attempts to make to find a solution for this keyset.
Defaults to 3. 

=item debug

Enable debug during generation.

=item variant

Select which variant of construction algorithm and file format to produce.
When omitted the variant is determined by the global var

   $Tie::Hash::MinPerfHashTwoLevel::DEFAULT_VARIANT

which itself defaults to the latest version. This is mostly for testing,
Older variants will be deprecated and removed eventually.

ppport.h  view on Meta::CPAN

dXSTARG|5.006000||p
deb_curcv|||
deb_nocontext|||vn
deb_stack_all|||
deb_stack_n|||
debop||5.005000|
debprofdump||5.005000|
debprof|||
debstackptrs||5.007003|
debstack||5.007003|
debug_start_match|||
deb||5.007003|v
defelem_target|||
del_sv|||
delimcpy_no_escape|||n
delimcpy||5.004000|n
despatch_signals||5.007001|
destroy_matcher|||
die_nocontext|||vn
die_sv|5.013001||p
die_unwind|||

ppport.h  view on Meta::CPAN

get_and_check_backslash_N_name|||
get_aux_mg|||
get_av|5.006000||p
get_c_backtrace_dump|||
get_c_backtrace|||
get_context||5.006000|n
get_cvn_flags|||
get_cvs|5.011000||p
get_cv|5.006000||p
get_db_sub|||
get_debug_opts|||
get_hash_seed|||
get_hv|5.006000||p
get_mstats|||
get_no_modify|||
get_num|||
get_op_descs||5.005000|
get_op_names||5.005000|
get_opargs|||
get_ppaddr||5.006000|
get_sv|5.006000||p

ppport.h  view on Meta::CPAN

ibcmp_utf8||5.007003|
ibcmp|||
incline|||
incpush_if_exists|||
incpush_use_sep|||
incpush|||
ingroup|||
init_argv_symbols|||
init_constants|||
init_dbargs|||
init_debugger|||
init_global_struct|||
init_ids|||
init_interp|||
init_main_stash|||
init_named_cv|||
init_perllib|||
init_postdump_symbols|||
init_predump_symbols|||
init_stacks||5.005000|
init_tm||5.007002|

ppport.h  view on Meta::CPAN

magic_clearhint|||
magic_clearisa|||
magic_clearpack|||
magic_clearsig|||
magic_copycallchecker|||
magic_dump||5.006000|
magic_existspack|||
magic_freearylen_p|||
magic_freeovrld|||
magic_getarylen|||
magic_getdebugvar|||
magic_getdefelem|||
magic_getnkeys|||
magic_getpack|||
magic_getpos|||
magic_getsig|||
magic_getsubstr|||
magic_gettaint|||
magic_getuvar|||
magic_getvec|||
magic_get|||

ppport.h  view on Meta::CPAN

magic_methpack|||
magic_nextpack|||
magic_regdata_cnt|||
magic_regdatum_get|||
magic_regdatum_set|||
magic_scalarpack|||
magic_set_all_env|||
magic_setarylen|||
magic_setcollxfrm|||
magic_setdbline|||
magic_setdebugvar|||
magic_setdefelem|||
magic_setenv|||
magic_sethint|||
magic_setisa|||
magic_setlvref|||
magic_setmglob|||
magic_setnkeys|||
magic_setnonelem|||
magic_setpack|||
magic_setpos|||

ppport.h  view on Meta::CPAN

restore_magic|||
restore_switched_locale|||
rninstr|||n
rpeep|||
rsignal_restore|||
rsignal_save|||
rsignal_state||5.004000|
rsignal||5.004000|
run_body|||
run_user_filter|||
runops_debug||5.005000|
runops_standard||5.005000|
rv2cv_op_cv||5.013006|
rvpv_dup|||
rxres_free|||
rxres_restore|||
rxres_save|||
safesyscalloc||5.006000|n
safesysfree||5.006000|n
safesysmalloc||5.006000|n
safesysrealloc||5.006000|n

ppport.h  view on Meta::CPAN

set_numeric_standard||5.006000|
set_numeric_underlying|||
set_padlist|||n
set_regex_pv|||
setdefout|||
setfd_cloexec_for_nonsysfd|||
setfd_cloexec_or_inhexec_by_sysfdness|||
setfd_cloexec|||n
setfd_inhexec_for_sysfd|||
setfd_inhexec|||n
setlocale_debug_string|||n
share_hek_flags|||
share_hek||5.004000|
should_warn_nl|||n
si_dup|||
sighandler|||n
simplify_sort|||
skip_to_be_ignored_text|||
softref2xv|||
sortcv_stacked|||
sortcv_xsub|||

ppport.h  view on Meta::CPAN

uiv_2buf|||n
unlnk|||
unpack_rec|||
unpack_str||5.007003|
unpackstring||5.008001|
unreferenced_to_tmp_stack|||
unshare_hek_or_pvn|||
unshare_hek|||
unsharepvn||5.003070|
unwind_handler_stack|||
update_debugger_info|||
upg_version||5.009005|
usage|||
utf16_textfilter|||
utf16_to_utf8_reversed||5.006001|
utf16_to_utf8||5.006001|
utf8_distance||5.006000|
utf8_hop_back|||n
utf8_hop_forward|||n
utf8_hop_safe|||n
utf8_hop||5.006000|n

t/Algo_fail.t  view on Meta::CPAN

use strict;
use warnings;

use Test::More tests => 3;
use Data::Dumper; $Data::Dumper::Sortkeys=1; $Data::Dumper::Useqq=1;
BEGIN { use_ok('Algorithm::MinPerfHashTwoLevel') };

#########################

my $class= "Algorithm::MinPerfHashTwoLevel";
my $o= $class->new("seed"=>"1234567812345678",debug=>$ENV{TEST_VERBOSE});
my (%bad,$data);

%bad= ("x"=>"whatever","y"=>"z","p"=>"q");

for my $bad_tuple (
    [ [],               "value too long",   qr/Error: Not expecting a reference value/  ],
    [ "x" x 0x10000,    "ref value",        qr/Error: String in source hash is too long to store/  ]
) {
    my ($val,$name,$like)= @$bad_tuple;
    my $ok= eval {

t/Algo_v5.t  view on Meta::CPAN


use Test::More tests => 7;
use Data::Dumper; $Data::Dumper::Sortkeys=1; $Data::Dumper::Useqq=1;
BEGIN { use_ok('Algorithm::MinPerfHashTwoLevel') };
use Algorithm::MinPerfHashTwoLevel qw(hash_with_state);

#########################

my $class= "Algorithm::MinPerfHashTwoLevel";

my $o= $class->new("seed"=>"1234567812345678",debug=>$ENV{TEST_VERBOSE},variant=>5);
my $state= $o->get_state;
my $state_as_hex= unpack "h*",$o->get_state;
is($state_as_hex,"4475044405b585b4c5d575a5454485c5050465a50515e445247574d4752525c4","state for seed is as expected");
for my $tuple (
    [ "fnorble",        17300635747579776751 ],
    [ "blah blah blah",  7551733764733872358 ],
    [ "blah blaH blah", 15370689535882563083 ],
) {
    my ($str,$want_hash)= @$tuple;
    my $got_hash= hash_with_state($str,$state);

t/OnDisk.pl  view on Meta::CPAN

                my $seed_arg= $seed;
                ok(1,"starting testset ($title)");
                #diag "building file $test_file";
                my $got_file;
                my $this_comment= "this is a comment: $title";
                my $eval_ok= eval {
                    $got_file= $class->make_file(
                        file        => $test_file,
                        source_hash => $source_hash,
                        comment     => $this_comment,
                        debug       => $ENV{TEST_VERBOSE},
                        seed        => \$seed_arg,
                        variant     => $variant,
                        canonical   => $canonical,
                    );
                    1;
                };
                my $error= !$eval_ok && $@;
                is($error,"","should be no error ($title)");
                ok($eval_ok,"make_file should not die ($title)");
                if ($eval_ok) {



( run in 1.172 second using v1.01-cache-2.11-cpan-49f99fa48dc )