Algorithm-MinPerfHashTwoLevel

 view release on metacpan or  search on metacpan

lib/Algorithm/MinPerfHashTwoLevel.pm  view on Meta::CPAN

package Algorithm::MinPerfHashTwoLevel;
use strict;
use warnings;
our $VERSION = '0.16';
our $DEFAULT_VARIANT = 5;

use Exporter qw(import);
use Carp ();

no warnings "portable";
my %constant;
BEGIN {
    %constant= (
        MPH_F_FILTER_UNDEF          =>  (1<<0),
        MPH_F_DETERMINISTIC         =>  (1<<1),
       #MPH_F_NO_DEDUPE             =>  (1<<2),
       #MPH_F_VALIDATE              =>  (1<<3),
        MAX_VARIANT                 =>  5,
        MIN_VARIANT                 =>  5,
        STADTX_HASH_SEED_BYTES      => 16,
        STADTX_HASH_STATE_BYTES     => 32,
    );
}
use constant \%constant;

our %EXPORT_TAGS = (
    'all' => [
        '$DEFAULT_VARIANT',
        'MAX_VARIANT',
        'MIN_VARIANT',
        qw(
            seed_state
            hash_with_state
        ), sort keys %constant
    ],
    'flags' => [ sort grep /MPH_F_/, keys %constant ],
);

our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

our @EXPORT = qw();


require XSLoader;
XSLoader::load('Algorithm::MinPerfHashTwoLevel', $VERSION);

# Preloaded methods go here.

use Carp ();

sub new {
    my ($class,%opts)= @_;
    my $seed= delete($opts{seed});
    delete($opts{state}) and warn "ignoring 'state' parameter";

    my $o= bless \%opts, $class;

    $o->set_seed($seed) if $seed;

    $o->{variant}= $DEFAULT_VARIANT unless defined $o->{variant};
    $o->{variant}= int(0+$o->{variant});

    $o->{compute_flags} ||= 0;
    $o->{compute_flags} += MPH_F_FILTER_UNDEF
        if delete $o->{filter_undef};
    $o->{compute_flags} += MPH_F_DETERMINISTIC
        if delete $o->{deterministic} or delete $o->{canonical};

    die "Unknown variant '$o->{variant}' in constructor new(), max known is "
        . MAX_VARIANT . " default is " . $DEFAULT_VARIANT
        if $o->{variant} > MAX_VARIANT;
    die "Unknown variant '$o->{variant}' in constructor new(), min known is "
        . MIN_VARIANT . " default is " . $DEFAULT_VARIANT
        if $o->{variant} < MIN_VARIANT;

    return $o;
}

sub compute {
    my ($self, $source_hash)= @_;
    if ($source_hash) {
        $self->{source_hash}= $source_hash;
    } else {
        $source_hash= $self->{source_hash};
    }
    my $debug= $self->{debug} ||= 0;

    # reuse the constructor seed.
    $self->_seed($self->{constructor_seed});

    # find the number of keys we have to deal with
    my $max_tries= $self->{max_tries} || 10;
    my @failed_seeds;
    $self->{failed_seeds}= \@failed_seeds;

    for my $counter ( 1 .. $max_tries ) {
        my $seed= $self->get_seed; # ensure we have a seed set up (must be called before compute_xs)
        my $state= $self->get_state; # ensure we have a state set up (must be called before compute_xs)
        delete $self->{buckets};

        printf "MPH2L compute attempt #%2d/%2d for hash with %6d keys - using seed: %s (state: %s)\n",
            $counter,
            $max_tries,
            0+keys(%$source_hash),
            unpack("H*",$seed),
            unpack("H*",$state),
            if $debug;

        my $bad_idx= compute_xs($self)
            or return $self->{buckets};

        push @failed_seeds, $seed;
        if ($counter < $max_tries) {
            $self->re_seed();
        }
    }



( run in 0.489 second using v1.01-cache-2.11-cpan-62a16548d74 )