Algorithm-MinPerfHashTwoLevel

 view release on metacpan or  search on metacpan

lib/Tie/Hash/MinPerfHashTwoLevel/OnDisk.pm  view on Meta::CPAN

# make_file - compute a two level minimal perfect hash for a source hash
# and write the packed image to disk.
#
# Called as a class method with named options:
#
#   file          => output file name (mandatory)
#   source_hash   => the data to hash, handed to the hasher's compute()
#                    (mandatory)
#   comment       => comment to embed; must not contain a NUL byte
#                    (defaults to "")
#   variant       => file format variant, between MIN_VARIANT and
#                    MAX_VARIANT (defaults to $DEFAULT_VARIANT)
#   debug         => passed through to the hasher (defaults to 0)
#   canonical /
#   deterministic => when true (the default) a fixed seed is used unless
#                    one is supplied, and MPH_F_DETERMINISTIC is set
#   seed          => a seed, or a reference to a scalar holding one; when
#                    a reference is given the scalar is updated with the
#                    seed reported by the hasher after computing
#   compute_flags => base flag bits (defaults to 0)
#   no_dedupe     => when true adds MPH_F_NO_DEDUPE to the flags
#   filter_undef  => when true adds MPH_F_FILTER_UNDEF to the flags
#   max_tries     => passed through to the hasher
#
# The image is written to "$file.$$" and then renamed into place, so a
# partially written image is never stored under the final name.  If
# writing or renaming fails the temporary file is removed again.
#
# Returns the output file name. Dies on any error.
sub make_file {
    my ($class, %opts)= @_;

    my $ofile= $opts{file} 
        or die "file is a mandatory option to make_file";
    my $source_hash= $opts{source_hash}
        or die "source_hash is a mandatory option to make_file";
    $opts{comment}= "" unless defined $opts{comment};
    $opts{variant}= $DEFAULT_VARIANT unless defined $opts{variant};
    
    # comment is guaranteed to be defined at this point; do not use ||
    # here as that would silently replace a comment of "0" with "".
    my $comment= $opts{comment};
    my $debug= $opts{debug} || 0;
    my $variant= int($opts{variant});

    # "canonical" takes precedence over "deterministic"; both are always
    # removed from %opts.
    my $canonical= delete $opts{canonical};
    my $deterministic= delete $opts{deterministic};
    $deterministic= $canonical if defined $canonical;
    $deterministic= 1 unless defined $deterministic;

    # ruler: the default seed is 16 characters long
                    #1234567812345678
    $opts{seed} = "MinPerfHash2Levl"
        if !defined($opts{seed}) and $deterministic;

    my $compute_flags= int($opts{compute_flags}||0);
    $compute_flags |= MPH_F_NO_DEDUPE if delete $opts{no_dedupe};
    $compute_flags |= MPH_F_DETERMINISTIC
        if $deterministic;
    $compute_flags |= MPH_F_FILTER_UNDEF
        if delete $opts{filter_undef};

    die "Unknown variant '$variant', max known is "
        . MAX_VARIANT . " default is " . $DEFAULT_VARIANT
        if $variant > MAX_VARIANT;
    die "Unknown variant '$variant', min known is "
        . MIN_VARIANT . " default is " . $DEFAULT_VARIANT
        if $variant < MIN_VARIANT;

    die "comment cannot contain null"
        if index($comment,"\0") >= 0;

    # The seed may be supplied directly or as a reference to a scalar.
    my $seed= $opts{seed};
    my $hasher= Algorithm::MinPerfHashTwoLevel->new(
        debug => $debug,
        seed => (ref $seed ? $$seed : $seed),
        variant => $variant,
        compute_flags => $compute_flags,
        max_tries => $opts{max_tries},
    );
    my $buckets= $hasher->compute($source_hash);
    my $buf_length= $hasher->{buf_length};
    my $state= $hasher->{state};
    my $buf= packed_xs($variant, $buf_length, $state, $comment, $compute_flags, @$buckets);

    # Report the seed the hasher ended up with back to the caller.
    $$seed= $hasher->get_seed if ref $seed;

    my $tmp_file= "$ofile.$$";
    open my $ofh, ">", $tmp_file
        or die "Failed to open $tmp_file for output: $!";

    # Everything after the open is wrapped in an eval so that the
    # temporary file can be cleaned up if any step fails.
    my $err;
    {
        local $@;
        eval {
            # $buf is packed binary data; make sure no CRLF translation
            # or encoding layer rewrites it on the way out.
            binmode($ofh)
                or die "failed to binmode '$tmp_file': $!";
            print {$ofh} $buf
                or die "failed to print to '$tmp_file': $!";
            close $ofh
                or die "failed to close '$tmp_file': $!";
            rename $tmp_file, $ofile
                or die "failed to rename '$tmp_file' to '$ofile': $!";
            1;
        } or $err= $@ || "unknown error while writing '$tmp_file'\n";
    }
    if (defined $err) {
        {
            # The handle may or may not still be open, depending on
            # which step failed; either way it must be closed before
            # the unlink on platforms that refuse to remove open files.
            no warnings;
            close $ofh;
        }
        unlink $tmp_file;
        die $err;
    }
    return $ofile;
}

# validate_file - open a file with validation enabled and describe it.
#
# Called as a class method with named options:
#
#   file    => name of the file to check (mandatory)
#   verbose => when true, print the report for a valid file, or die
#              with the error message for an invalid one
#
# Returns the list ($variant, $message). For a file that could be
# opened $variant is the variant from its header and $message is a
# multi-line report. Otherwise $variant is undef and $message is
# whatever was left in the scalar handed to new() as error_rsv.
sub validate_file {
    my ($class, %opts)= @_;
    die "file is a mandatory option to validate_file"
        unless $opts{file};
    my $path= $opts{file};

    # new() is given a reference to $error so it can hand back a reason
    # when it does not return an object.
    my $error;
    my $obj= $class->new(
        file      => $path,
        flags     => MPH_F_VALIDATE,
        error_rsv => \$error,
    );

    my ($variant, $report);
    if (!$obj) {
        $report= $error;
    }
    else {
        my $format= "file '%s' is a valid '%s' file\n"
                  . "  variant: %d\n"
                  . "  keys: %d\n"
                  . "  hash-state: %s\n"
                  . "  table  checksum: %016x\n"
                  . "  string checksum: %016x\n"
                  . "  comment: %s";
        $report= sprintf $format,
            $path,
            MAGIC_STR,
            $obj->get_hdr_variant,
            $obj->get_hdr_num_buckets,
            unpack("H*", $obj->get_state),
            $obj->get_hdr_table_checksum,
            $obj->get_hdr_str_buf_checksum,
            $obj->get_comment,
        ;
        $variant= $obj->get_hdr_variant;
    }

    if ($opts{verbose}) {
        # An undefined variant means the file could not be opened.
        die $report."\n"
            unless defined $variant;
        print $report;
    }
    return ($variant, $report);
}



1;
__END__

=head1 NAME

Tie::Hash::MinPerfHashTwoLevel::OnDisk - construct or tie a "two level" minimal perfect hash stored on disk

=head1 SYNOPSIS

  use Tie::Hash::MinPerfHashTwoLevel::OnDisk;

  Tie::Hash::MinPerfHashTwoLevel::OnDisk->make_file(
    file => $some_file,
    source_hash => $some_hash,
    comment => "this is a comment",



( run in 1.534 second using v1.01-cache-2.11-cpan-524268b4103 )