Algorithm-MinPerfHashTwoLevel

 view release on metacpan or  search on metacpan

MinPerfHashTwoLevel.xs  view on Meta::CPAN

#include <sys/stat.h>
#include <assert.h>
#include "mph2l.h"
#include "mph_hv_macro.h"
#include "mph_siphash.h"

#define MAX_VARIANT 5
#define MIN_VARIANT 5

MPH_STATIC_INLINE void
sv_set_from_bucket(pTHX_ SV *sv, U8 *strs, const U32 ofs, const U32 len, const U32 idx, const U8 *flags, const U32 bits, const U8 utf8_default, const U8 utf8_default_shift) {
    U8 *ptr;
    U8 is_utf8;
    if (ofs) {
        ptr= (strs) + (ofs);
        if (utf8_default) {
            is_utf8= utf8_default >> utf8_default_shift;
        } else {
            GETBITS(is_utf8,flags,idx,bits);
        }
    } else {
        ptr= 0;
        is_utf8= 0;
    }
    /* note that sv_setpvn() will cause the sv to
     * become undef if ptr is 0 */
    sv_setpvn_mg((sv),ptr,len);
    if (is_utf8 > 1) {
        sv_utf8_upgrade(sv);
    }
    else
    if (is_utf8) {
        SvUTF8_on(sv);
    }
    else
    if (ptr) {
        SvUTF8_off(sv);
    }
}

MPH_STATIC_INLINE int
lookup_bucket(pTHX_ struct mph_header *mph, U32 index, SV *key_sv, SV *val_sv)

MinPerfHashTwoLevel.xs  view on Meta::CPAN

    U8 *key_pv;
    U64 h0;
    U32 h1;
    U32 h2;
    U32 index;
    U8 *got_key_pv;
    STRLEN got_key_len;

    if (SvUTF8(key_sv)) {
        SV *tmp= sv_2mortal(newSVsv(key_sv));
        sv_utf8_downgrade(tmp,1);
        key_sv= tmp;
    }
    key_pv= SvPV(key_sv,key_len);
    h0= mph_hash_with_state(state,key_pv,key_len);
    h1= h0 >> 32;
    index= h1 % mph->num_buckets;

    bucket= buckets + index;
    if (!bucket->xor_val)
        return 0;

MinPerfHashTwoLevel.xs  view on Meta::CPAN

    }
    return head->variant;
}

void
mph_munmap(struct mph_obj *obj) {
    munmap(obj->header,obj->bytes);
}

STRLEN
normalize_with_flags(pTHX_ SV *sv, SV *normalized_sv, SV *is_utf8_sv, int downgrade) {
    STRLEN len;
    if (SvROK(sv)) {
        croak("Error: Not expecting a reference value in source hash");
    }
    sv_setsv(normalized_sv,sv);
    if (SvOK(sv)) {
        STRLEN pv_len;
        char *pv= SvPV(sv,pv_len);
        if (pv_len > 0xFFFF)
            croak("Error: String in source hash is too long to store, max length is %u got length %lu", 0xFFFF, pv_len);
        if (SvUTF8(sv)) {
            if (downgrade)
                sv_utf8_downgrade(normalized_sv,1);
            if (SvUTF8(normalized_sv)) {
                SvUTF8_off(normalized_sv);
                sv_setiv(is_utf8_sv,1);
            } else {
                sv_setiv(is_utf8_sv,2);
            }
        }
        return pv_len;
    } else {
        sv_setiv(is_utf8_sv, 0);
        return 0;
    }
}

U32
_roundup(const U32 n, const U32 s) {
    const U32 r= n % s;
    if (r) {
        return n + s - r;
    } else {

MinPerfHashTwoLevel.xs  view on Meta::CPAN

normalize_source_hash(pTHX_ HV *source_hv, AV *keys_av, U32 compute_flags, SV *buf_length_sv, char *state_pv) {
    dMY_CXT;
    HE *he;
    U32 buf_length= 0;
    U32 ctr;

    hv_iterinit(source_hv);
    while (he= hv_iternext(source_hv)) {
        SV *val_sv= HeVAL(he);
        SV *val_normalized_sv;
        SV *val_is_utf8_sv;

        SV *key_sv;
        SV *key_normalized_sv;
        SV *key_is_utf8_sv;
        HV *hv;
        U8 *key_pv;
        STRLEN key_len;
        U64 h0;

        if (!val_sv) croak("panic: no sv for value?");
        if (!SvOK(val_sv) && (compute_flags & MPH_F_FILTER_UNDEF)) continue;

        hv= newHV();
        val_normalized_sv= newSV(0);
        val_is_utf8_sv= newSVuv(0);

        key_sv= newSVhek(HeKEY_hek(he));
        key_normalized_sv= newSV(0);
        key_is_utf8_sv= newSVuv(0);

        hv_ksplit(hv,15);
        hv_store_ent_with_keysv(hv,MPH_KEYSV_KEY,            key_sv);
        hv_store_ent_with_keysv(hv,MPH_KEYSV_KEY_NORMALIZED, key_normalized_sv);
        hv_store_ent_with_keysv(hv,MPH_KEYSV_KEY_IS_UTF8,    key_is_utf8_sv);
        hv_store_ent_with_keysv(hv,MPH_KEYSV_VAL,            SvREFCNT_inc_simple_NN(val_sv));
        hv_store_ent_with_keysv(hv,MPH_KEYSV_VAL_NORMALIZED, val_normalized_sv);
        hv_store_ent_with_keysv(hv,MPH_KEYSV_VAL_IS_UTF8,    val_is_utf8_sv);
        /* install everything into the keys_av just in case normalize_with_flags() dies */
        av_push(keys_av,newRV_noinc((SV*)hv));

        buf_length += normalize_with_flags(aTHX_ key_sv, key_normalized_sv, key_is_utf8_sv, 1);
        buf_length += normalize_with_flags(aTHX_ val_sv, val_normalized_sv, val_is_utf8_sv, 0);

        key_pv= (U8 *)SvPV(key_normalized_sv,key_len);
        h0= mph_hash_with_state(state_pv,key_pv,key_len);

        hv_store_ent_with_keysv(hv,MPH_KEYSV_H0,             newSVuv(h0));
    }
    if (buf_length_sv)
        sv_setuv(buf_length_sv, buf_length);

    /* we now know how many keys there are, and what the max_xor_val should be */

MinPerfHashTwoLevel.xs  view on Meta::CPAN

    char *state;
    struct mph_bucket *table;
    char *key_flags;
    char *val_flags;
    char *str_buf_start;
    char *str_buf_end;
    char *str_buf_pos;
    U32 i;
    STRLEN pv_len;
    char *pv;
    U32 key_is_utf8_count[3]={0,0,0};
    U32 val_is_utf8_count[2]={0,0};
    U32 used_flags;
    U32 the_flag;
    IV key_is_utf8_generic=-1;
    IV val_is_utf8_generic=-1;

    for (i= 0; i < bucket_count; i++) {
        SV **got= av_fetch(buckets_av,i,0);
        HV *hv= (HV *)SvRV(*got);
        HE *key_is_utf8_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_KEY_IS_UTF8,0);
        HE *val_is_utf8_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_VAL_IS_UTF8,0);
        key_is_utf8_count[SvUV(HeVAL(key_is_utf8_he))]++;
        val_is_utf8_count[SvUV(HeVAL(val_is_utf8_he))]++;
    }
    used_flags= 0;
    if (key_is_utf8_count[0]) { the_flag= 0; used_flags++; }
    if (key_is_utf8_count[1]) { the_flag= 1; used_flags++; }
    if (key_is_utf8_count[2]) { the_flag= 2; used_flags++; }
    if (used_flags == 1) {
        key_is_utf8_generic= the_flag;
        key_flags_rlen= 0;
    }
    used_flags= 0;
    if (val_is_utf8_count[0]) { the_flag= 0; used_flags++; }
    if (val_is_utf8_count[1]) { the_flag= 1; used_flags++; }
    if (used_flags == 1) {
        val_is_utf8_generic= the_flag;
        val_flags_rlen= 0;
    }

    total_size=
        + header_rlen
        + state_rlen
        + table_rlen
        + key_flags_rlen
        + val_flags_rlen
        + str_rlen

MinPerfHashTwoLevel.xs  view on Meta::CPAN


    head->magic_num= 1278363728;
    head->variant= variant;
    head->num_buckets= bucket_count;
    head->state_ofs= header_rlen;
    head->table_ofs= head->state_ofs + state_rlen;
    head->key_flags_ofs= head->table_ofs + table_rlen;
    head->val_flags_ofs= head->key_flags_ofs + key_flags_rlen;
    head->str_buf_ofs= head->val_flags_ofs + val_flags_rlen;

    if (val_is_utf8_generic >= 0)
        head->general_flags |= (MPH_VALS_ARE_SAME_UTF8NESS_FLAG_BIT | (val_is_utf8_generic << MPH_VALS_ARE_SAME_UTF8NESS_SHIFT));
    if (key_is_utf8_generic >= 0)
        head->general_flags |= (MPH_KEYS_ARE_SAME_UTF8NESS_FLAG_BIT | (key_is_utf8_generic << MPH_KEYS_ARE_SAME_UTF8NESS_SHIFT));

    state= start + head->state_ofs;
    table= (struct mph_bucket *)(start + head->table_ofs);
    key_flags= start + head->key_flags_ofs;
    val_flags= start + head->val_flags_ofs;
    str_buf_start= start + head->str_buf_ofs;
    str_buf_end= start + total_size;
    str_buf_pos= str_buf_start + 2;

    Copy(state_pv,state,state_len,char);

MinPerfHashTwoLevel.xs  view on Meta::CPAN

        HE *val_normalized_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_VAL_NORMALIZED,0);
        HE *xor_val_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_XOR_VAL,0);

        if (xor_val_he) {
            table[i].xor_val= SvUV(HeVAL(xor_val_he));
        } else {
            table[i].xor_val= 0;
        }
        SETOFS(i,key_normalized_he,table,key_ofs,key_len,str_buf_start,str_buf_pos,str_buf_end,str_ofs_hv);
        SETOFS(i,val_normalized_he,table,val_ofs,val_len,str_buf_start,str_buf_pos,str_buf_end,str_ofs_hv);
        if ( key_is_utf8_generic < 0) {
            HE *key_is_utf8_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_KEY_IS_UTF8,0);
            if (key_is_utf8_he) {
                UV u= SvUV(HeVAL(key_is_utf8_he));
                SETBITS(u,key_flags,i,2);
            } else {
                croak("panic: out of memory? no key_is_utf8_he for %u",i);
            }
        }
        if ( val_is_utf8_generic < 0 ) {
            HE *val_is_utf8_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_VAL_IS_UTF8,0);
            if (val_is_utf8_he) {
                UV u= SvUV(HeVAL(val_is_utf8_he));
                SETBITS(u,val_flags,i,1);
            } else {
                croak("panic: out of memory? no val_is_utf8_he for %u",i);
            }
        }
    }
    *str_buf_pos =   0; str_buf_pos++;
    *str_buf_pos = 128; str_buf_pos++;
    {
        U32 r= (str_buf_pos - start) % alignment;
        if (r) {
            str_buf_pos += (alignment - r);
        }

lib/Algorithm/MinPerfHashTwoLevel.pm  view on Meta::CPAN


    x = ((x >> 16) ^ x) * 0x45d9f3b;
    x = ((x >> 16) ^ x) * 0x45d9f3b;
    x = ((x >> 16) ^ x);

which is just a simple 32 bit integer hash function I found at
https://stackoverflow.com/a/12996028, but any decent reversible
integer hash function would do. 

*NOTE* in Perl a given string may have differing binary representations
if it is encoded as utf8 or not. This module uses the same conventions
as Perl itself, which is that keys are stored in their minimal form when
possible, and are only stored in their unicode (utf8) form when they
cannot be downgraded to latin-1. This ensures that the unicode and latin-1
representations of a given string are treated as the same key. This module
deals with this by "normalizing" the keys and values into latin-1, but
tracking the representation as a flag. See key_normalized and key_is_utf8
(and their 'val' equivalents) documented in the construct method.

=head2 METHODS

=over 4

=item new

Construct a new Algorithm::MinPerfHashTwoLevel object. Optional arguments
which may be provided are 'source_hash' which is a hash reference to use

lib/Algorithm/MinPerfHashTwoLevel.pm  view on Meta::CPAN

on the 'source_hash' passed into the constructor, or requires one to passed
in as an argument.

Returns an array of hashes containing information about each bucket:

          {
            "h1_keys" => 2,
            "h0" => "17713559403787135240",
            "idx" => 0,
            "key" => "\x{103}",
            "key_is_utf8" => 1,
            "key_normalized" => "\304\203",
            "val" => "\x{103}",
            "val_is_utf8" => 1,
            "val_normalized" => "\304\203",
            "xor_val" => 2
          },

The meaning of these keys is as follows:

=over 4

=item h1_keys

lib/Algorithm/MinPerfHashTwoLevel.pm  view on Meta::CPAN

The hash value computed for this key.

=item idx

The index of this bucket.

=item key

The key for this bucket as a perl string. (See key_normalized.)

=item key_is_utf8

Whether this key is encoded as utf8. Will be one of
0 for "not utf8", 1 for "is utf8", and 2 for "was utf8"
meaning the key is stored as latin-1, but will be upgraded
when fetched.

=item key_normalized

The raw bytes of the normalized key. (See key_is_utf8.)

=item val

The value for this bucket as a perl string. (See val_normalized.)

=item val_is_utf8

Whether this key is encoded as utf8. Will be either
0 for "not utf8" or 1 for "is utf8".

=item val_normalized

The raw bytes of the normalized key. (See val_is_utf8).

=item xor_val

The mask to be xor'ed with the second hash (h2) to determine
the actual lookup bucket. If the xor_val for a given bucket
is 0 then the key is not in the hash.

=back

=back

lib/Tie/Hash/MinPerfHashTwoLevel/OnDisk.pm  view on Meta::CPAN

=head2 FILE FORMAT

Currently there is only one support file format variant, 5.

The file structure consists of a header, followed by a byte vector of seed/state
data for the hash function, followed by a bucket table with records of a fixed size,
optionally followed by a bitvector of the flags for the keys with two bits per key,
optionally followed by a bitvector of flags for values with one bit per value,
followed by a string table containing the comment for the file and the strings it 
contains, and lastly a checksum; the last 8 bytes of the file contain a hash of the
rest of the file. The key flags may be 0 for "latin-1/not-utf8", 1 for "is-utf8", 
and 2 for "was-utf8" which is used for keys which can be represented as latin-1, 
but should be restored as unicode/utf8. The val flags are similar but do not (need to) 
support "was-utf8".

Structure:

    Header
    Hash-state
    Bucket-table
    Key flags (optional)
    Val flags (optional)
    Strings
    Checksum

mph2l.h  view on Meta::CPAN

} STMT_END

#define MPH_INIT_ALL_KEYSV() STMT_START {\
    MY_CXT_INIT;                                                \
    MPH_INIT_KEYSV(MPH_KEYSV_IDX,"idx");                        \
    MPH_INIT_KEYSV(MPH_KEYSV_H1_KEYS,"h1_keys");                \
    MPH_INIT_KEYSV(MPH_KEYSV_XOR_VAL,"xor_val");                \
    MPH_INIT_KEYSV(MPH_KEYSV_H0,"h0");                          \
    MPH_INIT_KEYSV(MPH_KEYSV_KEY,"key");                        \
    MPH_INIT_KEYSV(MPH_KEYSV_KEY_NORMALIZED,"key_normalized");  \
    MPH_INIT_KEYSV(MPH_KEYSV_KEY_IS_UTF8,"key_is_utf8");        \
    MPH_INIT_KEYSV(MPH_KEYSV_VAL,"val");                        \
    MPH_INIT_KEYSV(MPH_KEYSV_VAL_NORMALIZED,"val_normalized");  \
    MPH_INIT_KEYSV(MPH_KEYSV_VAL_IS_UTF8,"val_is_utf8");        \
                                                                \
    MPH_INIT_KEYSV(MPH_KEYSV_VARIANT,"variant");                \
    MPH_INIT_KEYSV(MPH_KEYSV_COMPUTE_FLAGS,"compute_flags");    \
    MPH_INIT_KEYSV(MPH_KEYSV_STATE,"state");                    \
    MPH_INIT_KEYSV(MPH_KEYSV_SOURCE_HASH,"source_hash");        \
    MPH_INIT_KEYSV(MPH_KEYSV_BUF_LENGTH,"buf_length");          \
    MPH_INIT_KEYSV(MPH_KEYSV_BUCKETS,"buckets");                \
    MPH_INIT_KEYSV(MPH_KEYSV_MOUNT,"mount");                    \
} STMT_END

ppport.h  view on Meta::CPAN

    pv_escape()               NEED_pv_escape               NEED_pv_escape_GLOBAL
    pv_pretty()               NEED_pv_pretty               NEED_pv_pretty_GLOBAL
    sv_2pv_flags()            NEED_sv_2pv_flags            NEED_sv_2pv_flags_GLOBAL
    sv_2pvbyte()              NEED_sv_2pvbyte              NEED_sv_2pvbyte_GLOBAL
    sv_catpvf_mg()            NEED_sv_catpvf_mg            NEED_sv_catpvf_mg_GLOBAL
    sv_catpvf_mg_nocontext()  NEED_sv_catpvf_mg_nocontext  NEED_sv_catpvf_mg_nocontext_GLOBAL
    sv_pvn_force_flags()      NEED_sv_pvn_force_flags      NEED_sv_pvn_force_flags_GLOBAL
    sv_setpvf_mg()            NEED_sv_setpvf_mg            NEED_sv_setpvf_mg_GLOBAL
    sv_setpvf_mg_nocontext()  NEED_sv_setpvf_mg_nocontext  NEED_sv_setpvf_mg_nocontext_GLOBAL
    sv_unmagicext()           NEED_sv_unmagicext           NEED_sv_unmagicext_GLOBAL
    utf8_to_uvchr_buf()       NEED_utf8_to_uvchr_buf       NEED_utf8_to_uvchr_buf_GLOBAL
    vload_module()            NEED_vload_module            NEED_vload_module_GLOBAL
    vmess()                   NEED_vmess                   NEED_vmess_GLOBAL
    vnewSVpvf()               NEED_vnewSVpvf               NEED_vnewSVpvf_GLOBAL
    warner()                  NEED_warner                  NEED_warner_GLOBAL

To avoid namespace conflicts, you can change the namespace of the
explicitly exported functions / variables using the C<DPPP_NAMESPACE>
macro. Just C<#define> the macro before including C<ppport.h>:

    #define DPPP_NAMESPACE MyOwnNamespace_

ppport.h  view on Meta::CPAN

PERL_MAGIC_shared_scalar|5.007003||p
PERL_MAGIC_shared|5.007003||p
PERL_MAGIC_sigelem|5.007002||p
PERL_MAGIC_sig|5.007002||p
PERL_MAGIC_substr|5.007002||p
PERL_MAGIC_sv|5.007002||p
PERL_MAGIC_taint|5.007002||p
PERL_MAGIC_tiedelem|5.007002||p
PERL_MAGIC_tiedscalar|5.007002||p
PERL_MAGIC_tied|5.007002||p
PERL_MAGIC_utf8|5.008001||p
PERL_MAGIC_uvar_elem|5.007003||p
PERL_MAGIC_uvar|5.007002||p
PERL_MAGIC_vec|5.007002||p
PERL_MAGIC_vstring|5.008001||p
PERL_PV_ESCAPE_ALL|5.009004||p
PERL_PV_ESCAPE_FIRSTCHAR|5.009004||p
PERL_PV_ESCAPE_NOBACKSLASH|5.009004||p
PERL_PV_ESCAPE_NOCLEAR|5.009004||p
PERL_PV_ESCAPE_QUOTE|5.009004||p
PERL_PV_ESCAPE_RE|5.009005||p

ppport.h  view on Meta::CPAN

SvPV_nomg_const|5.009003||p
SvPV_nomg_nolen|5.013007||p
SvPV_nomg|5.007002||p
SvPV_renew|5.009003||p
SvPV_set|||
SvPVbyte_force||5.009002|
SvPVbyte_nolen||5.006000|
SvPVbytex_force||5.006000|
SvPVbytex||5.006000|
SvPVbyte|5.006000||p
SvPVutf8_force||5.006000|
SvPVutf8_nolen||5.006000|
SvPVutf8x_force||5.006000|
SvPVutf8x||5.006000|
SvPVutf8||5.006000|
SvPVx|||
SvPV|||
SvREADONLY_off|||
SvREADONLY_on|||
SvREADONLY|||
SvREFCNT_dec_NN||5.017007|
SvREFCNT_dec|||
SvREFCNT_inc_NN|5.009004||p
SvREFCNT_inc_simple_NN|5.009004||p
SvREFCNT_inc_simple_void_NN|5.009004||p

ppport.h  view on Meta::CPAN

Zero|||
__ASSERT_|||p
_aMY_CXT|5.007003||p
_inverse_folds|||
_is_grapheme|||
_is_in_locale_category|||
_new_invlist_C_array|||
_pMY_CXT|5.007003||p
_to_fold_latin1|||n
_to_upper_title_latin1|||
_to_utf8_case|||
_variant_byte_number|||n
_warn_problematic_locale|||n
aMY_CXT_|5.007003||p
aMY_CXT|5.007003||p
aTHXR_|5.024000||p
aTHXR|5.024000||p
aTHX_|5.006000||p
aTHX|5.006000||p
abort_execution|||
add_above_Latin1_folds|||

ppport.h  view on Meta::CPAN

bad_type_pv|||
bind_match|||
block_end||5.004000|
block_gimme||5.004000|
block_start||5.004000|
blockhook_register||5.013003|
boolSV|5.004000||p
boot_core_PerlIO|||
boot_core_UNIVERSAL|||
boot_core_mro|||
bytes_cmp_utf8||5.013007|
cBOOL|5.013000||p
call_argv|5.006000||p
call_atexit||5.006000|
call_list||5.004000|
call_method|5.006000||p
call_pv|5.006000||p
call_sv|5.006000||p
caller_cx|5.013005|5.006000|p
calloc||5.007002|n
cando|||

ppport.h  view on Meta::CPAN

do_seek|||
do_semop|||
do_shmio|||
do_smartmatch|||
do_spawn_nowait|||
do_spawn|||
do_sprintf|||
do_sv_dump||5.006000|
do_sysseek|||
do_tell|||
do_trans_complex_utf8|||
do_trans_complex|||
do_trans_count_utf8|||
do_trans_count|||
do_trans_simple_utf8|||
do_trans_simple|||
do_trans|||
do_vecget|||
do_vecset|||
do_vop|||
docatch|||
does_utf8_overflow|||n
doeval_compile|||
dofile|||
dofindlabel|||
doform|||
doing_taint||5.008001|n
dooneliner|||
doopen_pm|||
doparseform|||
dopoptoeval|||
dopoptogivenfor|||

ppport.h  view on Meta::CPAN

find_runcv||5.008001|
find_rundefsv||5.013002|
find_script|||
find_span_end_mask|||n
find_span_end|||n
first_symbol|||n
fixup_errno_string|||
foldEQ_latin1_s2_folded|||n
foldEQ_latin1||5.013008|n
foldEQ_locale||5.013002|n
foldEQ_utf8||5.013002|
foldEQ||5.013002|n
fold_constants|||
forbid_setid|||
force_ident_maybe_lex|||
force_ident|||
force_list|||
force_next|||
force_strict_version|||
force_version|||
force_word|||

ppport.h  view on Meta::CPAN

hv_rand_set||5.018000|
hv_riter_p||5.009003|
hv_riter_set||5.009003|
hv_scalar||5.009001|
hv_store_ent||5.003070|
hv_stores|5.009004||p
hv_store|||
hv_undef_flags|||
hv_undef|||
ibcmp_locale||5.004000|
ibcmp_utf8||5.007003|
ibcmp|||
incline|||
incpush_if_exists|||
incpush_use_sep|||
incpush|||
ingroup|||
init_argv_symbols|||
init_constants|||
init_dbargs|||
init_debugger|||

ppport.h  view on Meta::CPAN

isASCII_A|||p
isASCII|5.006000||p
isBLANK_A|||p
isBLANK|5.006001||p
isC9_STRICT_UTF8_CHAR|||n
isCNTRL_A|||p
isCNTRL|5.006000||p
isDIGIT_A|||p
isDIGIT|||p
isFF_OVERLONG|||n
isFOO_utf8_lc|||
isGCB|||
isGRAPH_A|||p
isGRAPH|5.006000||p
isIDCONT_A|||p
isIDCONT|5.017008|5.017008|p
isIDFIRST_A|||p
isIDFIRST|||p
isLB|||
isLOWER_A|||p
isLOWER|||p

ppport.h  view on Meta::CPAN

isUPPER|||p
isUTF8_CHAR_flags|||
isUTF8_CHAR||5.021001|n
isWB|||
isWORDCHAR_A|||p
isWORDCHAR|5.013006|5.013006|p
isXDIGIT_A|||p
isXDIGIT|5.006000||p
is_an_int|||
is_ascii_string||5.011000|n
is_c9strict_utf8_string_loclen|||n
is_c9strict_utf8_string_loc|||n
is_c9strict_utf8_string|||n
is_handle_constructor|||n
is_invariant_string||5.021007|n
is_lvalue_sub||5.007001|
is_safe_syscall||5.019004|
is_ssc_worth_it|||n
is_strict_utf8_string_loclen|||n
is_strict_utf8_string_loc|||n
is_strict_utf8_string|||n
is_utf8_char_buf||5.015008|n
is_utf8_common_with_len|||
is_utf8_common|||
is_utf8_cp_above_31_bits|||n
is_utf8_fixed_width_buf_flags|||n
is_utf8_fixed_width_buf_loc_flags|||n
is_utf8_fixed_width_buf_loclen_flags|||n
is_utf8_invariant_string_loc|||n
is_utf8_invariant_string|||n
is_utf8_non_invariant_string|||n
is_utf8_overlong_given_start_byte_ok|||n
is_utf8_string_flags|||n
is_utf8_string_loc_flags|||n
is_utf8_string_loclen_flags|||n
is_utf8_string_loclen||5.009003|n
is_utf8_string_loc||5.008001|n
is_utf8_string||5.006001|n
is_utf8_valid_partial_char_flags|||n
is_utf8_valid_partial_char|||n
isa_lookup|||
isinfnansv|||
isinfnan||5.021004|n
items|||n
ix|||n
jmaybe|||
join_exact|||
keyword_plugin_standard|||
keyword|||
leave_scope|||

ppport.h  view on Meta::CPAN

magic_setlvref|||
magic_setmglob|||
magic_setnkeys|||
magic_setnonelem|||
magic_setpack|||
magic_setpos|||
magic_setregexp|||
magic_setsig|||
magic_setsubstr|||
magic_settaint|||
magic_setutf8|||
magic_setuvar|||
magic_setvec|||
magic_set|||
magic_sizepack|||
magic_wipepack|||
make_matcher|||
make_trie|||
malloc_good_size|||n
malloced_size|||n
malloc||5.007002|n

ppport.h  view on Meta::CPAN

mro_register||5.010001|
mro_set_mro||5.010001|
mro_set_private_data||5.010001|
mul128|||
multiconcat_stringify|||
multideref_stringify|||
my_atof2||5.007002|
my_atof3|||
my_atof||5.006000|
my_attrs|||
my_bytes_to_utf8|||n
my_chsize|||
my_clearenv|||
my_cxt_index|||
my_cxt_init|||
my_dirfd||5.009005|n
my_exit_jump|||
my_exit|||
my_failure_exit||5.004000|
my_fflush_all||5.006000|
my_fork||5.007003|n

ppport.h  view on Meta::CPAN

my_stat_flags|||
my_stat||5.024000|
my_strerror|||
my_strftime||5.007002|
my_strlcat|5.009004||pn
my_strlcpy|5.009004||pn
my_strnlen|||pn
my_strtod|||n
my_unexec|||
my_vsnprintf||5.009004|n
need_utf8|||n
newANONATTRSUB||5.006000|
newANONHASH|||
newANONLIST|||
newANONSUB|||
newASSIGNOP|||
newATTRSUB_x|||
newATTRSUB||5.006000|
newAVREF|||
newAV|||
newBINOP|||

ppport.h  view on Meta::CPAN

newSVavdefelem|||
newSVhek||5.009003|
newSViv|||
newSVnv|||
newSVpadname||5.017004|
newSVpv_share||5.013006|
newSVpvf_nocontext|||vn
newSVpvf||5.004000|v
newSVpvn_flags|5.010001||p
newSVpvn_share|5.007001||p
newSVpvn_utf8|5.010001||p
newSVpvn|5.004050||p
newSVpvs_flags|5.010001||p
newSVpvs_share|5.009003||p
newSVpvs|5.009003||p
newSVpv|||
newSVrv|||
newSVsv_flags|||
newSVsv_nomg|||
newSVsv|||
newSVuv|5.006000||p

ppport.h  view on Meta::CPAN

nextargv|||
nextchar|||
ninstr|||n
no_bareword_allowed|||
no_fh_allowed|||
no_op|||
noperl_die|||vn
not_a_number|||
not_incrementable|||
nothreadhook||5.008000|
notify_parser_that_changed_to_utf8|||
nuke_stacks|||
num_overflow|||n
oopsAV|||
oopsHV|||
op_append_elem||5.013006|
op_append_list||5.013006|
op_class|||
op_clear|||
op_contextualize||5.013006|
op_convert_list||5.021006|

ppport.h  view on Meta::CPAN

sv_2iuv_common|||
sv_2iuv_non_preserve|||
sv_2iv_flags||5.009001|
sv_2iv|||
sv_2mortal|||
sv_2nv_flags||5.013001|
sv_2pv_flags|5.007002||p
sv_2pv_nolen|5.006000||p
sv_2pvbyte_nolen|5.006000||p
sv_2pvbyte|5.006000||p
sv_2pvutf8_nolen||5.006000|
sv_2pvutf8||5.006000|
sv_2pv|||
sv_2uv_flags||5.009001|
sv_2uv|5.004000||p
sv_add_arena|||
sv_add_backref|||
sv_backoff|||n
sv_bless|||
sv_buf_to_ro|||
sv_buf_to_rw|||
sv_cat_decode||5.008001|

ppport.h  view on Meta::CPAN

sv_gets||5.003070|
sv_grow|||
sv_i_ncmp|||
sv_inc_nomg||5.013002|
sv_inc|||
sv_insert_flags||5.010001|
sv_insert|||
sv_isa|||
sv_isobject|||
sv_iv||5.005000|
sv_len_utf8_nomg|||
sv_len_utf8||5.006000|
sv_len|||
sv_magic_portable|5.024000|5.004000|p
sv_magicext_mglob|||
sv_magicext||5.007003|
sv_magic|||
sv_mortalcopy_flags|||
sv_mortalcopy|||
sv_ncmp|||
sv_newmortal|||
sv_newref|||

ppport.h  view on Meta::CPAN

sv_pos_u2b_forwards|||n
sv_pos_u2b_midway|||n
sv_pos_u2b||5.006000|
sv_pvbyten_force||5.006000|
sv_pvbyten||5.006000|
sv_pvbyte||5.006000|
sv_pvn_force_flags|5.007002||p
sv_pvn_force|||
sv_pvn_nomg|5.007003|5.005000|p
sv_pvn||5.005000|
sv_pvutf8n_force||5.006000|
sv_pvutf8n||5.006000|
sv_pvutf8||5.006000|
sv_pv||5.006000|
sv_recode_to_utf8||5.007003|
sv_reftype|||
sv_ref||5.015004|
sv_replace|||
sv_report_used|||
sv_resetpvn|||
sv_reset|||
sv_rvunweaken|||
sv_rvweaken||5.006000|
sv_set_undef|||
sv_sethek|||

ppport.h  view on Meta::CPAN

sv_uni_display||5.007003|
sv_unmagicext|5.013008||p
sv_unmagic|||
sv_unref_flags||5.007001|
sv_unref|||
sv_untaint||5.004000|
sv_upgrade|||
sv_usepvn_flags||5.009004|
sv_usepvn_mg|5.004050||p
sv_usepvn|||
sv_utf8_decode|||
sv_utf8_downgrade|||
sv_utf8_encode||5.006000|
sv_utf8_upgrade_flags_grow||5.011000|
sv_utf8_upgrade_flags||5.007002|
sv_utf8_upgrade_nomg||5.007002|
sv_utf8_upgrade||5.007001|
sv_uv|5.005000||p
sv_vcatpvf_mg|5.006000|5.004000|p
sv_vcatpvfn_flags||5.017002|
sv_vcatpvfn||5.004000|
sv_vcatpvf|5.006000|5.004000|p
sv_vsetpvf_mg|5.006000|5.004000|p
sv_vsetpvfn||5.004000|
sv_vsetpvf|5.006000|5.004000|p
svtype|||
swallow_bom|||

ppport.h  view on Meta::CPAN

sys_init3||5.010000|n
sys_init||5.010000|n
sys_intern_clear|||
sys_intern_dup|||
sys_intern_init|||
sys_term||5.010000|n
taint_env|||
taint_proper|||
tied_method|||v
tmps_grow_p|||
toFOLD_utf8_safe|||
toFOLD_utf8||5.019001|
toFOLD_uvchr||5.023009|
toFOLD||5.019001|
toLOWER_L1||5.019001|
toLOWER_LC||5.004000|
toLOWER_utf8_safe|||
toLOWER_utf8||5.015007|
toLOWER_uvchr||5.023009|
toLOWER|||
toTITLE_utf8_safe|||
toTITLE_utf8||5.015007|
toTITLE_uvchr||5.023009|
toTITLE||5.019001|
toUPPER_utf8_safe|||
toUPPER_utf8||5.015007|
toUPPER_uvchr||5.023009|
toUPPER|||
to_byte_substr|||
to_lower_latin1|||n
to_utf8_substr|||
tokenize_use|||
tokeq|||
tokereport|||
too_few_arguments_pv|||
too_many_arguments_pv|||
translate_substr_offsets|||n
traverse_op_tree|||
try_amagic_bin|||
try_amagic_un|||
turkic_fc|||

ppport.h  view on Meta::CPAN

unpackstring||5.008001|
unreferenced_to_tmp_stack|||
unshare_hek_or_pvn|||
unshare_hek|||
unsharepvn||5.003070|
unwind_handler_stack|||
update_debugger_info|||
upg_version||5.009005|
usage|||
utf16_textfilter|||
utf16_to_utf8_reversed||5.006001|
utf16_to_utf8||5.006001|
utf8_distance||5.006000|
utf8_hop_back|||n
utf8_hop_forward|||n
utf8_hop_safe|||n
utf8_hop||5.006000|n
utf8_length||5.007001|
utf8_mg_len_cache_update|||
utf8_mg_pos_cache_update|||
utf8_to_uvchr_buf|5.015009|5.015009|p
utf8_to_uvchr|||p
utf8n_to_uvchr_error|||n
utf8n_to_uvchr||5.007001|n
utf8n_to_uvuni||5.007001|
utilize|||
uvchr_to_utf8_flags||5.007003|
uvchr_to_utf8||5.007001|
uvoffuni_to_utf8_flags||5.019004|
uvuni_to_utf8_flags||5.007003|
uvuni_to_utf8||5.007001|
valid_utf8_to_uvchr|||n
validate_suid|||
variant_under_utf8_count|||n
varname|||
vcmp||5.009000|
vcroak||5.006000|
vdeb||5.007003|
vform||5.006000|
visit|||
vivify_defelem|||
vivify_ref|||
vload_module|5.006000||p
vmess|5.006000|5.006000|p

ppport.h  view on Meta::CPAN

    return p - str;
}

#endif
#endif

#if (PERL_BCDVERSION < 0x5031002)
        /* Versions prior to this accepted things that are now considered
         * malformations, and didn't return -1 on error with warnings enabled
         * */
#  undef utf8_to_uvchr_buf
#endif

/* This implementation brings modern, generally more restricted standards to
 * utf8_to_uvchr_buf.  Some of these are security related, and clearly must
 * be done.  But its arguable that the others need not, and hence should not.
 * The reason they're here is that a module that intends to play with the
 * latest perls shoud be able to work the same in all releases.  An example is
 * that perl no longer accepts any UV for a code point, but limits them to
 * IV_MAX or below.  This is for future internal use of the larger code points.
 * If it turns out that some of these changes are breaking code that isn't
 * intended to work with modern perls, the tighter restrictions could be
 * relaxed.  khw thinks this is unlikely, but has been wrong in the past. */

#ifndef utf8_to_uvchr_buf
   /* Choose which underlying implementation to use.  At least one must be
    * present or the perl is too early to handle this function */
#  if defined(utf8n_to_uvchr) || defined(utf8_to_uv)
#    if defined(utf8n_to_uvchr)   /* This is the preferred implementation */
#      define _ppport_utf8_to_uvchr_buf_callee utf8n_to_uvchr
#    else
#      define _ppport_utf8_to_uvchr_buf_callee utf8_to_uv
#    endif

#  endif

#ifdef _ppport_utf8_to_uvchr_buf_callee
#  if defined(NEED_utf8_to_uvchr_buf)
static UV DPPP_(my_utf8_to_uvchr_buf)(pTHX_ const U8 * s, const U8 * send, STRLEN * retlen);
static
#else
extern UV DPPP_(my_utf8_to_uvchr_buf)(pTHX_ const U8 * s, const U8 * send, STRLEN * retlen);
#endif

#if defined(NEED_utf8_to_uvchr_buf) || defined(NEED_utf8_to_uvchr_buf_GLOBAL)

#ifdef utf8_to_uvchr_buf
#  undef utf8_to_uvchr_buf
#endif
#define utf8_to_uvchr_buf(a,b,c) DPPP_(my_utf8_to_uvchr_buf)(aTHX_ a,b,c)
#define Perl_utf8_to_uvchr_buf DPPP_(my_utf8_to_uvchr_buf)


UV
DPPP_(my_utf8_to_uvchr_buf)(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
{
    UV ret;
    STRLEN curlen;
    bool overflows = 0;
    const U8 *cur_s = s;
    const bool do_warnings = ckWARN_d(WARN_UTF8);

    if (send > s) {
        curlen = send - s;
    }

ppport.h  view on Meta::CPAN

        assert(0);  /* Modern perls die under this circumstance */
        curlen = 0;
        if (! do_warnings) {    /* Handle empty here if no warnings needed */
            if (retlen) *retlen = 0;
            return UNICODE_REPLACEMENT;
        }
    }

    /* The modern version allows anything that evaluates to a legal UV, but not
     * overlongs nor an empty input */
    ret = _ppport_utf8_to_uvchr_buf_callee(
                s, curlen, retlen,   (UTF8_ALLOW_ANYUV
                                  & ~(UTF8_ALLOW_LONG|UTF8_ALLOW_EMPTY)));

    /* But actually, modern versions restrict the UV to being no more than what
     * an IV can hold */
    if (ret > PERL_INT_MAX) {
        overflows = 1;
    }

#    if (PERL_BCDVERSION < 0x5026000)

ppport.h  view on Meta::CPAN

                    ret, *cur_s, *s);
            }
            if (retlen) {
                *retlen = (STRLEN) -1;
            }
            return 0;
        }
    }

    /* If failed and warnings are off, to emulate the behavior of the real
     * utf8_to_uvchr(), try again, allowing anything.  (Note a return of 0 is
     * ok if the input was '\0') */
    if (UNLIKELY(ret == 0 && (curlen == 0 || *s != '\0'))) {

        /* If curlen is 0, we already handled the case where warnings are
         * disabled, so this 'if' will be true, and we won't look at the
         * contents of 's' */
        if (do_warnings) {
            *retlen = (STRLEN) -1;
        }
        else {
            ret = _ppport_utf8_to_uvchr_buf_callee(
                                            s, curlen, retlen, UTF8_ALLOW_ANY);
            /* Override with the REPLACEMENT character, as that is what the
             * modern version of this function returns */
            ret = UNICODE_REPLACEMENT;

#           if (PERL_BCDVERSION < 0x5016000)

            /* Versions earlier than this don't necessarily return the proper
             * length.  It should not extend past the end of string, nor past
             * what the first byte indicates the length is, nor past the

ppport.h  view on Meta::CPAN

        }
    }

    return ret;
}

#  endif
#endif
#endif

#if defined(UTF8SKIP) && defined(utf8_to_uvchr_buf)
#undef utf8_to_uvchr /* Always redefine this unsafe function so that it refuses
                        to read past a NUL, making it much less likely to read
                        off the end of the buffer.  A NUL indicates the start
                        of the next character anyway.  If the input isn't
                        NUL-terminated, the function remains unsafe, as it
                        always has been. */
#ifndef utf8_to_uvchr
#  define utf8_to_uvchr(s, lp)           \
    ((*(s) == '\0')                                                             \
    ? utf8_to_uvchr_buf(s,((s)+1), lp) /* Handle single NUL specially */        \
    : utf8_to_uvchr_buf(s, (s) + my_strnlen((char *) (s), UTF8SKIP(s)), (lp)))
#endif

#endif

#ifdef HAS_MEMCMP
#ifndef memNE
#  define memNE(s1,s2,l)                 (memcmp(s1,s2,l))
#endif

#ifndef memEQ

ppport.h  view on Meta::CPAN

#endif

#ifndef PERL_MAGIC_vstring
#  define PERL_MAGIC_vstring             'V'
#endif

#ifndef PERL_MAGIC_vec
#  define PERL_MAGIC_vec                 'v'
#endif

#ifndef PERL_MAGIC_utf8
#  define PERL_MAGIC_utf8                'w'
#endif

#ifndef PERL_MAGIC_substr
#  define PERL_MAGIC_substr              'x'
#endif

#ifndef PERL_MAGIC_defelem
#  define PERL_MAGIC_defelem             'y'
#endif

ppport.h  view on Meta::CPAN

#if (PERL_BCDVERSION < 0x5006000)
# define D_PPP_CONSTPV_ARG(x)  ((char *) (x))
#else
# define D_PPP_CONSTPV_ARG(x)  (x)
#endif
#ifndef newSVpvn
#  define newSVpvn(data,len)             ((data)                                              \
                                    ? ((len) ? newSVpv((data), (len)) : newSVpv("", 0)) \
                                    : newSV(0))
#endif
#ifndef newSVpvn_utf8
#  define newSVpvn_utf8(s, len, u)       newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0)
#endif
#ifndef SVf_UTF8
#  define SVf_UTF8                       0
#endif

#ifndef newSVpvn_flags

#if defined(NEED_newSVpvn_flags)
static SV * DPPP_(my_newSVpvn_flags)(pTHX_ const char *s, STRLEN len, U32 flags);
static

ppport.h  view on Meta::CPAN

#ifdef sv_2pvbyte
#  undef sv_2pvbyte
#endif
#define sv_2pvbyte(a,b) DPPP_(my_sv_2pvbyte)(aTHX_ a,b)
#define Perl_sv_2pvbyte DPPP_(my_sv_2pvbyte)


char *
DPPP_(my_sv_2pvbyte)(pTHX_ SV *sv, STRLEN *lp)
{
  sv_utf8_downgrade(sv,0);
  return SvPV(sv,*lp);
}

#endif

/* Hint: sv_2pvbyte
 * Use the SvPVbyte() macro instead of sv_2pvbyte().
 */

#undef SvPVbyte

ppport.h  view on Meta::CPAN

	const MGVTBL* const virt = mg->mg_virtual;
	if (mg->mg_type == type && virt == vtbl) {
	    *mgp = mg->mg_moremagic;
	    if (virt && virt->svt_free)
		virt->svt_free(aTHX_ sv, mg);
	    if (mg->mg_ptr && mg->mg_type != PERL_MAGIC_regex_global) {
		if (mg->mg_len > 0)
		    Safefree(mg->mg_ptr);
		else if (mg->mg_len == HEf_SVKEY) /* Questionable on older perls... */
		    SvREFCNT_dec(MUTABLE_SV(mg->mg_ptr));
		else if (mg->mg_type == PERL_MAGIC_utf8)
		    Safefree(mg->mg_ptr);
            }
	    if (mg->mg_flags & MGf_REFCOUNTED)
		SvREFCNT_dec(mg->mg_obj);
	    Safefree(mg);
	}
	else
	    mgp = &mg->mg_moremagic;
    }
    if (SvMAGIC(sv)) {

ppport.h  view on Meta::CPAN

DPPP_(my_pv_escape)(pTHX_ SV *dsv, char const * const str,
  const STRLEN count, const STRLEN max,
  STRLEN * const escaped, const U32 flags)
{
    const char esc = flags & PERL_PV_ESCAPE_RE ? '%' : '\\';
    const char dq = flags & PERL_PV_ESCAPE_QUOTE ? '"' : esc;
    char octbuf[32] = "%123456789ABCDF";
    STRLEN wrote = 0;
    STRLEN chsize = 0;
    STRLEN readsize = 1;
#if defined(is_utf8_string) && defined(utf8_to_uvchr_buf)
    bool isuni = flags & PERL_PV_ESCAPE_UNI ? 1 : 0;
#endif
    const char *pv  = str;
    const char * const end = pv + count;
    octbuf[0] = esc;

    if (!(flags & PERL_PV_ESCAPE_NOCLEAR))
        sv_setpvs(dsv, "");

#if defined(is_utf8_string) && defined(utf8_to_uvchr_buf)
    if ((flags & PERL_PV_ESCAPE_UNI_DETECT) && is_utf8_string((U8*)pv, count))
        isuni = 1;
#endif

    for (; pv < end && (!max || wrote < max) ; pv += readsize) {
        const UV u =
#if defined(is_utf8_string) && defined(utf8_to_uvchr_buf)
                     isuni ? utf8_to_uvchr_buf((U8*)pv, end, &readsize) :
#endif
                             (U8)*pv;
        const U8 c = (U8)u & 0xFF;

        if (u > 255 || (flags & PERL_PV_ESCAPE_ALL)) {
            if (flags & PERL_PV_ESCAPE_FIRSTCHAR)
                chsize = my_snprintf(octbuf, sizeof octbuf,
                                      "%" UVxf, u);
            else
                chsize = my_snprintf(octbuf, sizeof octbuf,

t/Algo_v5.t  view on Meta::CPAN

}

my %hash= ("A" .. "Z");
my $buckets= $o->compute(\%hash);
is_deeply($buckets,
    [
           {
             "h0" => "2811388775115704789",
             "idx" => 0,
             "key" => "M",
             "key_is_utf8" => 0,
             "key_normalized" => "M",
             "val" => "N",
             "val_is_utf8" => 0,
             "val_normalized" => "N"
           },
           {
             "h0" => "8860848721851830215",
             "h1_keys" => 2,
             "idx" => 1,
             "key" => "K",
             "key_is_utf8" => 0,
             "key_normalized" => "K",
             "val" => "L",
             "val_is_utf8" => 0,
             "val_normalized" => "L",
             "xor_val" => 1
           },
           {
             "h0" => "2333874141850904510",
             "h1_keys" => 1,
             "idx" => 2,
             "key" => "G",
             "key_is_utf8" => 0,
             "key_normalized" => "G",
             "val" => "H",
             "val_is_utf8" => 0,
             "val_normalized" => "H",
             "xor_val" => "4294967295"
           },
           {
             "h0" => "16876117158654460122",
             "idx" => 3,
             "key" => "Y",
             "key_is_utf8" => 0,
             "key_normalized" => "Y",
             "val" => "Z",
             "val_is_utf8" => 0,
             "val_normalized" => "Z"
           },
           {
             "h0" => "8715353882719101949",
             "h1_keys" => 1,
             "idx" => 4,
             "key" => "S",
             "key_is_utf8" => 0,
             "key_normalized" => "S",
             "val" => "T",
             "val_is_utf8" => 0,
             "val_normalized" => "T",
             "xor_val" => "4294967294"
           },
           {
             "h0" => "7118026915973626049",
             "h1_keys" => 1,
             "idx" => 5,
             "key" => "U",
             "key_is_utf8" => 0,
             "key_normalized" => "U",
             "val" => "V",
             "val_is_utf8" => 0,
             "val_normalized" => "V",
             "xor_val" => "4294967293"
           },
           {
             "h0" => "8257329964001049281",
             "h1_keys" => 2,
             "idx" => 6,
             "key" => "O",
             "key_is_utf8" => 0,
             "key_normalized" => "O",
             "val" => "P",
             "val_is_utf8" => 0,
             "val_normalized" => "P",
             "xor_val" => 1
           },
           {
             "h0" => "5518171323424817881",
             "h1_keys" => 1,
             "idx" => 7,
             "key" => "E",
             "key_is_utf8" => 0,
             "key_normalized" => "E",
             "val" => "F",
             "val_is_utf8" => 0,
             "val_normalized" => "F",
             "xor_val" => "4294967291"
           },
           {
             "h0" => "3591181703942984702",
             "h1_keys" => 1,
             "idx" => 8,
             "key" => "Q",
             "key_is_utf8" => 0,
             "key_normalized" => "Q",
             "val" => "R",
             "val_is_utf8" => 0,
             "val_normalized" => "R",
             "xor_val" => "4294967288"
           },
           {
             "h0" => "8458370515337648683",
             "h1_keys" => 1,
             "idx" => 9,
             "key" => "W",
             "key_is_utf8" => 0,
             "key_normalized" => "W",
             "val" => "X",
             "val_is_utf8" => 0,
             "val_normalized" => "X",
             "xor_val" => "4294967286"
           },
           {
             "h0" => "18153270191496908466",
             "idx" => 10,
             "key" => "C",
             "key_is_utf8" => 0,
             "key_normalized" => "C",
             "val" => "D",
             "val_is_utf8" => 0,
             "val_normalized" => "D"
           },
           {
             "h0" => "5043040936135718210",
             "h1_keys" => 2,
             "idx" => 11,
             "key" => "A",
             "key_is_utf8" => 0,
             "key_normalized" => "A",
             "val" => "B",
             "val_is_utf8" => 0,
             "val_normalized" => "B",
             "xor_val" => 2
           },
           {
             "h0" => "15354065489600908969",
             "h1_keys" => 1,
             "idx" => 12,
             "key" => "I",
             "key_is_utf8" => 0,
             "key_normalized" => "I",
             "val" => "J",
             "val_is_utf8" => 0,
             "val_normalized" => "J",
             "xor_val" => "4294967285"
           }
    ],
    "simple hash A-Z",
) or diag Dumper($buckets);

%hash=map {
    my $key= chr($_);
    utf8::upgrade($key) if $_ % 2;
    $key => $key
} 250 .. 260;
$buckets= $o->compute(\%hash);
is_deeply($buckets,
    [
           {
             "h0" => "7329038856428266488",
             "h1_keys" => 1,
             "idx" => 0,
             "key" => "\376",
             "key_is_utf8" => 0,
             "key_normalized" => "\376",
             "val" => "\376",
             "val_is_utf8" => 0,
             "val_normalized" => "\376",
             "xor_val" => "4294967293"
           },
           {
             "h0" => "7406429479659263986",
             "h1_keys" => 2,
             "idx" => 1,
             "key" => "\x{101}",
             "key_is_utf8" => 1,
             "key_normalized" => "\304\201",
             "val" => "\x{101}",
             "val_is_utf8" => 1,
             "val_normalized" => "\304\201",
             "xor_val" => 2
           },
           {
             "h0" => "7026079938176527097",
             "h1_keys" => 1,
             "idx" => 2,
             "key" => "\x{103}",
             "key_is_utf8" => 1,
             "key_normalized" => "\304\203",
             "val" => "\x{103}",
             "val_is_utf8" => 1,
             "val_normalized" => "\304\203",
             "xor_val" => "4294967292"
           },
           {
             "h0" => "2560445542346638988",
             "idx" => 3,
             "key" => "\x{100}",
             "key_is_utf8" => 1,
             "key_normalized" => "\304\200",
             "val" => "\x{100}",
             "val_is_utf8" => 1,
             "val_normalized" => "\304\200"
           },
           {
             "h0" => "15075586565050556610",
             "h1_keys" => 3,
             "idx" => 4,
             "key" => "\x{102}",
             "key_is_utf8" => 1,
             "key_normalized" => "\304\202",
             "val" => "\x{102}",
             "val_is_utf8" => 1,
             "val_normalized" => "\304\202",
             "xor_val" => 1
           },
           {
             "h0" => "10520228168695442556",
             "h1_keys" => 1,
             "idx" => 5,
             "key" => "\x{ff}",
             "key_is_utf8" => 2,
             "key_normalized" => "\377",
             "val" => "\x{ff}",
             "val_is_utf8" => 1,
             "val_normalized" => "\303\277",
             "xor_val" => "4294967290"
           },
           {
             "h0" => "10697593882658072482",
             "h1_keys" => 1,
             "idx" => 6,
             "key" => "\372",
             "key_is_utf8" => 0,
             "key_normalized" => "\372",
             "val" => "\372",
             "val_is_utf8" => 0,
             "val_normalized" => "\372",
             "xor_val" => "4294967286"
           },
           {
             "h0" => "12905152200806003791",
             "idx" => 7,
             "key" => "\374",
             "key_is_utf8" => 0,
             "key_normalized" => "\374",
             "val" => "\374",
             "val_is_utf8" => 0,
             "val_normalized" => "\374"
           },
           {
             "h0" => "11318277185996573588",
             "idx" => 8,
             "key" => "\x{fd}",
             "key_is_utf8" => 2,
             "key_normalized" => "\375",
             "val" => "\x{fd}",
             "val_is_utf8" => 1,
             "val_normalized" => "\303\275"
           },
           {
             "h0" => "11837163118808456557",
             "idx" => 9,
             "key" => "\x{104}",
             "key_is_utf8" => 1,
             "key_normalized" => "\304\204",
             "val" => "\x{104}",
             "val_is_utf8" => 1,
             "val_normalized" => "\304\204"
           },
           {
             "h0" => "13939891494320893160",
             "h1_keys" => 2,
             "idx" => 10,
             "key" => "\x{fb}",
             "key_is_utf8" => 2,
             "key_normalized" => "\373",
             "val" => "\x{fb}",
             "val_is_utf8" => 1,
             "val_normalized" => "\303\273",
             "xor_val" => 2
           }
    ],
    "hash with utf8 works as expected",
) or diag Dumper($buckets);


t/OnDisk.pl  view on Meta::CPAN

ok(defined($srand),"srand as expected: $srand");
my $eval_ok= eval {
    tie my(%fail), $class => $0;
    1;
};
my $error= !$eval_ok && $@;
ok($error,"it failed: $@");

my $tmpdir= File::Temp->newdir();

my $not_utf8= "not utf8: \x{DF}";
my $utf8_can_be_downgraded= "was utf8: \x{DF}";
utf8::upgrade($utf8_can_be_downgraded);
my $must_be_utf8= "is utf8: \x{100}"; # this can ONLY be represented as utf8
my @source_hashes= (
    simple => {
        foo => "bar",
        baz => "bop",
        fiz => "shmang",
        plop => "shwoosh",
    },
    large => { map { $_ => $_ } 1 .. 50000 },
    mixed_utf8 => {
        $not_utf8 => $not_utf8,
        $utf8_can_be_downgraded => $utf8_can_be_downgraded,
        $must_be_utf8 => $must_be_utf8,
        map { chr($_) => chr($_) } 240 .. 270,
    },
    pow2_08 =>
    { map { $_ => $_ } 1 .. 8 },
    pow2_16 =>
    { map { $_ => $_ } 1 .. 16 },
    pow2_32 =>
    { map { $_ => $_ } 1 .. 32 },
    pow2_64 =>
    { map { $_ => $_ } 1 .. 64 },

    chr_chr_utf8 =>
    { map { chr($_) => chr($_) } 256 .. 270 },
    chr_num_utf8 =>
    { map { chr($_) => $_ } 256 .. 270 },
    num_chr_utf8 =>
    { map { $_ => chr($_) } 256 .. 270 },
    mix_mix_utf8 =>
    { map { ($_ % 2 ? chr($_) : $_) => ($_ % 2 ? $_ : chr($_)) } 256 .. 270 },
    chr_mix_utf8 =>
    { map { chr($_)                 => ($_ % 2 ? $_ : chr($_)) } 256 .. 270 },
    num_mix_utf8 =>
    { map { $_                      => ($_ % 2 ? $_ : chr($_)) } 256 .. 270 },
    mix_num_utf8 =>
    { map { ($_ % 2 ? chr($_) : $_) => $_                      } 256 .. 270 },
    mix_chr_utf8 =>
    { map { ($_ % 2 ? chr($_) : $_) => chr($_)                 } 256 .. 270 },

);

my $rand_seed= join("",map chr(rand 256), 1..16);
foreach my $seed ("1234567812345678", undef, $rand_seed) {
    foreach my $idx (0 .. (@source_hashes/2)-1) {
        my $name= $source_hashes[$idx*2];
        my $source_hash= $source_hashes[$idx*2+1];
        foreach my $variant (defined($ENV{VARIANT}) ? ($ENV{VARIANT}) : (MIN_VARIANT .. MAX_VARIANT)) {

t/OnDisk.pl  view on Meta::CPAN

                    is_deeply(\@got_keys,\@want_keys,"keys in both are the same ($title)");
                    is_deeply(\@got_each_keys,\@want_keys,"got_keys and got_each_keys agree ($title)");

                    is_deeply(\@got_values,\@want_values,"got_values and got_each_values agree ($title)");
                    is_deeply(\@got_fetch_values,\@want_values,"values in both are same ($title)");
                    is_deeply(\@got_each_values,\@want_values,"values in both are same ($title)");

                    {
                        my @bad;
                        foreach my $idx (0..$#got_keys) {
                            if (utf8::is_utf8($got_keys[$idx]) != utf8::is_utf8($want_keys[$idx])) {
                                push @bad, [ $got_keys[$idx], $want_keys[$idx] ];
                            }
                        }
                        is(0+@bad,0,"no keys with differing utf8 flags ($title)")
                            or diag Dumper($bad[0]);
                    }

                } else {
                    ok(0,"test cannot pass if make_file dies") for 1..17;
                }
            }
        }
    }
}



( run in 1.540 second using v1.01-cache-2.11-cpan-49f99fa48dc )