view release on metacpan or search on metacpan
MinPerfHashTwoLevel.xs view on Meta::CPAN
#include <sys/stat.h>
#include <assert.h>
#include "mph2l.h"
#include "mph_hv_macro.h"
#include "mph_siphash.h"
#define MAX_VARIANT 5
#define MIN_VARIANT 5
MPH_STATIC_INLINE void
sv_set_from_bucket(pTHX_ SV *sv, U8 *strs, const U32 ofs, const U32 len, const U32 idx, const U8 *flags, const U32 bits, const U8 utf8_default, const U8 utf8_default_shift) {
U8 *ptr;
U8 is_utf8;
if (ofs) {
ptr= (strs) + (ofs);
if (utf8_default) {
is_utf8= utf8_default >> utf8_default_shift;
} else {
GETBITS(is_utf8,flags,idx,bits);
}
} else {
ptr= 0;
is_utf8= 0;
}
/* note that sv_setpvn() will cause the sv to
* become undef if ptr is 0 */
sv_setpvn_mg((sv),ptr,len);
if (is_utf8 > 1) {
sv_utf8_upgrade(sv);
}
else
if (is_utf8) {
SvUTF8_on(sv);
}
else
if (ptr) {
SvUTF8_off(sv);
}
}
MPH_STATIC_INLINE int
lookup_bucket(pTHX_ struct mph_header *mph, U32 index, SV *key_sv, SV *val_sv)
MinPerfHashTwoLevel.xs view on Meta::CPAN
U8 *key_pv;
U64 h0;
U32 h1;
U32 h2;
U32 index;
U8 *got_key_pv;
STRLEN got_key_len;
if (SvUTF8(key_sv)) {
SV *tmp= sv_2mortal(newSVsv(key_sv));
sv_utf8_downgrade(tmp,1);
key_sv= tmp;
}
key_pv= SvPV(key_sv,key_len);
h0= mph_hash_with_state(state,key_pv,key_len);
h1= h0 >> 32;
index= h1 % mph->num_buckets;
bucket= buckets + index;
if (!bucket->xor_val)
return 0;
MinPerfHashTwoLevel.xs view on Meta::CPAN
}
return head->variant;
}
void
mph_munmap(struct mph_obj *obj) {
munmap(obj->header,obj->bytes);
}
STRLEN
normalize_with_flags(pTHX_ SV *sv, SV *normalized_sv, SV *is_utf8_sv, int downgrade) {
STRLEN len;
if (SvROK(sv)) {
croak("Error: Not expecting a reference value in source hash");
}
sv_setsv(normalized_sv,sv);
if (SvOK(sv)) {
STRLEN pv_len;
char *pv= SvPV(sv,pv_len);
if (pv_len > 0xFFFF)
croak("Error: String in source hash is too long to store, max length is %u got length %lu", 0xFFFF, pv_len);
if (SvUTF8(sv)) {
if (downgrade)
sv_utf8_downgrade(normalized_sv,1);
if (SvUTF8(normalized_sv)) {
SvUTF8_off(normalized_sv);
sv_setiv(is_utf8_sv,1);
} else {
sv_setiv(is_utf8_sv,2);
}
}
return pv_len;
} else {
sv_setiv(is_utf8_sv, 0);
return 0;
}
}
U32
_roundup(const U32 n, const U32 s) {
const U32 r= n % s;
if (r) {
return n + s - r;
} else {
MinPerfHashTwoLevel.xs view on Meta::CPAN
normalize_source_hash(pTHX_ HV *source_hv, AV *keys_av, U32 compute_flags, SV *buf_length_sv, char *state_pv) {
dMY_CXT;
HE *he;
U32 buf_length= 0;
U32 ctr;
hv_iterinit(source_hv);
while (he= hv_iternext(source_hv)) {
SV *val_sv= HeVAL(he);
SV *val_normalized_sv;
SV *val_is_utf8_sv;
SV *key_sv;
SV *key_normalized_sv;
SV *key_is_utf8_sv;
HV *hv;
U8 *key_pv;
STRLEN key_len;
U64 h0;
if (!val_sv) croak("panic: no sv for value?");
if (!SvOK(val_sv) && (compute_flags & MPH_F_FILTER_UNDEF)) continue;
hv= newHV();
val_normalized_sv= newSV(0);
val_is_utf8_sv= newSVuv(0);
key_sv= newSVhek(HeKEY_hek(he));
key_normalized_sv= newSV(0);
key_is_utf8_sv= newSVuv(0);
hv_ksplit(hv,15);
hv_store_ent_with_keysv(hv,MPH_KEYSV_KEY, key_sv);
hv_store_ent_with_keysv(hv,MPH_KEYSV_KEY_NORMALIZED, key_normalized_sv);
hv_store_ent_with_keysv(hv,MPH_KEYSV_KEY_IS_UTF8, key_is_utf8_sv);
hv_store_ent_with_keysv(hv,MPH_KEYSV_VAL, SvREFCNT_inc_simple_NN(val_sv));
hv_store_ent_with_keysv(hv,MPH_KEYSV_VAL_NORMALIZED, val_normalized_sv);
hv_store_ent_with_keysv(hv,MPH_KEYSV_VAL_IS_UTF8, val_is_utf8_sv);
/* install everything into the keys_av just in case normalize_with_flags() dies */
av_push(keys_av,newRV_noinc((SV*)hv));
buf_length += normalize_with_flags(aTHX_ key_sv, key_normalized_sv, key_is_utf8_sv, 1);
buf_length += normalize_with_flags(aTHX_ val_sv, val_normalized_sv, val_is_utf8_sv, 0);
key_pv= (U8 *)SvPV(key_normalized_sv,key_len);
h0= mph_hash_with_state(state_pv,key_pv,key_len);
hv_store_ent_with_keysv(hv,MPH_KEYSV_H0, newSVuv(h0));
}
if (buf_length_sv)
sv_setuv(buf_length_sv, buf_length);
/* we now know how many keys there are, and what the max_xor_val should be */
MinPerfHashTwoLevel.xs view on Meta::CPAN
char *state;
struct mph_bucket *table;
char *key_flags;
char *val_flags;
char *str_buf_start;
char *str_buf_end;
char *str_buf_pos;
U32 i;
STRLEN pv_len;
char *pv;
U32 key_is_utf8_count[3]={0,0,0};
U32 val_is_utf8_count[2]={0,0};
U32 used_flags;
U32 the_flag;
IV key_is_utf8_generic=-1;
IV val_is_utf8_generic=-1;
for (i= 0; i < bucket_count; i++) {
SV **got= av_fetch(buckets_av,i,0);
HV *hv= (HV *)SvRV(*got);
HE *key_is_utf8_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_KEY_IS_UTF8,0);
HE *val_is_utf8_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_VAL_IS_UTF8,0);
key_is_utf8_count[SvUV(HeVAL(key_is_utf8_he))]++;
val_is_utf8_count[SvUV(HeVAL(val_is_utf8_he))]++;
}
used_flags= 0;
if (key_is_utf8_count[0]) { the_flag= 0; used_flags++; }
if (key_is_utf8_count[1]) { the_flag= 1; used_flags++; }
if (key_is_utf8_count[2]) { the_flag= 2; used_flags++; }
if (used_flags == 1) {
key_is_utf8_generic= the_flag;
key_flags_rlen= 0;
}
used_flags= 0;
if (val_is_utf8_count[0]) { the_flag= 0; used_flags++; }
if (val_is_utf8_count[1]) { the_flag= 1; used_flags++; }
if (used_flags == 1) {
val_is_utf8_generic= the_flag;
val_flags_rlen= 0;
}
total_size=
+ header_rlen
+ state_rlen
+ table_rlen
+ key_flags_rlen
+ val_flags_rlen
+ str_rlen
MinPerfHashTwoLevel.xs view on Meta::CPAN
head->magic_num= 1278363728;
head->variant= variant;
head->num_buckets= bucket_count;
head->state_ofs= header_rlen;
head->table_ofs= head->state_ofs + state_rlen;
head->key_flags_ofs= head->table_ofs + table_rlen;
head->val_flags_ofs= head->key_flags_ofs + key_flags_rlen;
head->str_buf_ofs= head->val_flags_ofs + val_flags_rlen;
if (val_is_utf8_generic >= 0)
head->general_flags |= (MPH_VALS_ARE_SAME_UTF8NESS_FLAG_BIT | (val_is_utf8_generic << MPH_VALS_ARE_SAME_UTF8NESS_SHIFT));
if (key_is_utf8_generic >= 0)
head->general_flags |= (MPH_KEYS_ARE_SAME_UTF8NESS_FLAG_BIT | (key_is_utf8_generic << MPH_KEYS_ARE_SAME_UTF8NESS_SHIFT));
state= start + head->state_ofs;
table= (struct mph_bucket *)(start + head->table_ofs);
key_flags= start + head->key_flags_ofs;
val_flags= start + head->val_flags_ofs;
str_buf_start= start + head->str_buf_ofs;
str_buf_end= start + total_size;
str_buf_pos= str_buf_start + 2;
Copy(state_pv,state,state_len,char);
MinPerfHashTwoLevel.xs view on Meta::CPAN
HE *val_normalized_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_VAL_NORMALIZED,0);
HE *xor_val_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_XOR_VAL,0);
if (xor_val_he) {
table[i].xor_val= SvUV(HeVAL(xor_val_he));
} else {
table[i].xor_val= 0;
}
SETOFS(i,key_normalized_he,table,key_ofs,key_len,str_buf_start,str_buf_pos,str_buf_end,str_ofs_hv);
SETOFS(i,val_normalized_he,table,val_ofs,val_len,str_buf_start,str_buf_pos,str_buf_end,str_ofs_hv);
if ( key_is_utf8_generic < 0) {
HE *key_is_utf8_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_KEY_IS_UTF8,0);
if (key_is_utf8_he) {
UV u= SvUV(HeVAL(key_is_utf8_he));
SETBITS(u,key_flags,i,2);
} else {
croak("panic: out of memory? no key_is_utf8_he for %u",i);
}
}
if ( val_is_utf8_generic < 0 ) {
HE *val_is_utf8_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_VAL_IS_UTF8,0);
if (val_is_utf8_he) {
UV u= SvUV(HeVAL(val_is_utf8_he));
SETBITS(u,val_flags,i,1);
} else {
croak("panic: out of memory? no val_is_utf8_he for %u",i);
}
}
}
*str_buf_pos = 0; str_buf_pos++;
*str_buf_pos = 128; str_buf_pos++;
{
U32 r= (str_buf_pos - start) % alignment;
if (r) {
str_buf_pos += (alignment - r);
}
lib/Algorithm/MinPerfHashTwoLevel.pm view on Meta::CPAN
x = ((x >> 16) ^ x) * 0x45d9f3b;
x = ((x >> 16) ^ x) * 0x45d9f3b;
x = ((x >> 16) ^ x);
which is just a simple 32 bit integer hash function I found at
https://stackoverflow.com/a/12996028, but any decent reversible
integer hash function would do.
*NOTE* in Perl a given string may have differing binary representations
if it is encoded as utf8 or not. This module uses the same conventions
as Perl itself, which is that keys are stored in their minimal form when
possible, and are only stored in their unicode (utf8) form when they
cannot be downgraded to latin-1. This ensures that the unicode and latin-1
representations of a given string are treated as the same key. This module
deals with this by "normalizing" the keys and values into latin-1, but
tracking the representation as a flag. See key_normalized and key_is_utf8
(and their 'val' equivalents) documented in the construct method.
=head2 METHODS
=over 4
=item new
Construct a new Algorithm::MinPerfHashTwoLevel object. Optional arguments
which may be provided are 'source_hash' which is a hash reference to use
lib/Algorithm/MinPerfHashTwoLevel.pm view on Meta::CPAN
on the 'source_hash' passed into the constructor, or requires one to passed
in as an argument.
Returns an array of hashes containing information about each bucket:
{
"h1_keys" => 2,
"h0" => "17713559403787135240",
"idx" => 0,
"key" => "\x{103}",
"key_is_utf8" => 1,
"key_normalized" => "\304\203",
"val" => "\x{103}",
"val_is_utf8" => 1,
"val_normalized" => "\304\203",
"xor_val" => 2
},
The meaning of these keys is as follows:
=over 4
=item h1_keys
lib/Algorithm/MinPerfHashTwoLevel.pm view on Meta::CPAN
The hash value computed for this key.
=item idx
The index of this bucket.
=item key
The key for this bucket as a perl string. (See key_normalized.)
=item key_is_utf8
Whether this key is encoded as utf8. Will be one of
0 for "not utf8", 1 for "is utf8", and 2 for "was utf8"
meaning the key is stored as latin-1, but will be upgraded
when fetched.
=item key_normalized
The raw bytes of the normalized key. (See key_is_utf8.)
=item val
The value for this bucket as a perl string. (See val_normalized.)
=item val_is_utf8
Whether this key is encoded as utf8. Will be either
0 for "not utf8" or 1 for "is utf8".
=item val_normalized
The raw bytes of the normalized key. (See val_is_utf8).
=item xor_val
The mask to be xor'ed with the second hash (h2) to determine
the actual lookup bucket. If the xor_val for a given bucket
is 0 then the key is not in the hash.
=back
=back
lib/Tie/Hash/MinPerfHashTwoLevel/OnDisk.pm view on Meta::CPAN
=head2 FILE FORMAT
Currently there is only one support file format variant, 5.
The file structure consists of a header, followed by a byte vector of seed/state
data for the hash function, followed by a bucket table with records of a fixed size,
optionally followed by a bitvector of the flags for the keys with two bits per key,
optionally followed by a bitvector of flags for values with one bit per value,
followed by a string table containing the comment for the file and the strings it
contains, and lastly a checksum; the last 8 bytes of the file contain a hash of the
rest of the file. The key flags may be 0 for "latin-1/not-utf8", 1 for "is-utf8",
and 2 for "was-utf8" which is used for keys which can be represented as latin-1,
but should be restored as unicode/utf8. The val flags are similar but do not (need to)
support "was-utf8".
Structure:
Header
Hash-state
Bucket-table
Key flags (optional)
Val flags (optional)
Strings
Checksum
} STMT_END
#define MPH_INIT_ALL_KEYSV() STMT_START {\
MY_CXT_INIT; \
MPH_INIT_KEYSV(MPH_KEYSV_IDX,"idx"); \
MPH_INIT_KEYSV(MPH_KEYSV_H1_KEYS,"h1_keys"); \
MPH_INIT_KEYSV(MPH_KEYSV_XOR_VAL,"xor_val"); \
MPH_INIT_KEYSV(MPH_KEYSV_H0,"h0"); \
MPH_INIT_KEYSV(MPH_KEYSV_KEY,"key"); \
MPH_INIT_KEYSV(MPH_KEYSV_KEY_NORMALIZED,"key_normalized"); \
MPH_INIT_KEYSV(MPH_KEYSV_KEY_IS_UTF8,"key_is_utf8"); \
MPH_INIT_KEYSV(MPH_KEYSV_VAL,"val"); \
MPH_INIT_KEYSV(MPH_KEYSV_VAL_NORMALIZED,"val_normalized"); \
MPH_INIT_KEYSV(MPH_KEYSV_VAL_IS_UTF8,"val_is_utf8"); \
\
MPH_INIT_KEYSV(MPH_KEYSV_VARIANT,"variant"); \
MPH_INIT_KEYSV(MPH_KEYSV_COMPUTE_FLAGS,"compute_flags"); \
MPH_INIT_KEYSV(MPH_KEYSV_STATE,"state"); \
MPH_INIT_KEYSV(MPH_KEYSV_SOURCE_HASH,"source_hash"); \
MPH_INIT_KEYSV(MPH_KEYSV_BUF_LENGTH,"buf_length"); \
MPH_INIT_KEYSV(MPH_KEYSV_BUCKETS,"buckets"); \
MPH_INIT_KEYSV(MPH_KEYSV_MOUNT,"mount"); \
} STMT_END
pv_escape() NEED_pv_escape NEED_pv_escape_GLOBAL
pv_pretty() NEED_pv_pretty NEED_pv_pretty_GLOBAL
sv_2pv_flags() NEED_sv_2pv_flags NEED_sv_2pv_flags_GLOBAL
sv_2pvbyte() NEED_sv_2pvbyte NEED_sv_2pvbyte_GLOBAL
sv_catpvf_mg() NEED_sv_catpvf_mg NEED_sv_catpvf_mg_GLOBAL
sv_catpvf_mg_nocontext() NEED_sv_catpvf_mg_nocontext NEED_sv_catpvf_mg_nocontext_GLOBAL
sv_pvn_force_flags() NEED_sv_pvn_force_flags NEED_sv_pvn_force_flags_GLOBAL
sv_setpvf_mg() NEED_sv_setpvf_mg NEED_sv_setpvf_mg_GLOBAL
sv_setpvf_mg_nocontext() NEED_sv_setpvf_mg_nocontext NEED_sv_setpvf_mg_nocontext_GLOBAL
sv_unmagicext() NEED_sv_unmagicext NEED_sv_unmagicext_GLOBAL
utf8_to_uvchr_buf() NEED_utf8_to_uvchr_buf NEED_utf8_to_uvchr_buf_GLOBAL
vload_module() NEED_vload_module NEED_vload_module_GLOBAL
vmess() NEED_vmess NEED_vmess_GLOBAL
vnewSVpvf() NEED_vnewSVpvf NEED_vnewSVpvf_GLOBAL
warner() NEED_warner NEED_warner_GLOBAL
To avoid namespace conflicts, you can change the namespace of the
explicitly exported functions / variables using the C<DPPP_NAMESPACE>
macro. Just C<#define> the macro before including C<ppport.h>:
#define DPPP_NAMESPACE MyOwnNamespace_
PERL_MAGIC_shared_scalar|5.007003||p
PERL_MAGIC_shared|5.007003||p
PERL_MAGIC_sigelem|5.007002||p
PERL_MAGIC_sig|5.007002||p
PERL_MAGIC_substr|5.007002||p
PERL_MAGIC_sv|5.007002||p
PERL_MAGIC_taint|5.007002||p
PERL_MAGIC_tiedelem|5.007002||p
PERL_MAGIC_tiedscalar|5.007002||p
PERL_MAGIC_tied|5.007002||p
PERL_MAGIC_utf8|5.008001||p
PERL_MAGIC_uvar_elem|5.007003||p
PERL_MAGIC_uvar|5.007002||p
PERL_MAGIC_vec|5.007002||p
PERL_MAGIC_vstring|5.008001||p
PERL_PV_ESCAPE_ALL|5.009004||p
PERL_PV_ESCAPE_FIRSTCHAR|5.009004||p
PERL_PV_ESCAPE_NOBACKSLASH|5.009004||p
PERL_PV_ESCAPE_NOCLEAR|5.009004||p
PERL_PV_ESCAPE_QUOTE|5.009004||p
PERL_PV_ESCAPE_RE|5.009005||p
SvPV_nomg_const|5.009003||p
SvPV_nomg_nolen|5.013007||p
SvPV_nomg|5.007002||p
SvPV_renew|5.009003||p
SvPV_set|||
SvPVbyte_force||5.009002|
SvPVbyte_nolen||5.006000|
SvPVbytex_force||5.006000|
SvPVbytex||5.006000|
SvPVbyte|5.006000||p
SvPVutf8_force||5.006000|
SvPVutf8_nolen||5.006000|
SvPVutf8x_force||5.006000|
SvPVutf8x||5.006000|
SvPVutf8||5.006000|
SvPVx|||
SvPV|||
SvREADONLY_off|||
SvREADONLY_on|||
SvREADONLY|||
SvREFCNT_dec_NN||5.017007|
SvREFCNT_dec|||
SvREFCNT_inc_NN|5.009004||p
SvREFCNT_inc_simple_NN|5.009004||p
SvREFCNT_inc_simple_void_NN|5.009004||p
Zero|||
__ASSERT_|||p
_aMY_CXT|5.007003||p
_inverse_folds|||
_is_grapheme|||
_is_in_locale_category|||
_new_invlist_C_array|||
_pMY_CXT|5.007003||p
_to_fold_latin1|||n
_to_upper_title_latin1|||
_to_utf8_case|||
_variant_byte_number|||n
_warn_problematic_locale|||n
aMY_CXT_|5.007003||p
aMY_CXT|5.007003||p
aTHXR_|5.024000||p
aTHXR|5.024000||p
aTHX_|5.006000||p
aTHX|5.006000||p
abort_execution|||
add_above_Latin1_folds|||
bad_type_pv|||
bind_match|||
block_end||5.004000|
block_gimme||5.004000|
block_start||5.004000|
blockhook_register||5.013003|
boolSV|5.004000||p
boot_core_PerlIO|||
boot_core_UNIVERSAL|||
boot_core_mro|||
bytes_cmp_utf8||5.013007|
cBOOL|5.013000||p
call_argv|5.006000||p
call_atexit||5.006000|
call_list||5.004000|
call_method|5.006000||p
call_pv|5.006000||p
call_sv|5.006000||p
caller_cx|5.013005|5.006000|p
calloc||5.007002|n
cando|||
do_seek|||
do_semop|||
do_shmio|||
do_smartmatch|||
do_spawn_nowait|||
do_spawn|||
do_sprintf|||
do_sv_dump||5.006000|
do_sysseek|||
do_tell|||
do_trans_complex_utf8|||
do_trans_complex|||
do_trans_count_utf8|||
do_trans_count|||
do_trans_simple_utf8|||
do_trans_simple|||
do_trans|||
do_vecget|||
do_vecset|||
do_vop|||
docatch|||
does_utf8_overflow|||n
doeval_compile|||
dofile|||
dofindlabel|||
doform|||
doing_taint||5.008001|n
dooneliner|||
doopen_pm|||
doparseform|||
dopoptoeval|||
dopoptogivenfor|||
find_runcv||5.008001|
find_rundefsv||5.013002|
find_script|||
find_span_end_mask|||n
find_span_end|||n
first_symbol|||n
fixup_errno_string|||
foldEQ_latin1_s2_folded|||n
foldEQ_latin1||5.013008|n
foldEQ_locale||5.013002|n
foldEQ_utf8||5.013002|
foldEQ||5.013002|n
fold_constants|||
forbid_setid|||
force_ident_maybe_lex|||
force_ident|||
force_list|||
force_next|||
force_strict_version|||
force_version|||
force_word|||
hv_rand_set||5.018000|
hv_riter_p||5.009003|
hv_riter_set||5.009003|
hv_scalar||5.009001|
hv_store_ent||5.003070|
hv_stores|5.009004||p
hv_store|||
hv_undef_flags|||
hv_undef|||
ibcmp_locale||5.004000|
ibcmp_utf8||5.007003|
ibcmp|||
incline|||
incpush_if_exists|||
incpush_use_sep|||
incpush|||
ingroup|||
init_argv_symbols|||
init_constants|||
init_dbargs|||
init_debugger|||
isASCII_A|||p
isASCII|5.006000||p
isBLANK_A|||p
isBLANK|5.006001||p
isC9_STRICT_UTF8_CHAR|||n
isCNTRL_A|||p
isCNTRL|5.006000||p
isDIGIT_A|||p
isDIGIT|||p
isFF_OVERLONG|||n
isFOO_utf8_lc|||
isGCB|||
isGRAPH_A|||p
isGRAPH|5.006000||p
isIDCONT_A|||p
isIDCONT|5.017008|5.017008|p
isIDFIRST_A|||p
isIDFIRST|||p
isLB|||
isLOWER_A|||p
isLOWER|||p
isUPPER|||p
isUTF8_CHAR_flags|||
isUTF8_CHAR||5.021001|n
isWB|||
isWORDCHAR_A|||p
isWORDCHAR|5.013006|5.013006|p
isXDIGIT_A|||p
isXDIGIT|5.006000||p
is_an_int|||
is_ascii_string||5.011000|n
is_c9strict_utf8_string_loclen|||n
is_c9strict_utf8_string_loc|||n
is_c9strict_utf8_string|||n
is_handle_constructor|||n
is_invariant_string||5.021007|n
is_lvalue_sub||5.007001|
is_safe_syscall||5.019004|
is_ssc_worth_it|||n
is_strict_utf8_string_loclen|||n
is_strict_utf8_string_loc|||n
is_strict_utf8_string|||n
is_utf8_char_buf||5.015008|n
is_utf8_common_with_len|||
is_utf8_common|||
is_utf8_cp_above_31_bits|||n
is_utf8_fixed_width_buf_flags|||n
is_utf8_fixed_width_buf_loc_flags|||n
is_utf8_fixed_width_buf_loclen_flags|||n
is_utf8_invariant_string_loc|||n
is_utf8_invariant_string|||n
is_utf8_non_invariant_string|||n
is_utf8_overlong_given_start_byte_ok|||n
is_utf8_string_flags|||n
is_utf8_string_loc_flags|||n
is_utf8_string_loclen_flags|||n
is_utf8_string_loclen||5.009003|n
is_utf8_string_loc||5.008001|n
is_utf8_string||5.006001|n
is_utf8_valid_partial_char_flags|||n
is_utf8_valid_partial_char|||n
isa_lookup|||
isinfnansv|||
isinfnan||5.021004|n
items|||n
ix|||n
jmaybe|||
join_exact|||
keyword_plugin_standard|||
keyword|||
leave_scope|||
magic_setlvref|||
magic_setmglob|||
magic_setnkeys|||
magic_setnonelem|||
magic_setpack|||
magic_setpos|||
magic_setregexp|||
magic_setsig|||
magic_setsubstr|||
magic_settaint|||
magic_setutf8|||
magic_setuvar|||
magic_setvec|||
magic_set|||
magic_sizepack|||
magic_wipepack|||
make_matcher|||
make_trie|||
malloc_good_size|||n
malloced_size|||n
malloc||5.007002|n
mro_register||5.010001|
mro_set_mro||5.010001|
mro_set_private_data||5.010001|
mul128|||
multiconcat_stringify|||
multideref_stringify|||
my_atof2||5.007002|
my_atof3|||
my_atof||5.006000|
my_attrs|||
my_bytes_to_utf8|||n
my_chsize|||
my_clearenv|||
my_cxt_index|||
my_cxt_init|||
my_dirfd||5.009005|n
my_exit_jump|||
my_exit|||
my_failure_exit||5.004000|
my_fflush_all||5.006000|
my_fork||5.007003|n
my_stat_flags|||
my_stat||5.024000|
my_strerror|||
my_strftime||5.007002|
my_strlcat|5.009004||pn
my_strlcpy|5.009004||pn
my_strnlen|||pn
my_strtod|||n
my_unexec|||
my_vsnprintf||5.009004|n
need_utf8|||n
newANONATTRSUB||5.006000|
newANONHASH|||
newANONLIST|||
newANONSUB|||
newASSIGNOP|||
newATTRSUB_x|||
newATTRSUB||5.006000|
newAVREF|||
newAV|||
newBINOP|||
newSVavdefelem|||
newSVhek||5.009003|
newSViv|||
newSVnv|||
newSVpadname||5.017004|
newSVpv_share||5.013006|
newSVpvf_nocontext|||vn
newSVpvf||5.004000|v
newSVpvn_flags|5.010001||p
newSVpvn_share|5.007001||p
newSVpvn_utf8|5.010001||p
newSVpvn|5.004050||p
newSVpvs_flags|5.010001||p
newSVpvs_share|5.009003||p
newSVpvs|5.009003||p
newSVpv|||
newSVrv|||
newSVsv_flags|||
newSVsv_nomg|||
newSVsv|||
newSVuv|5.006000||p
nextargv|||
nextchar|||
ninstr|||n
no_bareword_allowed|||
no_fh_allowed|||
no_op|||
noperl_die|||vn
not_a_number|||
not_incrementable|||
nothreadhook||5.008000|
notify_parser_that_changed_to_utf8|||
nuke_stacks|||
num_overflow|||n
oopsAV|||
oopsHV|||
op_append_elem||5.013006|
op_append_list||5.013006|
op_class|||
op_clear|||
op_contextualize||5.013006|
op_convert_list||5.021006|
sv_2iuv_common|||
sv_2iuv_non_preserve|||
sv_2iv_flags||5.009001|
sv_2iv|||
sv_2mortal|||
sv_2nv_flags||5.013001|
sv_2pv_flags|5.007002||p
sv_2pv_nolen|5.006000||p
sv_2pvbyte_nolen|5.006000||p
sv_2pvbyte|5.006000||p
sv_2pvutf8_nolen||5.006000|
sv_2pvutf8||5.006000|
sv_2pv|||
sv_2uv_flags||5.009001|
sv_2uv|5.004000||p
sv_add_arena|||
sv_add_backref|||
sv_backoff|||n
sv_bless|||
sv_buf_to_ro|||
sv_buf_to_rw|||
sv_cat_decode||5.008001|
sv_gets||5.003070|
sv_grow|||
sv_i_ncmp|||
sv_inc_nomg||5.013002|
sv_inc|||
sv_insert_flags||5.010001|
sv_insert|||
sv_isa|||
sv_isobject|||
sv_iv||5.005000|
sv_len_utf8_nomg|||
sv_len_utf8||5.006000|
sv_len|||
sv_magic_portable|5.024000|5.004000|p
sv_magicext_mglob|||
sv_magicext||5.007003|
sv_magic|||
sv_mortalcopy_flags|||
sv_mortalcopy|||
sv_ncmp|||
sv_newmortal|||
sv_newref|||
sv_pos_u2b_forwards|||n
sv_pos_u2b_midway|||n
sv_pos_u2b||5.006000|
sv_pvbyten_force||5.006000|
sv_pvbyten||5.006000|
sv_pvbyte||5.006000|
sv_pvn_force_flags|5.007002||p
sv_pvn_force|||
sv_pvn_nomg|5.007003|5.005000|p
sv_pvn||5.005000|
sv_pvutf8n_force||5.006000|
sv_pvutf8n||5.006000|
sv_pvutf8||5.006000|
sv_pv||5.006000|
sv_recode_to_utf8||5.007003|
sv_reftype|||
sv_ref||5.015004|
sv_replace|||
sv_report_used|||
sv_resetpvn|||
sv_reset|||
sv_rvunweaken|||
sv_rvweaken||5.006000|
sv_set_undef|||
sv_sethek|||
sv_uni_display||5.007003|
sv_unmagicext|5.013008||p
sv_unmagic|||
sv_unref_flags||5.007001|
sv_unref|||
sv_untaint||5.004000|
sv_upgrade|||
sv_usepvn_flags||5.009004|
sv_usepvn_mg|5.004050||p
sv_usepvn|||
sv_utf8_decode|||
sv_utf8_downgrade|||
sv_utf8_encode||5.006000|
sv_utf8_upgrade_flags_grow||5.011000|
sv_utf8_upgrade_flags||5.007002|
sv_utf8_upgrade_nomg||5.007002|
sv_utf8_upgrade||5.007001|
sv_uv|5.005000||p
sv_vcatpvf_mg|5.006000|5.004000|p
sv_vcatpvfn_flags||5.017002|
sv_vcatpvfn||5.004000|
sv_vcatpvf|5.006000|5.004000|p
sv_vsetpvf_mg|5.006000|5.004000|p
sv_vsetpvfn||5.004000|
sv_vsetpvf|5.006000|5.004000|p
svtype|||
swallow_bom|||
sys_init3||5.010000|n
sys_init||5.010000|n
sys_intern_clear|||
sys_intern_dup|||
sys_intern_init|||
sys_term||5.010000|n
taint_env|||
taint_proper|||
tied_method|||v
tmps_grow_p|||
toFOLD_utf8_safe|||
toFOLD_utf8||5.019001|
toFOLD_uvchr||5.023009|
toFOLD||5.019001|
toLOWER_L1||5.019001|
toLOWER_LC||5.004000|
toLOWER_utf8_safe|||
toLOWER_utf8||5.015007|
toLOWER_uvchr||5.023009|
toLOWER|||
toTITLE_utf8_safe|||
toTITLE_utf8||5.015007|
toTITLE_uvchr||5.023009|
toTITLE||5.019001|
toUPPER_utf8_safe|||
toUPPER_utf8||5.015007|
toUPPER_uvchr||5.023009|
toUPPER|||
to_byte_substr|||
to_lower_latin1|||n
to_utf8_substr|||
tokenize_use|||
tokeq|||
tokereport|||
too_few_arguments_pv|||
too_many_arguments_pv|||
translate_substr_offsets|||n
traverse_op_tree|||
try_amagic_bin|||
try_amagic_un|||
turkic_fc|||
unpackstring||5.008001|
unreferenced_to_tmp_stack|||
unshare_hek_or_pvn|||
unshare_hek|||
unsharepvn||5.003070|
unwind_handler_stack|||
update_debugger_info|||
upg_version||5.009005|
usage|||
utf16_textfilter|||
utf16_to_utf8_reversed||5.006001|
utf16_to_utf8||5.006001|
utf8_distance||5.006000|
utf8_hop_back|||n
utf8_hop_forward|||n
utf8_hop_safe|||n
utf8_hop||5.006000|n
utf8_length||5.007001|
utf8_mg_len_cache_update|||
utf8_mg_pos_cache_update|||
utf8_to_uvchr_buf|5.015009|5.015009|p
utf8_to_uvchr|||p
utf8n_to_uvchr_error|||n
utf8n_to_uvchr||5.007001|n
utf8n_to_uvuni||5.007001|
utilize|||
uvchr_to_utf8_flags||5.007003|
uvchr_to_utf8||5.007001|
uvoffuni_to_utf8_flags||5.019004|
uvuni_to_utf8_flags||5.007003|
uvuni_to_utf8||5.007001|
valid_utf8_to_uvchr|||n
validate_suid|||
variant_under_utf8_count|||n
varname|||
vcmp||5.009000|
vcroak||5.006000|
vdeb||5.007003|
vform||5.006000|
visit|||
vivify_defelem|||
vivify_ref|||
vload_module|5.006000||p
vmess|5.006000|5.006000|p
return p - str;
}
#endif
#endif
#if (PERL_BCDVERSION < 0x5031002)
/* Versions prior to this accepted things that are now considered
* malformations, and didn't return -1 on error with warnings enabled
* */
# undef utf8_to_uvchr_buf
#endif
/* This implementation brings modern, generally more restricted standards to
* utf8_to_uvchr_buf. Some of these are security related, and clearly must
* be done. But its arguable that the others need not, and hence should not.
* The reason they're here is that a module that intends to play with the
* latest perls shoud be able to work the same in all releases. An example is
* that perl no longer accepts any UV for a code point, but limits them to
* IV_MAX or below. This is for future internal use of the larger code points.
* If it turns out that some of these changes are breaking code that isn't
* intended to work with modern perls, the tighter restrictions could be
* relaxed. khw thinks this is unlikely, but has been wrong in the past. */
#ifndef utf8_to_uvchr_buf
/* Choose which underlying implementation to use. At least one must be
* present or the perl is too early to handle this function */
# if defined(utf8n_to_uvchr) || defined(utf8_to_uv)
# if defined(utf8n_to_uvchr) /* This is the preferred implementation */
# define _ppport_utf8_to_uvchr_buf_callee utf8n_to_uvchr
# else
# define _ppport_utf8_to_uvchr_buf_callee utf8_to_uv
# endif
# endif
#ifdef _ppport_utf8_to_uvchr_buf_callee
# if defined(NEED_utf8_to_uvchr_buf)
static UV DPPP_(my_utf8_to_uvchr_buf)(pTHX_ const U8 * s, const U8 * send, STRLEN * retlen);
static
#else
extern UV DPPP_(my_utf8_to_uvchr_buf)(pTHX_ const U8 * s, const U8 * send, STRLEN * retlen);
#endif
#if defined(NEED_utf8_to_uvchr_buf) || defined(NEED_utf8_to_uvchr_buf_GLOBAL)
#ifdef utf8_to_uvchr_buf
# undef utf8_to_uvchr_buf
#endif
#define utf8_to_uvchr_buf(a,b,c) DPPP_(my_utf8_to_uvchr_buf)(aTHX_ a,b,c)
#define Perl_utf8_to_uvchr_buf DPPP_(my_utf8_to_uvchr_buf)
UV
DPPP_(my_utf8_to_uvchr_buf)(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
{
UV ret;
STRLEN curlen;
bool overflows = 0;
const U8 *cur_s = s;
const bool do_warnings = ckWARN_d(WARN_UTF8);
if (send > s) {
curlen = send - s;
}
assert(0); /* Modern perls die under this circumstance */
curlen = 0;
if (! do_warnings) { /* Handle empty here if no warnings needed */
if (retlen) *retlen = 0;
return UNICODE_REPLACEMENT;
}
}
/* The modern version allows anything that evaluates to a legal UV, but not
* overlongs nor an empty input */
ret = _ppport_utf8_to_uvchr_buf_callee(
s, curlen, retlen, (UTF8_ALLOW_ANYUV
& ~(UTF8_ALLOW_LONG|UTF8_ALLOW_EMPTY)));
/* But actually, modern versions restrict the UV to being no more than what
* an IV can hold */
if (ret > PERL_INT_MAX) {
overflows = 1;
}
# if (PERL_BCDVERSION < 0x5026000)
ret, *cur_s, *s);
}
if (retlen) {
*retlen = (STRLEN) -1;
}
return 0;
}
}
/* If failed and warnings are off, to emulate the behavior of the real
* utf8_to_uvchr(), try again, allowing anything. (Note a return of 0 is
* ok if the input was '\0') */
if (UNLIKELY(ret == 0 && (curlen == 0 || *s != '\0'))) {
/* If curlen is 0, we already handled the case where warnings are
* disabled, so this 'if' will be true, and we won't look at the
* contents of 's' */
if (do_warnings) {
*retlen = (STRLEN) -1;
}
else {
ret = _ppport_utf8_to_uvchr_buf_callee(
s, curlen, retlen, UTF8_ALLOW_ANY);
/* Override with the REPLACEMENT character, as that is what the
* modern version of this function returns */
ret = UNICODE_REPLACEMENT;
# if (PERL_BCDVERSION < 0x5016000)
/* Versions earlier than this don't necessarily return the proper
* length. It should not extend past the end of string, nor past
* what the first byte indicates the length is, nor past the
}
}
return ret;
}
# endif
#endif
#endif
#if defined(UTF8SKIP) && defined(utf8_to_uvchr_buf)
#undef utf8_to_uvchr /* Always redefine this unsafe function so that it refuses
to read past a NUL, making it much less likely to read
off the end of the buffer. A NUL indicates the start
of the next character anyway. If the input isn't
NUL-terminated, the function remains unsafe, as it
always has been. */
#ifndef utf8_to_uvchr
# define utf8_to_uvchr(s, lp) \
((*(s) == '\0') \
? utf8_to_uvchr_buf(s,((s)+1), lp) /* Handle single NUL specially */ \
: utf8_to_uvchr_buf(s, (s) + my_strnlen((char *) (s), UTF8SKIP(s)), (lp)))
#endif
#endif
#ifdef HAS_MEMCMP
#ifndef memNE
# define memNE(s1,s2,l) (memcmp(s1,s2,l))
#endif
#ifndef memEQ
#endif
#ifndef PERL_MAGIC_vstring
# define PERL_MAGIC_vstring 'V'
#endif
#ifndef PERL_MAGIC_vec
# define PERL_MAGIC_vec 'v'
#endif
#ifndef PERL_MAGIC_utf8
# define PERL_MAGIC_utf8 'w'
#endif
#ifndef PERL_MAGIC_substr
# define PERL_MAGIC_substr 'x'
#endif
#ifndef PERL_MAGIC_defelem
# define PERL_MAGIC_defelem 'y'
#endif
#if (PERL_BCDVERSION < 0x5006000)
# define D_PPP_CONSTPV_ARG(x) ((char *) (x))
#else
# define D_PPP_CONSTPV_ARG(x) (x)
#endif
#ifndef newSVpvn
# define newSVpvn(data,len) ((data) \
? ((len) ? newSVpv((data), (len)) : newSVpv("", 0)) \
: newSV(0))
#endif
#ifndef newSVpvn_utf8
# define newSVpvn_utf8(s, len, u) newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0)
#endif
#ifndef SVf_UTF8
# define SVf_UTF8 0
#endif
#ifndef newSVpvn_flags
#if defined(NEED_newSVpvn_flags)
static SV * DPPP_(my_newSVpvn_flags)(pTHX_ const char *s, STRLEN len, U32 flags);
static
#ifdef sv_2pvbyte
# undef sv_2pvbyte
#endif
#define sv_2pvbyte(a,b) DPPP_(my_sv_2pvbyte)(aTHX_ a,b)
#define Perl_sv_2pvbyte DPPP_(my_sv_2pvbyte)
char *
DPPP_(my_sv_2pvbyte)(pTHX_ SV *sv, STRLEN *lp)
{
sv_utf8_downgrade(sv,0);
return SvPV(sv,*lp);
}
#endif
/* Hint: sv_2pvbyte
* Use the SvPVbyte() macro instead of sv_2pvbyte().
*/
#undef SvPVbyte
const MGVTBL* const virt = mg->mg_virtual;
if (mg->mg_type == type && virt == vtbl) {
*mgp = mg->mg_moremagic;
if (virt && virt->svt_free)
virt->svt_free(aTHX_ sv, mg);
if (mg->mg_ptr && mg->mg_type != PERL_MAGIC_regex_global) {
if (mg->mg_len > 0)
Safefree(mg->mg_ptr);
else if (mg->mg_len == HEf_SVKEY) /* Questionable on older perls... */
SvREFCNT_dec(MUTABLE_SV(mg->mg_ptr));
else if (mg->mg_type == PERL_MAGIC_utf8)
Safefree(mg->mg_ptr);
}
if (mg->mg_flags & MGf_REFCOUNTED)
SvREFCNT_dec(mg->mg_obj);
Safefree(mg);
}
else
mgp = &mg->mg_moremagic;
}
if (SvMAGIC(sv)) {
DPPP_(my_pv_escape)(pTHX_ SV *dsv, char const * const str,
const STRLEN count, const STRLEN max,
STRLEN * const escaped, const U32 flags)
{
const char esc = flags & PERL_PV_ESCAPE_RE ? '%' : '\\';
const char dq = flags & PERL_PV_ESCAPE_QUOTE ? '"' : esc;
char octbuf[32] = "%123456789ABCDF";
STRLEN wrote = 0;
STRLEN chsize = 0;
STRLEN readsize = 1;
#if defined(is_utf8_string) && defined(utf8_to_uvchr_buf)
bool isuni = flags & PERL_PV_ESCAPE_UNI ? 1 : 0;
#endif
const char *pv = str;
const char * const end = pv + count;
octbuf[0] = esc;
if (!(flags & PERL_PV_ESCAPE_NOCLEAR))
sv_setpvs(dsv, "");
#if defined(is_utf8_string) && defined(utf8_to_uvchr_buf)
if ((flags & PERL_PV_ESCAPE_UNI_DETECT) && is_utf8_string((U8*)pv, count))
isuni = 1;
#endif
for (; pv < end && (!max || wrote < max) ; pv += readsize) {
const UV u =
#if defined(is_utf8_string) && defined(utf8_to_uvchr_buf)
isuni ? utf8_to_uvchr_buf((U8*)pv, end, &readsize) :
#endif
(U8)*pv;
const U8 c = (U8)u & 0xFF;
if (u > 255 || (flags & PERL_PV_ESCAPE_ALL)) {
if (flags & PERL_PV_ESCAPE_FIRSTCHAR)
chsize = my_snprintf(octbuf, sizeof octbuf,
"%" UVxf, u);
else
chsize = my_snprintf(octbuf, sizeof octbuf,
t/Algo_v5.t view on Meta::CPAN
}
my %hash= ("A" .. "Z");
my $buckets= $o->compute(\%hash);
is_deeply($buckets,
[
{
"h0" => "2811388775115704789",
"idx" => 0,
"key" => "M",
"key_is_utf8" => 0,
"key_normalized" => "M",
"val" => "N",
"val_is_utf8" => 0,
"val_normalized" => "N"
},
{
"h0" => "8860848721851830215",
"h1_keys" => 2,
"idx" => 1,
"key" => "K",
"key_is_utf8" => 0,
"key_normalized" => "K",
"val" => "L",
"val_is_utf8" => 0,
"val_normalized" => "L",
"xor_val" => 1
},
{
"h0" => "2333874141850904510",
"h1_keys" => 1,
"idx" => 2,
"key" => "G",
"key_is_utf8" => 0,
"key_normalized" => "G",
"val" => "H",
"val_is_utf8" => 0,
"val_normalized" => "H",
"xor_val" => "4294967295"
},
{
"h0" => "16876117158654460122",
"idx" => 3,
"key" => "Y",
"key_is_utf8" => 0,
"key_normalized" => "Y",
"val" => "Z",
"val_is_utf8" => 0,
"val_normalized" => "Z"
},
{
"h0" => "8715353882719101949",
"h1_keys" => 1,
"idx" => 4,
"key" => "S",
"key_is_utf8" => 0,
"key_normalized" => "S",
"val" => "T",
"val_is_utf8" => 0,
"val_normalized" => "T",
"xor_val" => "4294967294"
},
{
"h0" => "7118026915973626049",
"h1_keys" => 1,
"idx" => 5,
"key" => "U",
"key_is_utf8" => 0,
"key_normalized" => "U",
"val" => "V",
"val_is_utf8" => 0,
"val_normalized" => "V",
"xor_val" => "4294967293"
},
{
"h0" => "8257329964001049281",
"h1_keys" => 2,
"idx" => 6,
"key" => "O",
"key_is_utf8" => 0,
"key_normalized" => "O",
"val" => "P",
"val_is_utf8" => 0,
"val_normalized" => "P",
"xor_val" => 1
},
{
"h0" => "5518171323424817881",
"h1_keys" => 1,
"idx" => 7,
"key" => "E",
"key_is_utf8" => 0,
"key_normalized" => "E",
"val" => "F",
"val_is_utf8" => 0,
"val_normalized" => "F",
"xor_val" => "4294967291"
},
{
"h0" => "3591181703942984702",
"h1_keys" => 1,
"idx" => 8,
"key" => "Q",
"key_is_utf8" => 0,
"key_normalized" => "Q",
"val" => "R",
"val_is_utf8" => 0,
"val_normalized" => "R",
"xor_val" => "4294967288"
},
{
"h0" => "8458370515337648683",
"h1_keys" => 1,
"idx" => 9,
"key" => "W",
"key_is_utf8" => 0,
"key_normalized" => "W",
"val" => "X",
"val_is_utf8" => 0,
"val_normalized" => "X",
"xor_val" => "4294967286"
},
{
"h0" => "18153270191496908466",
"idx" => 10,
"key" => "C",
"key_is_utf8" => 0,
"key_normalized" => "C",
"val" => "D",
"val_is_utf8" => 0,
"val_normalized" => "D"
},
{
"h0" => "5043040936135718210",
"h1_keys" => 2,
"idx" => 11,
"key" => "A",
"key_is_utf8" => 0,
"key_normalized" => "A",
"val" => "B",
"val_is_utf8" => 0,
"val_normalized" => "B",
"xor_val" => 2
},
{
"h0" => "15354065489600908969",
"h1_keys" => 1,
"idx" => 12,
"key" => "I",
"key_is_utf8" => 0,
"key_normalized" => "I",
"val" => "J",
"val_is_utf8" => 0,
"val_normalized" => "J",
"xor_val" => "4294967285"
}
],
"simple hash A-Z",
) or diag Dumper($buckets);
%hash=map {
my $key= chr($_);
utf8::upgrade($key) if $_ % 2;
$key => $key
} 250 .. 260;
$buckets= $o->compute(\%hash);
is_deeply($buckets,
[
{
"h0" => "7329038856428266488",
"h1_keys" => 1,
"idx" => 0,
"key" => "\376",
"key_is_utf8" => 0,
"key_normalized" => "\376",
"val" => "\376",
"val_is_utf8" => 0,
"val_normalized" => "\376",
"xor_val" => "4294967293"
},
{
"h0" => "7406429479659263986",
"h1_keys" => 2,
"idx" => 1,
"key" => "\x{101}",
"key_is_utf8" => 1,
"key_normalized" => "\304\201",
"val" => "\x{101}",
"val_is_utf8" => 1,
"val_normalized" => "\304\201",
"xor_val" => 2
},
{
"h0" => "7026079938176527097",
"h1_keys" => 1,
"idx" => 2,
"key" => "\x{103}",
"key_is_utf8" => 1,
"key_normalized" => "\304\203",
"val" => "\x{103}",
"val_is_utf8" => 1,
"val_normalized" => "\304\203",
"xor_val" => "4294967292"
},
{
"h0" => "2560445542346638988",
"idx" => 3,
"key" => "\x{100}",
"key_is_utf8" => 1,
"key_normalized" => "\304\200",
"val" => "\x{100}",
"val_is_utf8" => 1,
"val_normalized" => "\304\200"
},
{
"h0" => "15075586565050556610",
"h1_keys" => 3,
"idx" => 4,
"key" => "\x{102}",
"key_is_utf8" => 1,
"key_normalized" => "\304\202",
"val" => "\x{102}",
"val_is_utf8" => 1,
"val_normalized" => "\304\202",
"xor_val" => 1
},
{
"h0" => "10520228168695442556",
"h1_keys" => 1,
"idx" => 5,
"key" => "\x{ff}",
"key_is_utf8" => 2,
"key_normalized" => "\377",
"val" => "\x{ff}",
"val_is_utf8" => 1,
"val_normalized" => "\303\277",
"xor_val" => "4294967290"
},
{
"h0" => "10697593882658072482",
"h1_keys" => 1,
"idx" => 6,
"key" => "\372",
"key_is_utf8" => 0,
"key_normalized" => "\372",
"val" => "\372",
"val_is_utf8" => 0,
"val_normalized" => "\372",
"xor_val" => "4294967286"
},
{
"h0" => "12905152200806003791",
"idx" => 7,
"key" => "\374",
"key_is_utf8" => 0,
"key_normalized" => "\374",
"val" => "\374",
"val_is_utf8" => 0,
"val_normalized" => "\374"
},
{
"h0" => "11318277185996573588",
"idx" => 8,
"key" => "\x{fd}",
"key_is_utf8" => 2,
"key_normalized" => "\375",
"val" => "\x{fd}",
"val_is_utf8" => 1,
"val_normalized" => "\303\275"
},
{
"h0" => "11837163118808456557",
"idx" => 9,
"key" => "\x{104}",
"key_is_utf8" => 1,
"key_normalized" => "\304\204",
"val" => "\x{104}",
"val_is_utf8" => 1,
"val_normalized" => "\304\204"
},
{
"h0" => "13939891494320893160",
"h1_keys" => 2,
"idx" => 10,
"key" => "\x{fb}",
"key_is_utf8" => 2,
"key_normalized" => "\373",
"val" => "\x{fb}",
"val_is_utf8" => 1,
"val_normalized" => "\303\273",
"xor_val" => 2
}
],
"hash with utf8 works as expected",
) or diag Dumper($buckets);
t/OnDisk.pl view on Meta::CPAN
ok(defined($srand),"srand as expected: $srand");
my $eval_ok= eval {
tie my(%fail), $class => $0;
1;
};
my $error= !$eval_ok && $@;
ok($error,"it failed: $@");
my $tmpdir= File::Temp->newdir();
my $not_utf8= "not utf8: \x{DF}";
my $utf8_can_be_downgraded= "was utf8: \x{DF}";
utf8::upgrade($utf8_can_be_downgraded);
my $must_be_utf8= "is utf8: \x{100}"; # this can ONLY be represented as utf8
my @source_hashes= (
simple => {
foo => "bar",
baz => "bop",
fiz => "shmang",
plop => "shwoosh",
},
large => { map { $_ => $_ } 1 .. 50000 },
mixed_utf8 => {
$not_utf8 => $not_utf8,
$utf8_can_be_downgraded => $utf8_can_be_downgraded,
$must_be_utf8 => $must_be_utf8,
map { chr($_) => chr($_) } 240 .. 270,
},
pow2_08 =>
{ map { $_ => $_ } 1 .. 8 },
pow2_16 =>
{ map { $_ => $_ } 1 .. 16 },
pow2_32 =>
{ map { $_ => $_ } 1 .. 32 },
pow2_64 =>
{ map { $_ => $_ } 1 .. 64 },
chr_chr_utf8 =>
{ map { chr($_) => chr($_) } 256 .. 270 },
chr_num_utf8 =>
{ map { chr($_) => $_ } 256 .. 270 },
num_chr_utf8 =>
{ map { $_ => chr($_) } 256 .. 270 },
mix_mix_utf8 =>
{ map { ($_ % 2 ? chr($_) : $_) => ($_ % 2 ? $_ : chr($_)) } 256 .. 270 },
chr_mix_utf8 =>
{ map { chr($_) => ($_ % 2 ? $_ : chr($_)) } 256 .. 270 },
num_mix_utf8 =>
{ map { $_ => ($_ % 2 ? $_ : chr($_)) } 256 .. 270 },
mix_num_utf8 =>
{ map { ($_ % 2 ? chr($_) : $_) => $_ } 256 .. 270 },
mix_chr_utf8 =>
{ map { ($_ % 2 ? chr($_) : $_) => chr($_) } 256 .. 270 },
);
my $rand_seed= join("",map chr(rand 256), 1..16);
foreach my $seed ("1234567812345678", undef, $rand_seed) {
foreach my $idx (0 .. (@source_hashes/2)-1) {
my $name= $source_hashes[$idx*2];
my $source_hash= $source_hashes[$idx*2+1];
foreach my $variant (defined($ENV{VARIANT}) ? ($ENV{VARIANT}) : (MIN_VARIANT .. MAX_VARIANT)) {
t/OnDisk.pl view on Meta::CPAN
is_deeply(\@got_keys,\@want_keys,"keys in both are the same ($title)");
is_deeply(\@got_each_keys,\@want_keys,"got_keys and got_each_keys agree ($title)");
is_deeply(\@got_values,\@want_values,"got_values and got_each_values agree ($title)");
is_deeply(\@got_fetch_values,\@want_values,"values in both are same ($title)");
is_deeply(\@got_each_values,\@want_values,"values in both are same ($title)");
{
my @bad;
foreach my $idx (0..$#got_keys) {
if (utf8::is_utf8($got_keys[$idx]) != utf8::is_utf8($want_keys[$idx])) {
push @bad, [ $got_keys[$idx], $want_keys[$idx] ];
}
}
is(0+@bad,0,"no keys with differing utf8 flags ($title)")
or diag Dumper($bad[0]);
}
} else {
ok(0,"test cannot pass if make_file dies") for 1..17;
}
}
}
}
}