Algorithm-MinPerfHashTwoLevel
view release on metacpan
or search on metacpan
MinPerfHashTwoLevel.xs
view on Meta::CPAN
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | MPH_STATIC_INLINE void
sv_set_from_bucket(pTHX_ SV *sv , U8 *strs , const U32 ofs, const U32 len, const U32 idx, const U8 *flags , const U32 bits, const U8 utf8_default, const U8 utf8_default_shift) {
U8 *ptr ;
U8 is_utf8;
if (ofs) {
ptr= (strs) + (ofs);
if (utf8_default) {
is_utf8= utf8_default >> utf8_default_shift;
} else {
GETBITS(is_utf8,flags,idx,bits);
}
} else {
ptr= 0;
is_utf8= 0;
}
/* note that sv_setpvn() will cause the sv to
* become undef if ptr is 0 */
sv_setpvn_mg((sv),ptr,len);
if (is_utf8 > 1) {
sv_utf8_upgrade(sv);
}
else
if (is_utf8) {
SvUTF8_on(sv);
}
else
if (ptr) {
SvUTF8_off(sv);
}
}
MPH_STATIC_INLINE int
lookup_bucket(pTHX_ struct mph_header *mph , U32 index , SV *key_sv , SV *val_sv )
|
MinPerfHashTwoLevel.xs
view on Meta::CPAN
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 | U8 *key_pv ;
U64 h0;
U32 h1;
U32 h2;
U32 index ;
U8 *got_key_pv ;
STRLEN got_key_len;
if (SvUTF8(key_sv)) {
SV *tmp = sv_2mortal(newSVsv(key_sv));
sv_utf8_downgrade(tmp,1);
key_sv= tmp;
}
key_pv= SvPV(key_sv,key_len);
h0= mph_hash_with_state(state,key_pv,key_len);
h1= h0 >> 32;
index = h1 % mph->num_buckets;
bucket= buckets + index ;
if (!bucket->xor_val)
return 0;
|
MinPerfHashTwoLevel.xs
view on Meta::CPAN
211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | }
return head->variant;
}
void
mph_munmap(struct mph_obj *obj ) {
munmap(obj->header,obj->bytes);
}
STRLEN
normalize_with_flags(pTHX_ SV *sv , SV *normalized_sv , SV *is_utf8_sv , int downgrade) {
STRLEN len;
if (SvROK(sv)) {
croak( "Error: Not expecting a reference value in source hash" );
}
sv_setsv(normalized_sv,sv);
if (SvOK(sv)) {
STRLEN pv_len;
char *pv = SvPV(sv,pv_len);
if (pv_len > 0xFFFF)
croak( "Error: String in source hash is too long to store, max length is %u got length %lu" , 0xFFFF, pv_len);
if (SvUTF8(sv)) {
if (downgrade)
sv_utf8_downgrade(normalized_sv,1);
if (SvUTF8(normalized_sv)) {
SvUTF8_off(normalized_sv);
sv_setiv(is_utf8_sv,1);
} else {
sv_setiv(is_utf8_sv,2);
}
}
return pv_len;
} else {
sv_setiv(is_utf8_sv, 0);
return 0;
}
}
U32
_roundup(const U32 n, const U32 s) {
const U32 r= n % s;
if (r) {
return n + s - r;
} else {
|
MinPerfHashTwoLevel.xs
view on Meta::CPAN
271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 | normalize_source_hash(pTHX_ HV *source_hv , AV *keys_av , U32 compute_flags, SV *buf_length_sv , char *state_pv ) {
dMY_CXT;
HE *he ;
U32 buf_length= 0;
U32 ctr;
hv_iterinit(source_hv);
while (he= hv_iternext(source_hv)) {
SV *val_sv = HeVAL(he);
SV *val_normalized_sv ;
SV *val_is_utf8_sv ;
SV *key_sv ;
SV *key_normalized_sv ;
SV *key_is_utf8_sv ;
HV *hv ;
U8 *key_pv ;
STRLEN key_len;
U64 h0;
if (!val_sv) croak( "panic: no sv for value?" );
if (!SvOK(val_sv) && (compute_flags & MPH_F_FILTER_UNDEF)) continue ;
hv= newHV();
val_normalized_sv= newSV(0);
val_is_utf8_sv= newSVuv(0);
key_sv= newSVhek(HeKEY_hek(he));
key_normalized_sv= newSV(0);
key_is_utf8_sv= newSVuv(0);
hv_ksplit(hv,15);
hv_store_ent_with_keysv(hv,MPH_KEYSV_KEY, key_sv);
hv_store_ent_with_keysv(hv,MPH_KEYSV_KEY_NORMALIZED, key_normalized_sv);
hv_store_ent_with_keysv(hv,MPH_KEYSV_KEY_IS_UTF8, key_is_utf8_sv);
hv_store_ent_with_keysv(hv,MPH_KEYSV_VAL, SvREFCNT_inc_simple_NN(val_sv));
hv_store_ent_with_keysv(hv,MPH_KEYSV_VAL_NORMALIZED, val_normalized_sv);
hv_store_ent_with_keysv(hv,MPH_KEYSV_VAL_IS_UTF8, val_is_utf8_sv);
/* install everything into the keys_av just in case normalize_with_flags() dies */
av_push(keys_av,newRV_noinc((SV*)hv));
buf_length += normalize_with_flags(aTHX_ key_sv, key_normalized_sv, key_is_utf8_sv, 1);
buf_length += normalize_with_flags(aTHX_ val_sv, val_normalized_sv, val_is_utf8_sv, 0);
key_pv= (U8 *)SvPV(key_normalized_sv,key_len);
h0= mph_hash_with_state(state_pv,key_pv,key_len);
hv_store_ent_with_keysv(hv,MPH_KEYSV_H0, newSVuv(h0));
}
if (buf_length_sv)
sv_setuv(buf_length_sv, buf_length);
/* we now know how many keys there are, and what the max_xor_val should be */
|
MinPerfHashTwoLevel.xs
view on Meta::CPAN
885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 | char *state ;
struct mph_bucket *table ;
char *key_flags ;
char *val_flags ;
char *str_buf_start ;
char *str_buf_end ;
char *str_buf_pos ;
U32 i;
STRLEN pv_len;
char *pv ;
U32 key_is_utf8_count[3]={0,0,0};
U32 val_is_utf8_count[2]={0,0};
U32 used_flags;
U32 the_flag;
IV key_is_utf8_generic=-1;
IV val_is_utf8_generic=-1;
for (i= 0; i < bucket_count; i++) {
SV * *got = av_fetch(buckets_av,i,0);
HV *hv = (HV *)SvRV( *got );
HE *key_is_utf8_he = hv_fetch_ent_with_keysv(hv,MPH_KEYSV_KEY_IS_UTF8,0);
HE *val_is_utf8_he = hv_fetch_ent_with_keysv(hv,MPH_KEYSV_VAL_IS_UTF8,0);
key_is_utf8_count[SvUV(HeVAL(key_is_utf8_he))]++;
val_is_utf8_count[SvUV(HeVAL(val_is_utf8_he))]++;
}
used_flags= 0;
if (key_is_utf8_count[0]) { the_flag= 0; used_flags++; }
if (key_is_utf8_count[1]) { the_flag= 1; used_flags++; }
if (key_is_utf8_count[2]) { the_flag= 2; used_flags++; }
if (used_flags == 1) {
key_is_utf8_generic= the_flag;
key_flags_rlen= 0;
}
used_flags= 0;
if (val_is_utf8_count[0]) { the_flag= 0; used_flags++; }
if (val_is_utf8_count[1]) { the_flag= 1; used_flags++; }
if (used_flags == 1) {
val_is_utf8_generic= the_flag;
val_flags_rlen= 0;
}
total_size=
+ header_rlen
+ state_rlen
+ table_rlen
+ key_flags_rlen
+ val_flags_rlen
+ str_rlen
|
MinPerfHashTwoLevel.xs
view on Meta::CPAN
941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 | head->magic_num= 1278363728;
head->variant= variant;
head->num_buckets= bucket_count;
head->state_ofs= header_rlen;
head->table_ofs= head->state_ofs + state_rlen;
head->key_flags_ofs= head->table_ofs + table_rlen;
head->val_flags_ofs= head->key_flags_ofs + key_flags_rlen;
head->str_buf_ofs= head->val_flags_ofs + val_flags_rlen;
if (val_is_utf8_generic >= 0)
head->general_flags |= (MPH_VALS_ARE_SAME_UTF8NESS_FLAG_BIT | (val_is_utf8_generic << MPH_VALS_ARE_SAME_UTF8NESS_SHIFT));
if (key_is_utf8_generic >= 0)
head->general_flags |= (MPH_KEYS_ARE_SAME_UTF8NESS_FLAG_BIT | (key_is_utf8_generic << MPH_KEYS_ARE_SAME_UTF8NESS_SHIFT));
state= start + head->state_ofs;
table= (struct mph_bucket *)(start + head->table_ofs);
key_flags= start + head->key_flags_ofs;
val_flags= start + head->val_flags_ofs;
str_buf_start= start + head->str_buf_ofs;
str_buf_end= start + total_size;
str_buf_pos= str_buf_start + 2;
Copy(state_pv,state,state_len,char);
|
MinPerfHashTwoLevel.xs
view on Meta::CPAN
973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 | HE *val_normalized_he = hv_fetch_ent_with_keysv(hv,MPH_KEYSV_VAL_NORMALIZED,0);
HE *xor_val_he = hv_fetch_ent_with_keysv(hv,MPH_KEYSV_XOR_VAL,0);
if (xor_val_he) {
table[i].xor_val= SvUV(HeVAL(xor_val_he));
} else {
table[i].xor_val= 0;
}
SETOFS(i,key_normalized_he,table,key_ofs,key_len,str_buf_start,str_buf_pos,str_buf_end,str_ofs_hv);
SETOFS(i,val_normalized_he,table,val_ofs,val_len,str_buf_start,str_buf_pos,str_buf_end,str_ofs_hv);
if ( key_is_utf8_generic < 0) {
HE *key_is_utf8_he = hv_fetch_ent_with_keysv(hv,MPH_KEYSV_KEY_IS_UTF8,0);
if (key_is_utf8_he) {
UV u= SvUV(HeVAL(key_is_utf8_he));
SETBITS(u,key_flags,i,2);
} else {
croak( "panic: out of memory? no key_is_utf8_he for %u" ,i);
}
}
if ( val_is_utf8_generic < 0 ) {
HE *val_is_utf8_he = hv_fetch_ent_with_keysv(hv,MPH_KEYSV_VAL_IS_UTF8,0);
if (val_is_utf8_he) {
UV u= SvUV(HeVAL(val_is_utf8_he));
SETBITS(u,val_flags,i,1);
} else {
croak( "panic: out of memory? no val_is_utf8_he for %u" ,i);
}
}
}
*str_buf_pos = 0; str_buf_pos++;
*str_buf_pos = 128; str_buf_pos++;
{
U32 r= (str_buf_pos - start) % alignment;
if (r) {
str_buf_pos += (alignment - r);
}
|
lib/Algorithm/MinPerfHashTwoLevel.pm
view on Meta::CPAN
227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 | x = ((x >> 16) ^ x) * 0x45d9f3b;
x = ((x >> 16) ^ x) * 0x45d9f3b;
x = ((x >> 16) ^ x);
which is just a simple 32 bit integer hash function I found at
integer hash function would do .
*NOTE * in Perl a given string may have differing binary representations
if it is encoded as utf8 or not. This module uses the same conventions
as Perl itself, which is that keys are stored in their minimal form when
possible, and are only stored in their unicode (utf8) form when they
cannot be downgraded to latin-1. This ensures that the unicode and latin-1
representations of a given string are treated as the same key. This module
deals with this by "normalizing" the keys and values into latin-1, but
tracking the representation as a flag. See key_normalized and key_is_utf8
(and their 'val' equivalents) documented in the construct method.
|
lib/Algorithm/MinPerfHashTwoLevel.pm
view on Meta::CPAN
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 | on the 'source_hash' passed into the constructor, or requires one to passed
in as an argument.
Returns an array of hashes containing information about each bucket:
{
"h1_keys" => 2,
"h0" => "17713559403787135240" ,
"idx" => 0,
"key" => "\x{103}" ,
"key_is_utf8" => 1,
"key_normalized" => "\304\203" ,
"val" => "\x{103}" ,
"val_is_utf8" => 1,
"val_normalized" => "\304\203" ,
"xor_val" => 2
},
The meaning of these keys is as follows:
|
lib/Algorithm/MinPerfHashTwoLevel.pm
view on Meta::CPAN
291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 | The hash value computed for this key.
|
lib/Tie/Hash/MinPerfHashTwoLevel/OnDisk.pm
view on Meta::CPAN
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 | |
mph2l.h
view on Meta::CPAN
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | } STMT_END
MY_CXT_INIT; \
MPH_INIT_KEYSV(MPH_KEYSV_IDX, "idx" ); \
MPH_INIT_KEYSV(MPH_KEYSV_H1_KEYS, "h1_keys" ); \
MPH_INIT_KEYSV(MPH_KEYSV_XOR_VAL, "xor_val" ); \
MPH_INIT_KEYSV(MPH_KEYSV_H0, "h0" ); \
MPH_INIT_KEYSV(MPH_KEYSV_KEY, "key" ); \
MPH_INIT_KEYSV(MPH_KEYSV_KEY_NORMALIZED, "key_normalized" ); \
MPH_INIT_KEYSV(MPH_KEYSV_KEY_IS_UTF8, "key_is_utf8" ); \
MPH_INIT_KEYSV(MPH_KEYSV_VAL, "val" ); \
MPH_INIT_KEYSV(MPH_KEYSV_VAL_NORMALIZED, "val_normalized" ); \
MPH_INIT_KEYSV(MPH_KEYSV_VAL_IS_UTF8, "val_is_utf8" ); \
\
MPH_INIT_KEYSV(MPH_KEYSV_VARIANT, "variant" ); \
MPH_INIT_KEYSV(MPH_KEYSV_COMPUTE_FLAGS, "compute_flags" ); \
MPH_INIT_KEYSV(MPH_KEYSV_STATE, "state" ); \
MPH_INIT_KEYSV(MPH_KEYSV_SOURCE_HASH, "source_hash" ); \
MPH_INIT_KEYSV(MPH_KEYSV_BUF_LENGTH, "buf_length" ); \
MPH_INIT_KEYSV(MPH_KEYSV_BUCKETS, "buckets" ); \
MPH_INIT_KEYSV(MPH_KEYSV_MOUNT, "mount" ); \
} STMT_END
|
ppport.h
view on Meta::CPAN
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 | pv_escape() NEED_pv_escape NEED_pv_escape_GLOBAL
pv_pretty() NEED_pv_pretty NEED_pv_pretty_GLOBAL
sv_2pv_flags() NEED_sv_2pv_flags NEED_sv_2pv_flags_GLOBAL
sv_2pvbyte() NEED_sv_2pvbyte NEED_sv_2pvbyte_GLOBAL
sv_catpvf_mg() NEED_sv_catpvf_mg NEED_sv_catpvf_mg_GLOBAL
sv_catpvf_mg_nocontext() NEED_sv_catpvf_mg_nocontext NEED_sv_catpvf_mg_nocontext_GLOBAL
sv_pvn_force_flags() NEED_sv_pvn_force_flags NEED_sv_pvn_force_flags_GLOBAL
sv_setpvf_mg() NEED_sv_setpvf_mg NEED_sv_setpvf_mg_GLOBAL
sv_setpvf_mg_nocontext() NEED_sv_setpvf_mg_nocontext NEED_sv_setpvf_mg_nocontext_GLOBAL
sv_unmagicext() NEED_sv_unmagicext NEED_sv_unmagicext_GLOBAL
utf8_to_uvchr_buf() NEED_utf8_to_uvchr_buf NEED_utf8_to_uvchr_buf_GLOBAL
vload_module() NEED_vload_module NEED_vload_module_GLOBAL
vmess() NEED_vmess NEED_vmess_GLOBAL
vnewSVpvf() NEED_vnewSVpvf NEED_vnewSVpvf_GLOBAL
warner() NEED_warner NEED_warner_GLOBAL
To avoid namespace conflicts, you can change the namespace of the
explicitly exported functions / variables using the C<DPPP_NAMESPACE>
macro. Just C<
|
ppport.h
view on Meta::CPAN
640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 | PERL_MAGIC_shared_scalar|5.007003||p
PERL_MAGIC_shared|5.007003||p
PERL_MAGIC_sigelem|5.007002||p
PERL_MAGIC_sig|5.007002||p
PERL_MAGIC_substr|5.007002||p
PERL_MAGIC_sv|5.007002||p
PERL_MAGIC_taint|5.007002||p
PERL_MAGIC_tiedelem|5.007002||p
PERL_MAGIC_tiedscalar|5.007002||p
PERL_MAGIC_tied|5.007002||p
PERL_MAGIC_utf8|5.008001||p
PERL_MAGIC_uvar_elem|5.007003||p
PERL_MAGIC_uvar|5.007002||p
PERL_MAGIC_vec|5.007002||p
PERL_MAGIC_vstring|5.008001||p
PERL_PV_ESCAPE_ALL|5.009004||p
PERL_PV_ESCAPE_FIRSTCHAR|5.009004||p
PERL_PV_ESCAPE_NOBACKSLASH|5.009004||p
PERL_PV_ESCAPE_NOCLEAR|5.009004||p
PERL_PV_ESCAPE_QUOTE|5.009004||p
PERL_PV_ESCAPE_RE|5.009005||p
|
ppport.h
view on Meta::CPAN
971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 | SvPV_nomg_const|5.009003||p
SvPV_nomg_nolen|5.013007||p
SvPV_nomg|5.007002||p
SvPV_renew|5.009003||p
SvPV_set|||
SvPVbyte_force||5.009002|
SvPVbyte_nolen||5.006000|
SvPVbytex_force||5.006000|
SvPVbytex||5.006000|
SvPVbyte|5.006000||p
SvPVutf8_force||5.006000|
SvPVutf8_nolen||5.006000|
SvPVutf8x_force||5.006000|
SvPVutf8x||5.006000|
SvPVutf8||5.006000|
SvPVx|||
SvPV|||
SvREADONLY_off|||
SvREADONLY_on|||
SvREADONLY|||
SvREFCNT_dec_NN||5.017007|
SvREFCNT_dec|||
SvREFCNT_inc_NN|5.009004||p
SvREFCNT_inc_simple_NN|5.009004||p
SvREFCNT_inc_simple_void_NN|5.009004||p
|
ppport.h
view on Meta::CPAN
1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 | Zero|||
__ASSERT_|||p
_aMY_CXT|5.007003||p
_inverse_folds|||
_is_grapheme|||
_is_in_locale_category|||
_new_invlist_C_array|||
_pMY_CXT|5.007003||p
_to_fold_latin1|||n
_to_upper_title_latin1|||
_to_utf8_case|||
_variant_byte_number|||n
_warn_problematic_locale|||n
aMY_CXT_|5.007003||p
aMY_CXT|5.007003||p
aTHXR_|5.024000||p
aTHXR|5.024000||p
aTHX_|5.006000||p
aTHX|5.006000||p
abort_execution|||
add_above_Latin1_folds|||
|
ppport.h
view on Meta::CPAN
1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 | bad_type_pv|||
bind_match|||
block_end||5.004000|
block_gimme||5.004000|
block_start||5.004000|
blockhook_register||5.013003|
boolSV|5.004000||p
boot_core_PerlIO|||
boot_core_UNIVERSAL|||
boot_core_mro|||
bytes_cmp_utf8||5.013007|
cBOOL|5.013000||p
call_argv|5.006000||p
call_atexit||5.006000|
call_list||5.004000|
call_method|5.006000||p
call_pv|5.006000||p
call_sv|5.006000||p
caller_cx|5.013005|5.006000|p
calloc||5.007002|n
cando|||
|
ppport.h
view on Meta::CPAN
1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 | do_seek|||
do_semop|||
do_shmio|||
do_smartmatch|||
do_spawn_nowait|||
do_spawn|||
do_sprintf|||
do_sv_dump||5.006000|
do_sysseek|||
do_tell|||
do_trans_complex_utf8|||
do_trans_complex|||
do_trans_count_utf8|||
do_trans_count|||
do_trans_simple_utf8|||
do_trans_simple|||
do_trans|||
do_vecget|||
do_vecset|||
do_vop|||
docatch|||
does_utf8_overflow|||n
doeval_compile|||
dofile|||
dofindlabel|||
doform|||
doing_taint||5.008001|n
dooneliner|||
doopen_pm|||
doparseform|||
dopoptoeval|||
dopoptogivenfor|||
|
ppport.h
view on Meta::CPAN
1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 | find_runcv||5.008001|
find_rundefsv||5.013002|
find_script|||
find_span_end_mask|||n
find_span_end|||n
first_symbol|||n
fixup_errno_string|||
foldEQ_latin1_s2_folded|||n
foldEQ_latin1||5.013008|n
foldEQ_locale||5.013002|n
foldEQ_utf8||5.013002|
foldEQ||5.013002|n
fold_constants|||
forbid_setid|||
force_ident_maybe_lex|||
force_ident|||
force_list|||
force_next|||
force_strict_version|||
force_version|||
force_word|||
|
ppport.h
view on Meta::CPAN
1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 | hv_rand_set||5.018000|
hv_riter_p||5.009003|
hv_riter_set||5.009003|
hv_scalar||5.009001|
hv_store_ent||5.003070|
hv_stores|5.009004||p
hv_store|||
hv_undef_flags|||
hv_undef|||
ibcmp_locale||5.004000|
ibcmp_utf8||5.007003|
ibcmp|||
incline|||
incpush_if_exists|||
incpush_use_sep|||
incpush|||
ingroup|||
init_argv_symbols|||
init_constants|||
init_dbargs|||
init_debugger|||
|
ppport.h
view on Meta::CPAN
1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 | isASCII_A|||p
isASCII|5.006000||p
isBLANK_A|||p
isBLANK|5.006001||p
isC9_STRICT_UTF8_CHAR|||n
isCNTRL_A|||p
isCNTRL|5.006000||p
isDIGIT_A|||p
isDIGIT|||p
isFF_OVERLONG|||n
isFOO_utf8_lc|||
isGCB|||
isGRAPH_A|||p
isGRAPH|5.006000||p
isIDCONT_A|||p
isIDCONT|5.017008|5.017008|p
isIDFIRST_A|||p
isIDFIRST|||p
isLB|||
isLOWER_A|||p
isLOWER|||p
|
ppport.h
view on Meta::CPAN
1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 | isUPPER|||p
isUTF8_CHAR_flags|||
isUTF8_CHAR||5.021001|n
isWB|||
isWORDCHAR_A|||p
isWORDCHAR|5.013006|5.013006|p
isXDIGIT_A|||p
isXDIGIT|5.006000||p
is_an_int|||
is_ascii_string||5.011000|n
is_c9strict_utf8_string_loclen|||n
is_c9strict_utf8_string_loc|||n
is_c9strict_utf8_string|||n
is_handle_constructor|||n
is_invariant_string||5.021007|n
is_lvalue_sub||5.007001|
is_safe_syscall||5.019004|
is_ssc_worth_it|||n
is_strict_utf8_string_loclen|||n
is_strict_utf8_string_loc|||n
is_strict_utf8_string|||n
is_utf8_char_buf||5.015008|n
is_utf8_common_with_len|||
is_utf8_common|||
is_utf8_cp_above_31_bits|||n
is_utf8_fixed_width_buf_flags|||n
is_utf8_fixed_width_buf_loc_flags|||n
is_utf8_fixed_width_buf_loclen_flags|||n
is_utf8_invariant_string_loc|||n
is_utf8_invariant_string|||n
is_utf8_non_invariant_string|||n
is_utf8_overlong_given_start_byte_ok|||n
is_utf8_string_flags|||n
is_utf8_string_loc_flags|||n
is_utf8_string_loclen_flags|||n
is_utf8_string_loclen||5.009003|n
is_utf8_string_loc||5.008001|n
is_utf8_string||5.006001|n
is_utf8_valid_partial_char_flags|||n
is_utf8_valid_partial_char|||n
isa_lookup|||
isinfnansv|||
isinfnan||5.021004|n
items|||n
ix|||n
jmaybe|||
join_exact|||
keyword_plugin_standard|||
keyword|||
leave_scope|||
|
ppport.h
view on Meta::CPAN
1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 | magic_setlvref|||
magic_setmglob|||
magic_setnkeys|||
magic_setnonelem|||
magic_setpack|||
magic_setpos|||
magic_setregexp|||
magic_setsig|||
magic_setsubstr|||
magic_settaint|||
magic_setutf8|||
magic_setuvar|||
magic_setvec|||
magic_set|||
magic_sizepack|||
magic_wipepack|||
make_matcher|||
make_trie|||
malloc_good_size|||n
malloced_size|||n
malloc||5.007002|n
|
ppport.h
view on Meta::CPAN
1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 | mro_register||5.010001|
mro_set_mro||5.010001|
mro_set_private_data||5.010001|
mul128|||
multiconcat_stringify|||
multideref_stringify|||
my_atof2||5.007002|
my_atof3|||
my_atof||5.006000|
my_attrs|||
my_bytes_to_utf8|||n
my_chsize|||
my_clearenv|||
my_cxt_index|||
my_cxt_init|||
my_dirfd||5.009005|n
my_exit_jump|||
my_exit|||
my_failure_exit||5.004000|
my_fflush_all||5.006000|
my_fork||5.007003|n
|
ppport.h
view on Meta::CPAN
1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 | my_stat_flags|||
my_stat||5.024000|
my_strerror|||
my_strftime||5.007002|
my_strlcat|5.009004||pn
my_strlcpy|5.009004||pn
my_strnlen|||pn
my_strtod|||n
my_unexec|||
my_vsnprintf||5.009004|n
need_utf8|||n
newANONATTRSUB||5.006000|
newANONHASH|||
newANONLIST|||
newANONSUB|||
newASSIGNOP|||
newATTRSUB_x|||
newATTRSUB||5.006000|
newAVREF|||
newAV|||
newBINOP|||
|
ppport.h
view on Meta::CPAN
2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 | newSVavdefelem|||
newSVhek||5.009003|
newSViv|||
newSVnv|||
newSVpadname||5.017004|
newSVpv_share||5.013006|
newSVpvf_nocontext|||vn
newSVpvf||5.004000|v
newSVpvn_flags|5.010001||p
newSVpvn_share|5.007001||p
newSVpvn_utf8|5.010001||p
newSVpvn|5.004050||p
newSVpvs_flags|5.010001||p
newSVpvs_share|5.009003||p
newSVpvs|5.009003||p
newSVpv|||
newSVrv|||
newSVsv_flags|||
newSVsv_nomg|||
newSVsv|||
newSVuv|5.006000||p
|
ppport.h
view on Meta::CPAN
2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 | nextargv|||
nextchar|||
ninstr|||n
no_bareword_allowed|||
no_fh_allowed|||
no_op|||
noperl_die|||vn
not_a_number|||
not_incrementable|||
nothreadhook||5.008000|
notify_parser_that_changed_to_utf8|||
nuke_stacks|||
num_overflow|||n
oopsAV|||
oopsHV|||
op_append_elem||5.013006|
op_append_list||5.013006|
op_class|||
op_clear|||
op_contextualize||5.013006|
op_convert_list||5.021006|
|
ppport.h
view on Meta::CPAN
2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 | sv_2iuv_common|||
sv_2iuv_non_preserve|||
sv_2iv_flags||5.009001|
sv_2iv|||
sv_2mortal|||
sv_2nv_flags||5.013001|
sv_2pv_flags|5.007002||p
sv_2pv_nolen|5.006000||p
sv_2pvbyte_nolen|5.006000||p
sv_2pvbyte|5.006000||p
sv_2pvutf8_nolen||5.006000|
sv_2pvutf8||5.006000|
sv_2pv|||
sv_2uv_flags||5.009001|
sv_2uv|5.004000||p
sv_add_arena|||
sv_add_backref|||
sv_backoff|||n
sv_bless|||
sv_buf_to_ro|||
sv_buf_to_rw|||
sv_cat_decode||5.008001|
|
ppport.h
view on Meta::CPAN
2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 | sv_gets||5.003070|
sv_grow|||
sv_i_ncmp|||
sv_inc_nomg||5.013002|
sv_inc|||
sv_insert_flags||5.010001|
sv_insert|||
sv_isa|||
sv_isobject|||
sv_iv||5.005000|
sv_len_utf8_nomg|||
sv_len_utf8||5.006000|
sv_len|||
sv_magic_portable|5.024000|5.004000|p
sv_magicext_mglob|||
sv_magicext||5.007003|
sv_magic|||
sv_mortalcopy_flags|||
sv_mortalcopy|||
sv_ncmp|||
sv_newmortal|||
sv_newref|||
|
ppport.h
view on Meta::CPAN
2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 | sv_pos_u2b_forwards|||n
sv_pos_u2b_midway|||n
sv_pos_u2b||5.006000|
sv_pvbyten_force||5.006000|
sv_pvbyten||5.006000|
sv_pvbyte||5.006000|
sv_pvn_force_flags|5.007002||p
sv_pvn_force|||
sv_pvn_nomg|5.007003|5.005000|p
sv_pvn||5.005000|
sv_pvutf8n_force||5.006000|
sv_pvutf8n||5.006000|
sv_pvutf8||5.006000|
sv_pv||5.006000|
sv_recode_to_utf8||5.007003|
sv_reftype|||
sv_ref||5.015004|
sv_replace|||
sv_report_used|||
sv_resetpvn|||
sv_reset|||
sv_rvunweaken|||
sv_rvweaken||5.006000|
sv_set_undef|||
sv_sethek|||
|
ppport.h
view on Meta::CPAN
2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 | sv_uni_display||5.007003|
sv_unmagicext|5.013008||p
sv_unmagic|||
sv_unref_flags||5.007001|
sv_unref|||
sv_untaint||5.004000|
sv_upgrade|||
sv_usepvn_flags||5.009004|
sv_usepvn_mg|5.004050||p
sv_usepvn|||
sv_utf8_decode|||
sv_utf8_downgrade|||
sv_utf8_encode||5.006000|
sv_utf8_upgrade_flags_grow||5.011000|
sv_utf8_upgrade_flags||5.007002|
sv_utf8_upgrade_nomg||5.007002|
sv_utf8_upgrade||5.007001|
sv_uv|5.005000||p
sv_vcatpvf_mg|5.006000|5.004000|p
sv_vcatpvfn_flags||5.017002|
sv_vcatpvfn||5.004000|
sv_vcatpvf|5.006000|5.004000|p
sv_vsetpvf_mg|5.006000|5.004000|p
sv_vsetpvfn||5.004000|
sv_vsetpvf|5.006000|5.004000|p
svtype|||
swallow_bom|||
|
ppport.h
view on Meta::CPAN
2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 | sys_init3||5.010000|n
sys_init||5.010000|n
sys_intern_clear|||
sys_intern_dup|||
sys_intern_init|||
sys_term||5.010000|n
taint_env|||
taint_proper|||
tied_method|||v
tmps_grow_p|||
toFOLD_utf8_safe|||
toFOLD_utf8||5.019001|
toFOLD_uvchr||5.023009|
toFOLD||5.019001|
toLOWER_L1||5.019001|
toLOWER_LC||5.004000|
toLOWER_utf8_safe|||
toLOWER_utf8||5.015007|
toLOWER_uvchr||5.023009|
toLOWER|||
toTITLE_utf8_safe|||
toTITLE_utf8||5.015007|
toTITLE_uvchr||5.023009|
toTITLE||5.019001|
toUPPER_utf8_safe|||
toUPPER_utf8||5.015007|
toUPPER_uvchr||5.023009|
toUPPER|||
to_byte_substr|||
to_lower_latin1|||n
to_utf8_substr|||
tokenize_use|||
tokeq|||
tokereport|||
too_few_arguments_pv|||
too_many_arguments_pv|||
translate_substr_offsets|||n
traverse_op_tree|||
try_amagic_bin|||
try_amagic_un|||
turkic_fc|||
|
ppport.h
view on Meta::CPAN
2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 | unpackstring||5.008001|
unreferenced_to_tmp_stack|||
unshare_hek_or_pvn|||
unshare_hek|||
unsharepvn||5.003070|
unwind_handler_stack|||
update_debugger_info|||
upg_version||5.009005|
usage|||
utf16_textfilter|||
utf16_to_utf8_reversed||5.006001|
utf16_to_utf8||5.006001|
utf8_distance||5.006000|
utf8_hop_back|||n
utf8_hop_forward|||n
utf8_hop_safe|||n
utf8_hop||5.006000|n
utf8_length||5.007001|
utf8_mg_len_cache_update|||
utf8_mg_pos_cache_update|||
utf8_to_uvchr_buf|5.015009|5.015009|p
utf8_to_uvchr|||p
utf8n_to_uvchr_error|||n
utf8n_to_uvchr||5.007001|n
utf8n_to_uvuni||5.007001|
utilize|||
uvchr_to_utf8_flags||5.007003|
uvchr_to_utf8||5.007001|
uvoffuni_to_utf8_flags||5.019004|
uvuni_to_utf8_flags||5.007003|
uvuni_to_utf8||5.007001|
valid_utf8_to_uvchr|||n
validate_suid|||
variant_under_utf8_count|||n
varname|||
vcmp||5.009000|
vcroak||5.006000|
vdeb||5.007003|
vform||5.006000|
visit|||
vivify_defelem|||
vivify_ref|||
vload_module|5.006000||p
vmess|5.006000|5.006000|p
|
ppport.h
view on Meta::CPAN
5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 | return p - str;
}
/* Versions prior to this accepted things that are now considered
* malformations, and didn't return -1 on error with warnings enabled
* */
/* This implementation brings modern, generally more restricted standards to
* utf8_to_uvchr_buf. Some of these are security related, and clearly must
* be done. But its arguable that the others need not, and hence should not.
* The reason they're here is that a module that intends to play with the
* latest perls shoud be able to work the same in all releases. An example is
* that perl no longer accepts any UV for a code point, but limits them to
* IV_MAX or below. This is for future internal use of the larger code points. * If it turns out that some of these changes are breaking code that isn't
* intended to work with modern perls, the tighter restrictions could be
* relaxed. khw thinks this is unlikely, but has been wrong in the past. */
/* Choose which underlying implementation to use . At least one must be
* present or the perl is too early to handle this function */
static UV DPPP_(my_utf8_to_uvchr_buf)(pTHX_ const U8 * s, const U8 * send , STRLEN * retlen);
static
extern UV DPPP_(my_utf8_to_uvchr_buf)(pTHX_ const U8 * s, const U8 * send , STRLEN * retlen);
UV
DPPP_(my_utf8_to_uvchr_buf)(pTHX_ const U8 *s , const U8 *send , STRLEN *retlen )
{
UV ret;
STRLEN curlen;
bool overflows = 0;
const U8 *cur_s = s;
const bool do_warnings = ckWARN_d(WARN_UTF8);
if ( send > s) {
curlen = send - s;
}
|
ppport.h
view on Meta::CPAN
5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 | assert(0); /* Modern perls die under this circumstance */
curlen = 0;
if (! do_warnings) { /* Handle empty here if no warnings needed */
if (retlen) *retlen = 0;
return UNICODE_REPLACEMENT;
}
}
/* The modern version allows anything that evaluates to a legal UV, but not
* overlongs nor an empty input */
ret = _ppport_utf8_to_uvchr_buf_callee(
s, curlen, retlen, (UTF8_ALLOW_ANYUV
& ~(UTF8_ALLOW_LONG|UTF8_ALLOW_EMPTY)));
/* But actually, modern versions restrict the UV to being no more than what
* an IV can hold */
if (ret > PERL_INT_MAX) {
overflows = 1;
}
|
ppport.h
view on Meta::CPAN
5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 | ret, *cur_s , *s );
}
if (retlen) {
*retlen = (STRLEN) -1;
}
return 0;
}
}
/* If failed and warnings are off, to emulate the behavior of the real
* utf8_to_uvchr(), try again, allowing anything. (Note a return of 0 is
* ok if the input was '\0' ) */
if (UNLIKELY(ret == 0 && (curlen == 0 || *s != '\0' ))) {
/* If curlen is 0, we already handled the case where warnings are
* disabled, so this 'if' will be true, and we won't look at the
* contents of 's' */
if (do_warnings) {
*retlen = (STRLEN) -1;
}
else {
ret = _ppport_utf8_to_uvchr_buf_callee(
s, curlen, retlen, UTF8_ALLOW_ANY);
/* Override with the REPLACEMENT character, as that is what the
* modern version of this function returns */
ret = UNICODE_REPLACEMENT;
/* Versions earlier than this don't necessarily return the proper
* length . It should not extend past the end of string, nor past
* what the first byte indicates the length is, nor past the
|
ppport.h
view on Meta::CPAN
5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 | }
}
return ret;
}
to read past a NUL, making it much less likely to read
off the end of the buffer. A NUL indicates the start
of the next character anyway. If the input isn't
NUL-terminated, the function remains unsafe, as it
always has been. */
((*(s) == '\0' ) \
? utf8_to_uvchr_buf(s,((s)+1), lp) /* Handle single NUL specially */ \
: utf8_to_uvchr_buf(s, (s) + my_strnlen((char *) (s), UTF8SKIP(s)), (lp)))
|
ppport.h
view on Meta::CPAN
5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 | |
ppport.h
view on Meta::CPAN
6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 | ? ((len) ? newSVpv((data), (len)) : newSVpv( "" , 0)) \
: newSV(0))
static SV * DPPP_(my_newSVpvn_flags)(pTHX_ const char *s , STRLEN len, U32 flags);
static
|
ppport.h
view on Meta::CPAN
6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 | char *
DPPP_(my_sv_2pvbyte)(pTHX_ SV *sv , STRLEN *lp )
{
sv_utf8_downgrade(sv,0);
return SvPV(sv, *lp );
}
/* Hint: sv_2pvbyte
* Use the SvPVbyte() macro instead of sv_2pvbyte().
*/
|
ppport.h
view on Meta::CPAN
7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 | const MGVTBL* const virt = mg->mg_virtual;
if (mg->mg_type == type && virt == vtbl) {
*mgp = mg->mg_moremagic;
if (virt && virt->svt_free)
virt->svt_free(aTHX_ sv, mg);
if (mg->mg_ptr && mg->mg_type != PERL_MAGIC_regex_global) {
if (mg->mg_len > 0)
Safefree(mg->mg_ptr);
else if (mg->mg_len == HEf_SVKEY) /* Questionable on older perls... */
SvREFCNT_dec(MUTABLE_SV(mg->mg_ptr));
else if (mg->mg_type == PERL_MAGIC_utf8)
Safefree(mg->mg_ptr);
}
if (mg->mg_flags & MGf_REFCOUNTED)
SvREFCNT_dec(mg->mg_obj);
Safefree(mg);
}
else
mgp = &mg ->mg_moremagic;
}
if (SvMAGIC(sv)) {
|
ppport.h
view on Meta::CPAN
8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 | DPPP_(my_pv_escape)(pTHX_ SV *dsv , char const * const str,
const STRLEN count, const STRLEN max,
STRLEN * const escaped, const U32 flags)
{
const char esc = flags & PERL_PV_ESCAPE_RE ? '%' : '\\' ;
const char dq = flags & PERL_PV_ESCAPE_QUOTE ? '"' : esc;
char octbuf[32] = "%123456789ABCDF" ;
STRLEN wrote = 0;
STRLEN chsize = 0;
STRLEN readsize = 1;
bool isuni = flags & PERL_PV_ESCAPE_UNI ? 1 : 0;
const char *pv = str;
const char * const end = pv + count;
octbuf[0] = esc;
if (!(flags & PERL_PV_ESCAPE_NOCLEAR))
sv_setpvs(dsv, "" );
if ((flags & PERL_PV_ESCAPE_UNI_DETECT) && is_utf8_string((U8*)pv, count))
isuni = 1;
for (; pv < end && (!max || wrote < max) ; pv += readsize) {
const UV u =
isuni ? utf8_to_uvchr_buf((U8*)pv, end, &readsize ) :
(U8) *pv ;
const U8 c = (U8)u & 0xFF;
if (u > 255 || (flags & PERL_PV_ESCAPE_ALL)) {
if (flags & PERL_PV_ESCAPE_FIRSTCHAR)
chsize = my_snprintf(octbuf, sizeof octbuf,
"%" UVxf, u);
else
chsize = my_snprintf(octbuf, sizeof octbuf,
|
t/Algo_v5.t
view on Meta::CPAN
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 | }
my %hash = ( "A" .. "Z" );
my $buckets = $o ->compute(\ %hash );
is_deeply( $buckets ,
[
{
"h0" => "2811388775115704789" ,
"idx" => 0,
"key" => "M" ,
"key_is_utf8" => 0,
"key_normalized" => "M" ,
"val" => "N" ,
"val_is_utf8" => 0,
"val_normalized" => "N"
},
{
"h0" => "8860848721851830215" ,
"h1_keys" => 2,
"idx" => 1,
"key" => "K" ,
"key_is_utf8" => 0,
"key_normalized" => "K" ,
"val" => "L" ,
"val_is_utf8" => 0,
"val_normalized" => "L" ,
"xor_val" => 1
},
{
"h0" => "2333874141850904510" ,
"h1_keys" => 1,
"idx" => 2,
"key" => "G" ,
"key_is_utf8" => 0,
"key_normalized" => "G" ,
"val" => "H" ,
"val_is_utf8" => 0,
"val_normalized" => "H" ,
"xor_val" => "4294967295"
},
{
"h0" => "16876117158654460122" ,
"idx" => 3,
"key" => "Y" ,
"key_is_utf8" => 0,
"key_normalized" => "Y" ,
"val" => "Z" ,
"val_is_utf8" => 0,
"val_normalized" => "Z"
},
{
"h0" => "8715353882719101949" ,
"h1_keys" => 1,
"idx" => 4,
"key" => "S" ,
"key_is_utf8" => 0,
"key_normalized" => "S" ,
"val" => "T" ,
"val_is_utf8" => 0,
"val_normalized" => "T" ,
"xor_val" => "4294967294"
},
{
"h0" => "7118026915973626049" ,
"h1_keys" => 1,
"idx" => 5,
"key" => "U" ,
"key_is_utf8" => 0,
"key_normalized" => "U" ,
"val" => "V" ,
"val_is_utf8" => 0,
"val_normalized" => "V" ,
"xor_val" => "4294967293"
},
{
"h0" => "8257329964001049281" ,
"h1_keys" => 2,
"idx" => 6,
"key" => "O" ,
"key_is_utf8" => 0,
"key_normalized" => "O" ,
"val" => "P" ,
"val_is_utf8" => 0,
"val_normalized" => "P" ,
"xor_val" => 1
},
{
"h0" => "5518171323424817881" ,
"h1_keys" => 1,
"idx" => 7,
"key" => "E" ,
"key_is_utf8" => 0,
"key_normalized" => "E" ,
"val" => "F" ,
"val_is_utf8" => 0,
"val_normalized" => "F" ,
"xor_val" => "4294967291"
},
{
"h0" => "3591181703942984702" ,
"h1_keys" => 1,
"idx" => 8,
"key" => "Q" ,
"key_is_utf8" => 0,
"key_normalized" => "Q" ,
"val" => "R" ,
"val_is_utf8" => 0,
"val_normalized" => "R" ,
"xor_val" => "4294967288"
},
{
"h0" => "8458370515337648683" ,
"h1_keys" => 1,
"idx" => 9,
"key" => "W" ,
"key_is_utf8" => 0,
"key_normalized" => "W" ,
"val" => "X" ,
"val_is_utf8" => 0,
"val_normalized" => "X" ,
"xor_val" => "4294967286"
},
{
"h0" => "18153270191496908466" ,
"idx" => 10,
"key" => "C" ,
"key_is_utf8" => 0,
"key_normalized" => "C" ,
"val" => "D" ,
"val_is_utf8" => 0,
"val_normalized" => "D"
},
{
"h0" => "5043040936135718210" ,
"h1_keys" => 2,
"idx" => 11,
"key" => "A" ,
"key_is_utf8" => 0,
"key_normalized" => "A" ,
"val" => "B" ,
"val_is_utf8" => 0,
"val_normalized" => "B" ,
"xor_val" => 2
},
{
"h0" => "15354065489600908969" ,
"h1_keys" => 1,
"idx" => 12,
"key" => "I" ,
"key_is_utf8" => 0,
"key_normalized" => "I" ,
"val" => "J" ,
"val_is_utf8" => 0,
"val_normalized" => "J" ,
"xor_val" => "4294967285"
}
],
"simple hash A-Z" ,
) or diag Dumper( $buckets );
%hash = map {
my $key = chr ( $_ );
utf8::upgrade( $key ) if $_ % 2;
$key => $key
} 250 .. 260;
$buckets = $o ->compute(\ %hash );
is_deeply( $buckets ,
[
{
"h0" => "7329038856428266488" ,
"h1_keys" => 1,
"idx" => 0,
"key" => "\376" ,
"key_is_utf8" => 0,
"key_normalized" => "\376" ,
"val" => "\376" ,
"val_is_utf8" => 0,
"val_normalized" => "\376" ,
"xor_val" => "4294967293"
},
{
"h0" => "7406429479659263986" ,
"h1_keys" => 2,
"idx" => 1,
"key" => "\x{101}" ,
"key_is_utf8" => 1,
"key_normalized" => "\304\201" ,
"val" => "\x{101}" ,
"val_is_utf8" => 1,
"val_normalized" => "\304\201" ,
"xor_val" => 2
},
{
"h0" => "7026079938176527097" ,
"h1_keys" => 1,
"idx" => 2,
"key" => "\x{103}" ,
"key_is_utf8" => 1,
"key_normalized" => "\304\203" ,
"val" => "\x{103}" ,
"val_is_utf8" => 1,
"val_normalized" => "\304\203" ,
"xor_val" => "4294967292"
},
{
"h0" => "2560445542346638988" ,
"idx" => 3,
"key" => "\x{100}" ,
"key_is_utf8" => 1,
"key_normalized" => "\304\200" ,
"val" => "\x{100}" ,
"val_is_utf8" => 1,
"val_normalized" => "\304\200"
},
{
"h0" => "15075586565050556610" ,
"h1_keys" => 3,
"idx" => 4,
"key" => "\x{102}" ,
"key_is_utf8" => 1,
"key_normalized" => "\304\202" ,
"val" => "\x{102}" ,
"val_is_utf8" => 1,
"val_normalized" => "\304\202" ,
"xor_val" => 1
},
{
"h0" => "10520228168695442556" ,
"h1_keys" => 1,
"idx" => 5,
"key" => "\x{ff}" ,
"key_is_utf8" => 2,
"key_normalized" => "\377" ,
"val" => "\x{ff}" ,
"val_is_utf8" => 1,
"val_normalized" => "\303\277" ,
"xor_val" => "4294967290"
},
{
"h0" => "10697593882658072482" ,
"h1_keys" => 1,
"idx" => 6,
"key" => "\372" ,
"key_is_utf8" => 0,
"key_normalized" => "\372" ,
"val" => "\372" ,
"val_is_utf8" => 0,
"val_normalized" => "\372" ,
"xor_val" => "4294967286"
},
{
"h0" => "12905152200806003791" ,
"idx" => 7,
"key" => "\374" ,
"key_is_utf8" => 0,
"key_normalized" => "\374" ,
"val" => "\374" ,
"val_is_utf8" => 0,
"val_normalized" => "\374"
},
{
"h0" => "11318277185996573588" ,
"idx" => 8,
"key" => "\x{fd}" ,
"key_is_utf8" => 2,
"key_normalized" => "\375" ,
"val" => "\x{fd}" ,
"val_is_utf8" => 1,
"val_normalized" => "\303\275"
},
{
"h0" => "11837163118808456557" ,
"idx" => 9,
"key" => "\x{104}" ,
"key_is_utf8" => 1,
"key_normalized" => "\304\204" ,
"val" => "\x{104}" ,
"val_is_utf8" => 1,
"val_normalized" => "\304\204"
},
{
"h0" => "13939891494320893160" ,
"h1_keys" => 2,
"idx" => 10,
"key" => "\x{fb}" ,
"key_is_utf8" => 2,
"key_normalized" => "\373" ,
"val" => "\x{fb}" ,
"val_is_utf8" => 1,
"val_normalized" => "\303\273" ,
"xor_val" => 2
}
],
"hash with utf8 works as expected" ,
) or diag Dumper( $buckets );
|
t/OnDisk.pl
view on Meta::CPAN
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | ok( defined ( $srand ), "srand as expected: $srand" );
my $eval_ok = eval {
tie my ( %fail ), $class => $0;
1;
};
my $error = ! $eval_ok && $@;
ok( $error , "it failed: $@" );
my $tmpdir = File::Temp->newdir();
my $not_utf8 = "not utf8: \x{DF}" ;
my $utf8_can_be_downgraded = "was utf8: \x{DF}" ;
utf8::upgrade( $utf8_can_be_downgraded );
my $must_be_utf8 = "is utf8: \x{100}" ;
my @source_hashes = (
simple => {
foo => "bar" ,
baz => "bop" ,
fiz => "shmang" ,
plop => "shwoosh" ,
},
large => { map { $_ => $_ } 1 .. 50000 },
mixed_utf8 => {
$not_utf8 => $not_utf8 ,
$utf8_can_be_downgraded => $utf8_can_be_downgraded ,
$must_be_utf8 => $must_be_utf8 ,
map { chr ( $_ ) => chr ( $_ ) } 240 .. 270,
},
pow2_08 =>
{ map { $_ => $_ } 1 .. 8 },
pow2_16 =>
{ map { $_ => $_ } 1 .. 16 },
pow2_32 =>
{ map { $_ => $_ } 1 .. 32 },
pow2_64 =>
{ map { $_ => $_ } 1 .. 64 },
chr_chr_utf8 =>
{ map { chr ( $_ ) => chr ( $_ ) } 256 .. 270 },
chr_num_utf8 =>
{ map { chr ( $_ ) => $_ } 256 .. 270 },
num_chr_utf8 =>
{ map { $_ => chr ( $_ ) } 256 .. 270 },
mix_mix_utf8 =>
{ map { ( $_ % 2 ? chr ( $_ ) : $_ ) => ( $_ % 2 ? $_ : chr ( $_ )) } 256 .. 270 },
chr_mix_utf8 =>
{ map { chr ( $_ ) => ( $_ % 2 ? $_ : chr ( $_ )) } 256 .. 270 },
num_mix_utf8 =>
{ map { $_ => ( $_ % 2 ? $_ : chr ( $_ )) } 256 .. 270 },
mix_num_utf8 =>
{ map { ( $_ % 2 ? chr ( $_ ) : $_ ) => $_ } 256 .. 270 },
mix_chr_utf8 =>
{ map { ( $_ % 2 ? chr ( $_ ) : $_ ) => chr ( $_ ) } 256 .. 270 },
);
my $rand_seed = join ( "" , map chr ( rand 256), 1..16);
foreach my $seed ( "1234567812345678" , undef , $rand_seed ) {
foreach my $idx (0 .. ( @source_hashes /2)-1) {
my $name = $source_hashes [ $idx *2];
my $source_hash = $source_hashes [ $idx *2+1];
foreach my $variant ( defined ( $ENV {VARIANT}) ? ( $ENV {VARIANT}) : (MIN_VARIANT .. MAX_VARIANT)) {
|
t/OnDisk.pl
view on Meta::CPAN
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 | is_deeply(\ @got_keys ,\ @want_keys , "keys in both are the same ($title)" );
is_deeply(\ @got_each_keys ,\ @want_keys , "got_keys and got_each_keys agree ($title)" );
is_deeply(\ @got_values ,\ @want_values , "got_values and got_each_values agree ($title)" );
is_deeply(\ @got_fetch_values ,\ @want_values , "values in both are same ($title)" );
is_deeply(\ @got_each_values ,\ @want_values , "values in both are same ($title)" );
{
my @bad ;
foreach my $idx (0.. $#got_keys ) {
if (utf8::is_utf8( $got_keys [ $idx ]) != utf8::is_utf8( $want_keys [ $idx ])) {
push @bad , [ $got_keys [ $idx ], $want_keys [ $idx ] ];
}
}
is(0+ @bad ,0, "no keys with differing utf8 flags ($title)" )
or diag Dumper( $bad [0]);
}
} else {
ok(0, "test cannot pass if make_file dies" ) for 1..17;
}
}
}
}
}
|