Algorithm-MinPerfHashTwoLevel

 view release on metacpan or  search on metacpan

MinPerfHashTwoLevel.xs  view on Meta::CPAN

13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#include <sys/stat.h>
#include <assert.h>
#include "mph2l.h"
#include "mph_hv_macro.h"
#include "mph_siphash.h"
 
#define MAX_VARIANT 5
#define MIN_VARIANT 5
 
MPH_STATIC_INLINE void
sv_set_from_bucket(pTHX_ SV *sv, U8 *strs, const U32 ofs, const U32 len, const U32 idx, const U8 *flags, const U32 bits, const U8 utf8_default, const U8 utf8_default_shift) {
    U8 *ptr;
    U8 is_utf8;
    if (ofs) {
        ptr= (strs) + (ofs);
        if (utf8_default) {
            is_utf8= utf8_default >> utf8_default_shift;
        } else {
            GETBITS(is_utf8,flags,idx,bits);
        }
    } else {
        ptr= 0;
        is_utf8= 0;
    }
    /* note that sv_setpvn() will cause the sv to
     * become undef if ptr is 0 */
    sv_setpvn_mg((sv),ptr,len);
    if (is_utf8 > 1) {
        sv_utf8_upgrade(sv);
    }
    else
    if (is_utf8) {
        SvUTF8_on(sv);
    }
    else
    if (ptr) {
        SvUTF8_off(sv);
    }
}
 
MPH_STATIC_INLINE int
lookup_bucket(pTHX_ struct mph_header *mph, U32 index, SV *key_sv, SV *val_sv)

MinPerfHashTwoLevel.xs  view on Meta::CPAN

84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
U8 *key_pv;
U64 h0;
U32 h1;
U32 h2;
U32 index;
U8 *got_key_pv;
STRLEN got_key_len;
 
if (SvUTF8(key_sv)) {
    SV *tmp= sv_2mortal(newSVsv(key_sv));
    sv_utf8_downgrade(tmp,1);
    key_sv= tmp;
}
key_pv= SvPV(key_sv,key_len);
h0= mph_hash_with_state(state,key_pv,key_len);
h1= h0 >> 32;
index= h1 % mph->num_buckets;
 
bucket= buckets + index;
if (!bucket->xor_val)
    return 0;

MinPerfHashTwoLevel.xs  view on Meta::CPAN

211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
    }
    return head->variant;
}
 
void
mph_munmap(struct mph_obj *obj) {
    munmap(obj->header,obj->bytes);
}
 
STRLEN
normalize_with_flags(pTHX_ SV *sv, SV *normalized_sv, SV *is_utf8_sv, int downgrade) {
    STRLEN len;
    if (SvROK(sv)) {
        croak("Error: Not expecting a reference value in source hash");
    }
    sv_setsv(normalized_sv,sv);
    if (SvOK(sv)) {
        STRLEN pv_len;
        char *pv= SvPV(sv,pv_len);
        if (pv_len > 0xFFFF)
            croak("Error: String in source hash is too long to store, max length is %u got length %lu", 0xFFFF, pv_len);
        if (SvUTF8(sv)) {
            if (downgrade)
                sv_utf8_downgrade(normalized_sv,1);
            if (SvUTF8(normalized_sv)) {
                SvUTF8_off(normalized_sv);
                sv_setiv(is_utf8_sv,1);
            } else {
                sv_setiv(is_utf8_sv,2);
            }
        }
        return pv_len;
    } else {
        sv_setiv(is_utf8_sv, 0);
        return 0;
    }
}
 
U32
_roundup(const U32 n, const U32 s) {
    const U32 r= n % s;
    if (r) {
        return n + s - r;
    } else {

MinPerfHashTwoLevel.xs  view on Meta::CPAN

271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
normalize_source_hash(pTHX_ HV *source_hv, AV *keys_av, U32 compute_flags, SV *buf_length_sv, char *state_pv) {
    dMY_CXT;
    HE *he;
    U32 buf_length= 0;
    U32 ctr;
 
    hv_iterinit(source_hv);
    while (he= hv_iternext(source_hv)) {
        SV *val_sv= HeVAL(he);
        SV *val_normalized_sv;
        SV *val_is_utf8_sv;
 
        SV *key_sv;
        SV *key_normalized_sv;
        SV *key_is_utf8_sv;
        HV *hv;
        U8 *key_pv;
        STRLEN key_len;
        U64 h0;
 
        if (!val_sv) croak("panic: no sv for value?");
        if (!SvOK(val_sv) && (compute_flags & MPH_F_FILTER_UNDEF)) continue;
 
        hv= newHV();
        val_normalized_sv= newSV(0);
        val_is_utf8_sv= newSVuv(0);
 
        key_sv= newSVhek(HeKEY_hek(he));
        key_normalized_sv= newSV(0);
        key_is_utf8_sv= newSVuv(0);
 
        hv_ksplit(hv,15);
        hv_store_ent_with_keysv(hv,MPH_KEYSV_KEY,            key_sv);
        hv_store_ent_with_keysv(hv,MPH_KEYSV_KEY_NORMALIZED, key_normalized_sv);
        hv_store_ent_with_keysv(hv,MPH_KEYSV_KEY_IS_UTF8,    key_is_utf8_sv);
        hv_store_ent_with_keysv(hv,MPH_KEYSV_VAL,            SvREFCNT_inc_simple_NN(val_sv));
        hv_store_ent_with_keysv(hv,MPH_KEYSV_VAL_NORMALIZED, val_normalized_sv);
        hv_store_ent_with_keysv(hv,MPH_KEYSV_VAL_IS_UTF8,    val_is_utf8_sv);
        /* install everything into the keys_av just in case normalize_with_flags() dies */
        av_push(keys_av,newRV_noinc((SV*)hv));
 
        buf_length += normalize_with_flags(aTHX_ key_sv, key_normalized_sv, key_is_utf8_sv, 1);
        buf_length += normalize_with_flags(aTHX_ val_sv, val_normalized_sv, val_is_utf8_sv, 0);
 
        key_pv= (U8 *)SvPV(key_normalized_sv,key_len);
        h0= mph_hash_with_state(state_pv,key_pv,key_len);
 
        hv_store_ent_with_keysv(hv,MPH_KEYSV_H0,             newSVuv(h0));
    }
    if (buf_length_sv)
        sv_setuv(buf_length_sv, buf_length);
 
    /* we now know how many keys there are, and what the max_xor_val should be */

MinPerfHashTwoLevel.xs  view on Meta::CPAN

885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
char *state;
struct mph_bucket *table;
char *key_flags;
char *val_flags;
char *str_buf_start;
char *str_buf_end;
char *str_buf_pos;
U32 i;
STRLEN pv_len;
char *pv;
U32 key_is_utf8_count[3]={0,0,0};
U32 val_is_utf8_count[2]={0,0};
U32 used_flags;
U32 the_flag;
IV key_is_utf8_generic=-1;
IV val_is_utf8_generic=-1;
 
for (i= 0; i < bucket_count; i++) {
    SV **got= av_fetch(buckets_av,i,0);
    HV *hv= (HV *)SvRV(*got);
    HE *key_is_utf8_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_KEY_IS_UTF8,0);
    HE *val_is_utf8_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_VAL_IS_UTF8,0);
    key_is_utf8_count[SvUV(HeVAL(key_is_utf8_he))]++;
    val_is_utf8_count[SvUV(HeVAL(val_is_utf8_he))]++;
}
used_flags= 0;
if (key_is_utf8_count[0]) { the_flag= 0; used_flags++; }
if (key_is_utf8_count[1]) { the_flag= 1; used_flags++; }
if (key_is_utf8_count[2]) { the_flag= 2; used_flags++; }
if (used_flags == 1) {
    key_is_utf8_generic= the_flag;
    key_flags_rlen= 0;
}
used_flags= 0;
if (val_is_utf8_count[0]) { the_flag= 0; used_flags++; }
if (val_is_utf8_count[1]) { the_flag= 1; used_flags++; }
if (used_flags == 1) {
    val_is_utf8_generic= the_flag;
    val_flags_rlen= 0;
}
 
total_size=
    + header_rlen
    + state_rlen
    + table_rlen
    + key_flags_rlen
    + val_flags_rlen
    + str_rlen

MinPerfHashTwoLevel.xs  view on Meta::CPAN

941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
head->magic_num= 1278363728;
head->variant= variant;
head->num_buckets= bucket_count;
head->state_ofs= header_rlen;
head->table_ofs= head->state_ofs + state_rlen;
head->key_flags_ofs= head->table_ofs + table_rlen;
head->val_flags_ofs= head->key_flags_ofs + key_flags_rlen;
head->str_buf_ofs= head->val_flags_ofs + val_flags_rlen;
 
if (val_is_utf8_generic >= 0)
    head->general_flags |= (MPH_VALS_ARE_SAME_UTF8NESS_FLAG_BIT | (val_is_utf8_generic << MPH_VALS_ARE_SAME_UTF8NESS_SHIFT));
if (key_is_utf8_generic >= 0)
    head->general_flags |= (MPH_KEYS_ARE_SAME_UTF8NESS_FLAG_BIT | (key_is_utf8_generic << MPH_KEYS_ARE_SAME_UTF8NESS_SHIFT));
 
state= start + head->state_ofs;
table= (struct mph_bucket *)(start + head->table_ofs);
key_flags= start + head->key_flags_ofs;
val_flags= start + head->val_flags_ofs;
str_buf_start= start + head->str_buf_ofs;
str_buf_end= start + total_size;
str_buf_pos= str_buf_start + 2;
 
Copy(state_pv,state,state_len,char);

MinPerfHashTwoLevel.xs  view on Meta::CPAN

973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
    HE *val_normalized_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_VAL_NORMALIZED,0);
    HE *xor_val_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_XOR_VAL,0);
 
    if (xor_val_he) {
        table[i].xor_val= SvUV(HeVAL(xor_val_he));
    } else {
        table[i].xor_val= 0;
    }
    SETOFS(i,key_normalized_he,table,key_ofs,key_len,str_buf_start,str_buf_pos,str_buf_end,str_ofs_hv);
    SETOFS(i,val_normalized_he,table,val_ofs,val_len,str_buf_start,str_buf_pos,str_buf_end,str_ofs_hv);
    if ( key_is_utf8_generic < 0) {
        HE *key_is_utf8_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_KEY_IS_UTF8,0);
        if (key_is_utf8_he) {
            UV u= SvUV(HeVAL(key_is_utf8_he));
            SETBITS(u,key_flags,i,2);
        } else {
            croak("panic: out of memory? no key_is_utf8_he for %u",i);
        }
    }
    if ( val_is_utf8_generic < 0 ) {
        HE *val_is_utf8_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_VAL_IS_UTF8,0);
        if (val_is_utf8_he) {
            UV u= SvUV(HeVAL(val_is_utf8_he));
            SETBITS(u,val_flags,i,1);
        } else {
            croak("panic: out of memory? no val_is_utf8_he for %u",i);
        }
    }
}
*str_buf_pos =   0; str_buf_pos++;
*str_buf_pos = 128; str_buf_pos++;
{
    U32 r= (str_buf_pos - start) % alignment;
    if (r) {
        str_buf_pos += (alignment - r);
    }

lib/Algorithm/MinPerfHashTwoLevel.pm  view on Meta::CPAN

227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
    x = ((x >> 16) ^ x) * 0x45d9f3b;
    x = ((x >> 16) ^ x) * 0x45d9f3b;
    x = ((x >> 16) ^ x);
 
which is just a simple 32 bit integer hash function I found at
https://stackoverflow.com/a/12996028, but any decent reversible
integer hash function would do.
 
*NOTE* in Perl a given string may have differing binary representations
if it is encoded as utf8 or not. This module uses the same conventions
as Perl itself, which is that keys are stored in their minimal form when
possible, and are only stored in their unicode (utf8) form when they
cannot be downgraded to latin-1. This ensures that the unicode and latin-1
representations of a given string are treated as the same key. This module
deals with this by "normalizing" the keys and values into latin-1, but
tracking the representation as a flag. See key_normalized and key_is_utf8
(and their 'val' equivalents) documented in the construct method.
 
=head2 METHODS
 
=over 4
 
=item new
 
Construct a new Algorithm::MinPerfHashTwoLevel object. Optional arguments
which may be provided are 'source_hash' which is a hash reference to use

lib/Algorithm/MinPerfHashTwoLevel.pm  view on Meta::CPAN

262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
on the 'source_hash' passed into the constructor, or requires one to passed
in as an argument.
 
Returns an array of hashes containing information about each bucket:
 
          {
            "h1_keys" => 2,
            "h0" => "17713559403787135240",
            "idx" => 0,
            "key" => "\x{103}",
            "key_is_utf8" => 1,
            "key_normalized" => "\304\203",
            "val" => "\x{103}",
            "val_is_utf8" => 1,
            "val_normalized" => "\304\203",
            "xor_val" => 2
          },
 
The meaning of these keys is as follows:
 
=over 4
 
=item h1_keys

lib/Algorithm/MinPerfHashTwoLevel.pm  view on Meta::CPAN

291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
The hash value computed for this key.
 
=item idx
 
The index of this bucket.
 
=item key
 
The key for this bucket as a perl string. (See key_normalized.)
 
=item key_is_utf8
 
Whether this key is encoded as utf8. Will be one of
0 for "not utf8", 1 for "is utf8", and 2 for "was utf8"
meaning the key is stored as latin-1, but will be upgraded
when fetched.
 
=item key_normalized
 
The raw bytes of the normalized key. (See key_is_utf8.)
 
=item val
 
The value for this bucket as a perl string. (See val_normalized.)
 
=item val_is_utf8
 
Whether this key is encoded as utf8. Will be either
0 for "not utf8" or 1 for "is utf8".
 
=item val_normalized
 
The raw bytes of the normalized key. (See val_is_utf8).
 
=item xor_val
 
The mask to be xor'ed with the second hash (h2) to determine
the actual lookup bucket. If the xor_val for a given bucket
is 0 then the key is not in the hash.
 
=back
 
=back

lib/Tie/Hash/MinPerfHashTwoLevel/OnDisk.pm  view on Meta::CPAN

445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
=head2 FILE FORMAT
 
Currently there is only one support file format variant, 5.
 
The file structure consists of a header, followed by a byte vector of seed/state
data for the hash function, followed by a bucket table with records of a fixed size,
optionally followed by a bitvector of the flags for the keys with two bits per key,
optionally followed by a bitvector of flags for values with one bit per value,
followed by a string table containing the comment for the file and the strings it
contains, and lastly a checksum; the last 8 bytes of the file contain a hash of the
rest of the file. The key flags may be 0 for "latin-1/not-utf8", 1 for "is-utf8",
and 2 for "was-utf8" which is used for keys which can be represented as latin-1,
but should be restored as unicode/utf8. The val flags are similar but do not (need to)
support "was-utf8".
 
Structure:
 
    Header
    Hash-state
    Bucket-table
    Key flags (optional)
    Val flags (optional)
    Strings
    Checksum

mph2l.h  view on Meta::CPAN

42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
} STMT_END
 
#define MPH_INIT_ALL_KEYSV() STMT_START {\
    MY_CXT_INIT;                                                \
    MPH_INIT_KEYSV(MPH_KEYSV_IDX,"idx");                        \
    MPH_INIT_KEYSV(MPH_KEYSV_H1_KEYS,"h1_keys");                \
    MPH_INIT_KEYSV(MPH_KEYSV_XOR_VAL,"xor_val");                \
    MPH_INIT_KEYSV(MPH_KEYSV_H0,"h0");                          \
    MPH_INIT_KEYSV(MPH_KEYSV_KEY,"key");                        \
    MPH_INIT_KEYSV(MPH_KEYSV_KEY_NORMALIZED,"key_normalized");  \
    MPH_INIT_KEYSV(MPH_KEYSV_KEY_IS_UTF8,"key_is_utf8");        \
    MPH_INIT_KEYSV(MPH_KEYSV_VAL,"val");                        \
    MPH_INIT_KEYSV(MPH_KEYSV_VAL_NORMALIZED,"val_normalized");  \
    MPH_INIT_KEYSV(MPH_KEYSV_VAL_IS_UTF8,"val_is_utf8");        \
                                                                \
    MPH_INIT_KEYSV(MPH_KEYSV_VARIANT,"variant");                \
    MPH_INIT_KEYSV(MPH_KEYSV_COMPUTE_FLAGS,"compute_flags");    \
    MPH_INIT_KEYSV(MPH_KEYSV_STATE,"state");                    \
    MPH_INIT_KEYSV(MPH_KEYSV_SOURCE_HASH,"source_hash");        \
    MPH_INIT_KEYSV(MPH_KEYSV_BUF_LENGTH,"buf_length");          \
    MPH_INIT_KEYSV(MPH_KEYSV_BUCKETS,"buckets");                \
    MPH_INIT_KEYSV(MPH_KEYSV_MOUNT,"mount");                    \
} STMT_END

ppport.h  view on Meta::CPAN

249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
    pv_escape()               NEED_pv_escape               NEED_pv_escape_GLOBAL
    pv_pretty()               NEED_pv_pretty               NEED_pv_pretty_GLOBAL
    sv_2pv_flags()            NEED_sv_2pv_flags            NEED_sv_2pv_flags_GLOBAL
    sv_2pvbyte()              NEED_sv_2pvbyte              NEED_sv_2pvbyte_GLOBAL
    sv_catpvf_mg()            NEED_sv_catpvf_mg            NEED_sv_catpvf_mg_GLOBAL
    sv_catpvf_mg_nocontext()  NEED_sv_catpvf_mg_nocontext  NEED_sv_catpvf_mg_nocontext_GLOBAL
    sv_pvn_force_flags()      NEED_sv_pvn_force_flags      NEED_sv_pvn_force_flags_GLOBAL
    sv_setpvf_mg()            NEED_sv_setpvf_mg            NEED_sv_setpvf_mg_GLOBAL
    sv_setpvf_mg_nocontext()  NEED_sv_setpvf_mg_nocontext  NEED_sv_setpvf_mg_nocontext_GLOBAL
    sv_unmagicext()           NEED_sv_unmagicext           NEED_sv_unmagicext_GLOBAL
    utf8_to_uvchr_buf()       NEED_utf8_to_uvchr_buf       NEED_utf8_to_uvchr_buf_GLOBAL
    vload_module()            NEED_vload_module            NEED_vload_module_GLOBAL
    vmess()                   NEED_vmess                   NEED_vmess_GLOBAL
    vnewSVpvf()               NEED_vnewSVpvf               NEED_vnewSVpvf_GLOBAL
    warner()                  NEED_warner                  NEED_warner_GLOBAL
 
To avoid namespace conflicts, you can change the namespace of the
explicitly exported functions / variables using the C<DPPP_NAMESPACE>
macro. Just C<#define> the macro before including C<ppport.h>:
 
    #define DPPP_NAMESPACE MyOwnNamespace_

ppport.h  view on Meta::CPAN

640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
PERL_MAGIC_shared_scalar|5.007003||p
PERL_MAGIC_shared|5.007003||p
PERL_MAGIC_sigelem|5.007002||p
PERL_MAGIC_sig|5.007002||p
PERL_MAGIC_substr|5.007002||p
PERL_MAGIC_sv|5.007002||p
PERL_MAGIC_taint|5.007002||p
PERL_MAGIC_tiedelem|5.007002||p
PERL_MAGIC_tiedscalar|5.007002||p
PERL_MAGIC_tied|5.007002||p
PERL_MAGIC_utf8|5.008001||p
PERL_MAGIC_uvar_elem|5.007003||p
PERL_MAGIC_uvar|5.007002||p
PERL_MAGIC_vec|5.007002||p
PERL_MAGIC_vstring|5.008001||p
PERL_PV_ESCAPE_ALL|5.009004||p
PERL_PV_ESCAPE_FIRSTCHAR|5.009004||p
PERL_PV_ESCAPE_NOBACKSLASH|5.009004||p
PERL_PV_ESCAPE_NOCLEAR|5.009004||p
PERL_PV_ESCAPE_QUOTE|5.009004||p
PERL_PV_ESCAPE_RE|5.009005||p

ppport.h  view on Meta::CPAN

971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
SvPV_nomg_const|5.009003||p
SvPV_nomg_nolen|5.013007||p
SvPV_nomg|5.007002||p
SvPV_renew|5.009003||p
SvPV_set|||
SvPVbyte_force||5.009002|
SvPVbyte_nolen||5.006000|
SvPVbytex_force||5.006000|
SvPVbytex||5.006000|
SvPVbyte|5.006000||p
SvPVutf8_force||5.006000|
SvPVutf8_nolen||5.006000|
SvPVutf8x_force||5.006000|
SvPVutf8x||5.006000|
SvPVutf8||5.006000|
SvPVx|||
SvPV|||
SvREADONLY_off|||
SvREADONLY_on|||
SvREADONLY|||
SvREFCNT_dec_NN||5.017007|
SvREFCNT_dec|||
SvREFCNT_inc_NN|5.009004||p
SvREFCNT_inc_simple_NN|5.009004||p
SvREFCNT_inc_simple_void_NN|5.009004||p

ppport.h  view on Meta::CPAN

1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
Zero|||
__ASSERT_|||p
_aMY_CXT|5.007003||p
_inverse_folds|||
_is_grapheme|||
_is_in_locale_category|||
_new_invlist_C_array|||
_pMY_CXT|5.007003||p
_to_fold_latin1|||n
_to_upper_title_latin1|||
_to_utf8_case|||
_variant_byte_number|||n
_warn_problematic_locale|||n
aMY_CXT_|5.007003||p
aMY_CXT|5.007003||p
aTHXR_|5.024000||p
aTHXR|5.024000||p
aTHX_|5.006000||p
aTHX|5.006000||p
abort_execution|||
add_above_Latin1_folds|||

ppport.h  view on Meta::CPAN

1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
bad_type_pv|||
bind_match|||
block_end||5.004000|
block_gimme||5.004000|
block_start||5.004000|
blockhook_register||5.013003|
boolSV|5.004000||p
boot_core_PerlIO|||
boot_core_UNIVERSAL|||
boot_core_mro|||
bytes_cmp_utf8||5.013007|
cBOOL|5.013000||p
call_argv|5.006000||p
call_atexit||5.006000|
call_list||5.004000|
call_method|5.006000||p
call_pv|5.006000||p
call_sv|5.006000||p
caller_cx|5.013005|5.006000|p
calloc||5.007002|n
cando|||

ppport.h  view on Meta::CPAN

1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
do_seek|||
do_semop|||
do_shmio|||
do_smartmatch|||
do_spawn_nowait|||
do_spawn|||
do_sprintf|||
do_sv_dump||5.006000|
do_sysseek|||
do_tell|||
do_trans_complex_utf8|||
do_trans_complex|||
do_trans_count_utf8|||
do_trans_count|||
do_trans_simple_utf8|||
do_trans_simple|||
do_trans|||
do_vecget|||
do_vecset|||
do_vop|||
docatch|||
does_utf8_overflow|||n
doeval_compile|||
dofile|||
dofindlabel|||
doform|||
doing_taint||5.008001|n
dooneliner|||
doopen_pm|||
doparseform|||
dopoptoeval|||
dopoptogivenfor|||

ppport.h  view on Meta::CPAN

1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
find_runcv||5.008001|
find_rundefsv||5.013002|
find_script|||
find_span_end_mask|||n
find_span_end|||n
first_symbol|||n
fixup_errno_string|||
foldEQ_latin1_s2_folded|||n
foldEQ_latin1||5.013008|n
foldEQ_locale||5.013002|n
foldEQ_utf8||5.013002|
foldEQ||5.013002|n
fold_constants|||
forbid_setid|||
force_ident_maybe_lex|||
force_ident|||
force_list|||
force_next|||
force_strict_version|||
force_version|||
force_word|||

ppport.h  view on Meta::CPAN

1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
hv_rand_set||5.018000|
hv_riter_p||5.009003|
hv_riter_set||5.009003|
hv_scalar||5.009001|
hv_store_ent||5.003070|
hv_stores|5.009004||p
hv_store|||
hv_undef_flags|||
hv_undef|||
ibcmp_locale||5.004000|
ibcmp_utf8||5.007003|
ibcmp|||
incline|||
incpush_if_exists|||
incpush_use_sep|||
incpush|||
ingroup|||
init_argv_symbols|||
init_constants|||
init_dbargs|||
init_debugger|||

ppport.h  view on Meta::CPAN

1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
isASCII_A|||p
isASCII|5.006000||p
isBLANK_A|||p
isBLANK|5.006001||p
isC9_STRICT_UTF8_CHAR|||n
isCNTRL_A|||p
isCNTRL|5.006000||p
isDIGIT_A|||p
isDIGIT|||p
isFF_OVERLONG|||n
isFOO_utf8_lc|||
isGCB|||
isGRAPH_A|||p
isGRAPH|5.006000||p
isIDCONT_A|||p
isIDCONT|5.017008|5.017008|p
isIDFIRST_A|||p
isIDFIRST|||p
isLB|||
isLOWER_A|||p
isLOWER|||p

ppport.h  view on Meta::CPAN

1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
isUPPER|||p
isUTF8_CHAR_flags|||
isUTF8_CHAR||5.021001|n
isWB|||
isWORDCHAR_A|||p
isWORDCHAR|5.013006|5.013006|p
isXDIGIT_A|||p
isXDIGIT|5.006000||p
is_an_int|||
is_ascii_string||5.011000|n
is_c9strict_utf8_string_loclen|||n
is_c9strict_utf8_string_loc|||n
is_c9strict_utf8_string|||n
is_handle_constructor|||n
is_invariant_string||5.021007|n
is_lvalue_sub||5.007001|
is_safe_syscall||5.019004|
is_ssc_worth_it|||n
is_strict_utf8_string_loclen|||n
is_strict_utf8_string_loc|||n
is_strict_utf8_string|||n
is_utf8_char_buf||5.015008|n
is_utf8_common_with_len|||
is_utf8_common|||
is_utf8_cp_above_31_bits|||n
is_utf8_fixed_width_buf_flags|||n
is_utf8_fixed_width_buf_loc_flags|||n
is_utf8_fixed_width_buf_loclen_flags|||n
is_utf8_invariant_string_loc|||n
is_utf8_invariant_string|||n
is_utf8_non_invariant_string|||n
is_utf8_overlong_given_start_byte_ok|||n
is_utf8_string_flags|||n
is_utf8_string_loc_flags|||n
is_utf8_string_loclen_flags|||n
is_utf8_string_loclen||5.009003|n
is_utf8_string_loc||5.008001|n
is_utf8_string||5.006001|n
is_utf8_valid_partial_char_flags|||n
is_utf8_valid_partial_char|||n
isa_lookup|||
isinfnansv|||
isinfnan||5.021004|n
items|||n
ix|||n
jmaybe|||
join_exact|||
keyword_plugin_standard|||
keyword|||
leave_scope|||

ppport.h  view on Meta::CPAN

1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
magic_setlvref|||
magic_setmglob|||
magic_setnkeys|||
magic_setnonelem|||
magic_setpack|||
magic_setpos|||
magic_setregexp|||
magic_setsig|||
magic_setsubstr|||
magic_settaint|||
magic_setutf8|||
magic_setuvar|||
magic_setvec|||
magic_set|||
magic_sizepack|||
magic_wipepack|||
make_matcher|||
make_trie|||
malloc_good_size|||n
malloced_size|||n
malloc||5.007002|n

ppport.h  view on Meta::CPAN

1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
mro_register||5.010001|
mro_set_mro||5.010001|
mro_set_private_data||5.010001|
mul128|||
multiconcat_stringify|||
multideref_stringify|||
my_atof2||5.007002|
my_atof3|||
my_atof||5.006000|
my_attrs|||
my_bytes_to_utf8|||n
my_chsize|||
my_clearenv|||
my_cxt_index|||
my_cxt_init|||
my_dirfd||5.009005|n
my_exit_jump|||
my_exit|||
my_failure_exit||5.004000|
my_fflush_all||5.006000|
my_fork||5.007003|n

ppport.h  view on Meta::CPAN

1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
my_stat_flags|||
my_stat||5.024000|
my_strerror|||
my_strftime||5.007002|
my_strlcat|5.009004||pn
my_strlcpy|5.009004||pn
my_strnlen|||pn
my_strtod|||n
my_unexec|||
my_vsnprintf||5.009004|n
need_utf8|||n
newANONATTRSUB||5.006000|
newANONHASH|||
newANONLIST|||
newANONSUB|||
newASSIGNOP|||
newATTRSUB_x|||
newATTRSUB||5.006000|
newAVREF|||
newAV|||
newBINOP|||

ppport.h  view on Meta::CPAN

2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
newSVavdefelem|||
newSVhek||5.009003|
newSViv|||
newSVnv|||
newSVpadname||5.017004|
newSVpv_share||5.013006|
newSVpvf_nocontext|||vn
newSVpvf||5.004000|v
newSVpvn_flags|5.010001||p
newSVpvn_share|5.007001||p
newSVpvn_utf8|5.010001||p
newSVpvn|5.004050||p
newSVpvs_flags|5.010001||p
newSVpvs_share|5.009003||p
newSVpvs|5.009003||p
newSVpv|||
newSVrv|||
newSVsv_flags|||
newSVsv_nomg|||
newSVsv|||
newSVuv|5.006000||p

ppport.h  view on Meta::CPAN

2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
nextargv|||
nextchar|||
ninstr|||n
no_bareword_allowed|||
no_fh_allowed|||
no_op|||
noperl_die|||vn
not_a_number|||
not_incrementable|||
nothreadhook||5.008000|
notify_parser_that_changed_to_utf8|||
nuke_stacks|||
num_overflow|||n
oopsAV|||
oopsHV|||
op_append_elem||5.013006|
op_append_list||5.013006|
op_class|||
op_clear|||
op_contextualize||5.013006|
op_convert_list||5.021006|

ppport.h  view on Meta::CPAN

2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
sv_2iuv_common|||
sv_2iuv_non_preserve|||
sv_2iv_flags||5.009001|
sv_2iv|||
sv_2mortal|||
sv_2nv_flags||5.013001|
sv_2pv_flags|5.007002||p
sv_2pv_nolen|5.006000||p
sv_2pvbyte_nolen|5.006000||p
sv_2pvbyte|5.006000||p
sv_2pvutf8_nolen||5.006000|
sv_2pvutf8||5.006000|
sv_2pv|||
sv_2uv_flags||5.009001|
sv_2uv|5.004000||p
sv_add_arena|||
sv_add_backref|||
sv_backoff|||n
sv_bless|||
sv_buf_to_ro|||
sv_buf_to_rw|||
sv_cat_decode||5.008001|

ppport.h  view on Meta::CPAN

2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
sv_gets||5.003070|
sv_grow|||
sv_i_ncmp|||
sv_inc_nomg||5.013002|
sv_inc|||
sv_insert_flags||5.010001|
sv_insert|||
sv_isa|||
sv_isobject|||
sv_iv||5.005000|
sv_len_utf8_nomg|||
sv_len_utf8||5.006000|
sv_len|||
sv_magic_portable|5.024000|5.004000|p
sv_magicext_mglob|||
sv_magicext||5.007003|
sv_magic|||
sv_mortalcopy_flags|||
sv_mortalcopy|||
sv_ncmp|||
sv_newmortal|||
sv_newref|||

ppport.h  view on Meta::CPAN

2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
sv_pos_u2b_forwards|||n
sv_pos_u2b_midway|||n
sv_pos_u2b||5.006000|
sv_pvbyten_force||5.006000|
sv_pvbyten||5.006000|
sv_pvbyte||5.006000|
sv_pvn_force_flags|5.007002||p
sv_pvn_force|||
sv_pvn_nomg|5.007003|5.005000|p
sv_pvn||5.005000|
sv_pvutf8n_force||5.006000|
sv_pvutf8n||5.006000|
sv_pvutf8||5.006000|
sv_pv||5.006000|
sv_recode_to_utf8||5.007003|
sv_reftype|||
sv_ref||5.015004|
sv_replace|||
sv_report_used|||
sv_resetpvn|||
sv_reset|||
sv_rvunweaken|||
sv_rvweaken||5.006000|
sv_set_undef|||
sv_sethek|||

ppport.h  view on Meta::CPAN

2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
sv_uni_display||5.007003|
sv_unmagicext|5.013008||p
sv_unmagic|||
sv_unref_flags||5.007001|
sv_unref|||
sv_untaint||5.004000|
sv_upgrade|||
sv_usepvn_flags||5.009004|
sv_usepvn_mg|5.004050||p
sv_usepvn|||
sv_utf8_decode|||
sv_utf8_downgrade|||
sv_utf8_encode||5.006000|
sv_utf8_upgrade_flags_grow||5.011000|
sv_utf8_upgrade_flags||5.007002|
sv_utf8_upgrade_nomg||5.007002|
sv_utf8_upgrade||5.007001|
sv_uv|5.005000||p
sv_vcatpvf_mg|5.006000|5.004000|p
sv_vcatpvfn_flags||5.017002|
sv_vcatpvfn||5.004000|
sv_vcatpvf|5.006000|5.004000|p
sv_vsetpvf_mg|5.006000|5.004000|p
sv_vsetpvfn||5.004000|
sv_vsetpvf|5.006000|5.004000|p
svtype|||
swallow_bom|||

ppport.h  view on Meta::CPAN

2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
sys_init3||5.010000|n
sys_init||5.010000|n
sys_intern_clear|||
sys_intern_dup|||
sys_intern_init|||
sys_term||5.010000|n
taint_env|||
taint_proper|||
tied_method|||v
tmps_grow_p|||
toFOLD_utf8_safe|||
toFOLD_utf8||5.019001|
toFOLD_uvchr||5.023009|
toFOLD||5.019001|
toLOWER_L1||5.019001|
toLOWER_LC||5.004000|
toLOWER_utf8_safe|||
toLOWER_utf8||5.015007|
toLOWER_uvchr||5.023009|
toLOWER|||
toTITLE_utf8_safe|||
toTITLE_utf8||5.015007|
toTITLE_uvchr||5.023009|
toTITLE||5.019001|
toUPPER_utf8_safe|||
toUPPER_utf8||5.015007|
toUPPER_uvchr||5.023009|
toUPPER|||
to_byte_substr|||
to_lower_latin1|||n
to_utf8_substr|||
tokenize_use|||
tokeq|||
tokereport|||
too_few_arguments_pv|||
too_many_arguments_pv|||
translate_substr_offsets|||n
traverse_op_tree|||
try_amagic_bin|||
try_amagic_un|||
turkic_fc|||

ppport.h  view on Meta::CPAN

2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
unpackstring||5.008001|
unreferenced_to_tmp_stack|||
unshare_hek_or_pvn|||
unshare_hek|||
unsharepvn||5.003070|
unwind_handler_stack|||
update_debugger_info|||
upg_version||5.009005|
usage|||
utf16_textfilter|||
utf16_to_utf8_reversed||5.006001|
utf16_to_utf8||5.006001|
utf8_distance||5.006000|
utf8_hop_back|||n
utf8_hop_forward|||n
utf8_hop_safe|||n
utf8_hop||5.006000|n
utf8_length||5.007001|
utf8_mg_len_cache_update|||
utf8_mg_pos_cache_update|||
utf8_to_uvchr_buf|5.015009|5.015009|p
utf8_to_uvchr|||p
utf8n_to_uvchr_error|||n
utf8n_to_uvchr||5.007001|n
utf8n_to_uvuni||5.007001|
utilize|||
uvchr_to_utf8_flags||5.007003|
uvchr_to_utf8||5.007001|
uvoffuni_to_utf8_flags||5.019004|
uvuni_to_utf8_flags||5.007003|
uvuni_to_utf8||5.007001|
valid_utf8_to_uvchr|||n
validate_suid|||
variant_under_utf8_count|||n
varname|||
vcmp||5.009000|
vcroak||5.006000|
vdeb||5.007003|
vform||5.006000|
visit|||
vivify_defelem|||
vivify_ref|||
vload_module|5.006000||p
vmess|5.006000|5.006000|p

ppport.h  view on Meta::CPAN

5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
    return p - str;
}
 
#endif
#endif
 
#if (PERL_BCDVERSION < 0x5031002)
        /* Versions prior to this accepted things that are now considered
         * malformations, and didn't return -1 on error with warnings enabled
         * */
#  undef utf8_to_uvchr_buf
#endif
 
/* This implementation brings modern, generally more restricted standards to
 * utf8_to_uvchr_buf.  Some of these are security related, and clearly must
 * be done.  But its arguable that the others need not, and hence should not.
 * The reason they're here is that a module that intends to play with the
 * latest perls shoud be able to work the same in all releases.  An example is
 * that perl no longer accepts any UV for a code point, but limits them to
 * IV_MAX or below.  This is for future internal use of the larger code points.
 * If it turns out that some of these changes are breaking code that isn't
 * intended to work with modern perls, the tighter restrictions could be
 * relaxed.  khw thinks this is unlikely, but has been wrong in the past. */
 
#ifndef utf8_to_uvchr_buf
   /* Choose which underlying implementation to use.  At least one must be
    * present or the perl is too early to handle this function */
#  if defined(utf8n_to_uvchr) || defined(utf8_to_uv)
#    if defined(utf8n_to_uvchr)   /* This is the preferred implementation */
#      define _ppport_utf8_to_uvchr_buf_callee utf8n_to_uvchr
#    else
#      define _ppport_utf8_to_uvchr_buf_callee utf8_to_uv
#    endif
 
#  endif
 
#ifdef _ppport_utf8_to_uvchr_buf_callee
#  if defined(NEED_utf8_to_uvchr_buf)
static UV DPPP_(my_utf8_to_uvchr_buf)(pTHX_ const U8 * s, const U8 * send, STRLEN * retlen);
static
#else
extern UV DPPP_(my_utf8_to_uvchr_buf)(pTHX_ const U8 * s, const U8 * send, STRLEN * retlen);
#endif
 
#if defined(NEED_utf8_to_uvchr_buf) || defined(NEED_utf8_to_uvchr_buf_GLOBAL)
 
#ifdef utf8_to_uvchr_buf
#  undef utf8_to_uvchr_buf
#endif
#define utf8_to_uvchr_buf(a,b,c) DPPP_(my_utf8_to_uvchr_buf)(aTHX_ a,b,c)
#define Perl_utf8_to_uvchr_buf DPPP_(my_utf8_to_uvchr_buf)
 
 
UV
DPPP_(my_utf8_to_uvchr_buf)(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
{
    UV ret;
    STRLEN curlen;
    bool overflows = 0;
    const U8 *cur_s = s;
    const bool do_warnings = ckWARN_d(WARN_UTF8);
 
    if (send > s) {
        curlen = send - s;
    }

ppport.h  view on Meta::CPAN

5104
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
        assert(0);  /* Modern perls die under this circumstance */
        curlen = 0;
        if (! do_warnings) {    /* Handle empty here if no warnings needed */
            if (retlen) *retlen = 0;
            return UNICODE_REPLACEMENT;
        }
    }
 
    /* The modern version allows anything that evaluates to a legal UV, but not
     * overlongs nor an empty input */
    ret = _ppport_utf8_to_uvchr_buf_callee(
                s, curlen, retlen,   (UTF8_ALLOW_ANYUV
                                  & ~(UTF8_ALLOW_LONG|UTF8_ALLOW_EMPTY)));
 
    /* But actually, modern versions restrict the UV to being no more than what
     * an IV can hold */
    if (ret > PERL_INT_MAX) {
        overflows = 1;
    }
 
#    if (PERL_BCDVERSION < 0x5026000)

ppport.h  view on Meta::CPAN

5188
5189
5190
5191
5192
5193
5194
5195
5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215
5216
5217
5218
5219
                    ret, *cur_s, *s);
            }
            if (retlen) {
                *retlen = (STRLEN) -1;
            }
            return 0;
        }
    }
 
    /* If failed and warnings are off, to emulate the behavior of the real
     * utf8_to_uvchr(), try again, allowing anything.  (Note a return of 0 is
     * ok if the input was '\0') */
    if (UNLIKELY(ret == 0 && (curlen == 0 || *s != '\0'))) {
 
        /* If curlen is 0, we already handled the case where warnings are
         * disabled, so this 'if' will be true, and we won't look at the
         * contents of 's' */
        if (do_warnings) {
            *retlen = (STRLEN) -1;
        }
        else {
            ret = _ppport_utf8_to_uvchr_buf_callee(
                                            s, curlen, retlen, UTF8_ALLOW_ANY);
            /* Override with the REPLACEMENT character, as that is what the
             * modern version of this function returns */
            ret = UNICODE_REPLACEMENT;
 
#           if (PERL_BCDVERSION < 0x5016000)
 
            /* Versions earlier than this don't necessarily return the proper
             * length.  It should not extend past the end of string, nor past
             * what the first byte indicates the length is, nor past the

ppport.h  view on Meta::CPAN

5235
5236
5237
5238
5239
5240
5241
5242
5243
5244
5245
5246
5247
5248
5249
5250
5251
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262
5263
5264
5265
5266
        }
    }
 
    return ret;
}
 
#  endif
#endif
#endif
 
#if defined(UTF8SKIP) && defined(utf8_to_uvchr_buf)
#undef utf8_to_uvchr /* Always redefine this unsafe function so that it refuses
                        to read past a NUL, making it much less likely to read
                        off the end of the buffer.  A NUL indicates the start
                        of the next character anyway.  If the input isn't
                        NUL-terminated, the function remains unsafe, as it
                        always has been. */
#ifndef utf8_to_uvchr
#  define utf8_to_uvchr(s, lp)           \
    ((*(s) == '\0')                                                             \
    ? utf8_to_uvchr_buf(s,((s)+1), lp) /* Handle single NUL specially */        \
    : utf8_to_uvchr_buf(s, (s) + my_strnlen((char *) (s), UTF8SKIP(s)), (lp)))
#endif
 
#endif
 
#ifdef HAS_MEMCMP
#ifndef memNE
#  define memNE(s1,s2,l)                 (memcmp(s1,s2,l))
#endif
 
#ifndef memEQ

ppport.h  view on Meta::CPAN

5447
5448
5449
5450
5451
5452
5453
5454
5455
5456
5457
5458
5459
5460
5461
5462
5463
5464
5465
5466
5467
#endif
 
#ifndef PERL_MAGIC_vstring
#  define PERL_MAGIC_vstring             'V'
#endif
 
#ifndef PERL_MAGIC_vec
#  define PERL_MAGIC_vec                 'v'
#endif
 
#ifndef PERL_MAGIC_utf8
#  define PERL_MAGIC_utf8                'w'
#endif
 
#ifndef PERL_MAGIC_substr
#  define PERL_MAGIC_substr              'x'
#endif
 
#ifndef PERL_MAGIC_defelem
#  define PERL_MAGIC_defelem             'y'
#endif

ppport.h  view on Meta::CPAN

6478
6479
6480
6481
6482
6483
6484
6485
6486
6487
6488
6489
6490
6491
6492
6493
6494
6495
6496
6497
6498
6499
#if (PERL_BCDVERSION < 0x5006000)
# define D_PPP_CONSTPV_ARG(x)  ((char *) (x))
#else
# define D_PPP_CONSTPV_ARG(x)  (x)
#endif
#ifndef newSVpvn
#  define newSVpvn(data,len)             ((data)                                              \
                                    ? ((len) ? newSVpv((data), (len)) : newSVpv("", 0)) \
                                    : newSV(0))
#endif
#ifndef newSVpvn_utf8
#  define newSVpvn_utf8(s, len, u)       newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0)
#endif
#ifndef SVf_UTF8
#  define SVf_UTF8                       0
#endif
 
#ifndef newSVpvn_flags
 
#if defined(NEED_newSVpvn_flags)
static SV * DPPP_(my_newSVpvn_flags)(pTHX_ const char *s, STRLEN len, U32 flags);
static

ppport.h  view on Meta::CPAN

6558
6559
6560
6561
6562
6563
6564
6565
6566
6567
6568
6569
6570
6571
6572
6573
6574
6575
6576
6577
6578
#ifdef sv_2pvbyte
#  undef sv_2pvbyte
#endif
#define sv_2pvbyte(a,b) DPPP_(my_sv_2pvbyte)(aTHX_ a,b)
#define Perl_sv_2pvbyte DPPP_(my_sv_2pvbyte)
 
 
char *
DPPP_(my_sv_2pvbyte)(pTHX_ SV *sv, STRLEN *lp)
{
  sv_utf8_downgrade(sv,0);
  return SvPV(sv,*lp);
}
 
#endif
 
/* Hint: sv_2pvbyte
 * Use the SvPVbyte() macro instead of sv_2pvbyte().
 */
 
#undef SvPVbyte

ppport.h  view on Meta::CPAN

7408
7409
7410
7411
7412
7413
7414
7415
7416
7417
7418
7419
7420
7421
7422
7423
7424
7425
7426
7427
7428
    const MGVTBL* const virt = mg->mg_virtual;
    if (mg->mg_type == type && virt == vtbl) {
        *mgp = mg->mg_moremagic;
        if (virt && virt->svt_free)
            virt->svt_free(aTHX_ sv, mg);
        if (mg->mg_ptr && mg->mg_type != PERL_MAGIC_regex_global) {
            if (mg->mg_len > 0)
                Safefree(mg->mg_ptr);
            else if (mg->mg_len == HEf_SVKEY) /* Questionable on older perls... */
                SvREFCNT_dec(MUTABLE_SV(mg->mg_ptr));
            else if (mg->mg_type == PERL_MAGIC_utf8)
                Safefree(mg->mg_ptr);
        }
        if (mg->mg_flags & MGf_REFCOUNTED)
            SvREFCNT_dec(mg->mg_obj);
        Safefree(mg);
    }
    else
        mgp = &mg->mg_moremagic;
}
if (SvMAGIC(sv)) {

ppport.h  view on Meta::CPAN

8463
8464
8465
8466
8467
8468
8469
8470
8471
8472
8473
8474
8475
8476
8477
8478
8479
8480
8481
8482
8483
8484
8485
8486
8487
8488
8489
8490
8491
8492
8493
8494
8495
8496
8497
8498
8499
8500
8501
DPPP_(my_pv_escape)(pTHX_ SV *dsv, char const * const str,
  const STRLEN count, const STRLEN max,
  STRLEN * const escaped, const U32 flags)
{
    const char esc = flags & PERL_PV_ESCAPE_RE ? '%' : '\\';
    const char dq = flags & PERL_PV_ESCAPE_QUOTE ? '"' : esc;
    char octbuf[32] = "%123456789ABCDF";
    STRLEN wrote = 0;
    STRLEN chsize = 0;
    STRLEN readsize = 1;
#if defined(is_utf8_string) && defined(utf8_to_uvchr_buf)
    bool isuni = flags & PERL_PV_ESCAPE_UNI ? 1 : 0;
#endif
    const char *pv  = str;
    const char * const end = pv + count;
    octbuf[0] = esc;
 
    if (!(flags & PERL_PV_ESCAPE_NOCLEAR))
        sv_setpvs(dsv, "");
 
#if defined(is_utf8_string) && defined(utf8_to_uvchr_buf)
    if ((flags & PERL_PV_ESCAPE_UNI_DETECT) && is_utf8_string((U8*)pv, count))
        isuni = 1;
#endif
 
    for (; pv < end && (!max || wrote < max) ; pv += readsize) {
        const UV u =
#if defined(is_utf8_string) && defined(utf8_to_uvchr_buf)
                     isuni ? utf8_to_uvchr_buf((U8*)pv, end, &readsize) :
#endif
                             (U8)*pv;
        const U8 c = (U8)u & 0xFF;
 
        if (u > 255 || (flags & PERL_PV_ESCAPE_ALL)) {
            if (flags & PERL_PV_ESCAPE_FIRSTCHAR)
                chsize = my_snprintf(octbuf, sizeof octbuf,
                                      "%" UVxf, u);
            else
                chsize = my_snprintf(octbuf, sizeof octbuf,

t/Algo_v5.t  view on Meta::CPAN

27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
}
 
my %hash= ("A" .. "Z");
my $buckets= $o->compute(\%hash);
is_deeply($buckets,
    [
           {
             "h0" => "2811388775115704789",
             "idx" => 0,
             "key" => "M",
             "key_is_utf8" => 0,
             "key_normalized" => "M",
             "val" => "N",
             "val_is_utf8" => 0,
             "val_normalized" => "N"
           },
           {
             "h0" => "8860848721851830215",
             "h1_keys" => 2,
             "idx" => 1,
             "key" => "K",
             "key_is_utf8" => 0,
             "key_normalized" => "K",
             "val" => "L",
             "val_is_utf8" => 0,
             "val_normalized" => "L",
             "xor_val" => 1
           },
           {
             "h0" => "2333874141850904510",
             "h1_keys" => 1,
             "idx" => 2,
             "key" => "G",
             "key_is_utf8" => 0,
             "key_normalized" => "G",
             "val" => "H",
             "val_is_utf8" => 0,
             "val_normalized" => "H",
             "xor_val" => "4294967295"
           },
           {
             "h0" => "16876117158654460122",
             "idx" => 3,
             "key" => "Y",
             "key_is_utf8" => 0,
             "key_normalized" => "Y",
             "val" => "Z",
             "val_is_utf8" => 0,
             "val_normalized" => "Z"
           },
           {
             "h0" => "8715353882719101949",
             "h1_keys" => 1,
             "idx" => 4,
             "key" => "S",
             "key_is_utf8" => 0,
             "key_normalized" => "S",
             "val" => "T",
             "val_is_utf8" => 0,
             "val_normalized" => "T",
             "xor_val" => "4294967294"
           },
           {
             "h0" => "7118026915973626049",
             "h1_keys" => 1,
             "idx" => 5,
             "key" => "U",
             "key_is_utf8" => 0,
             "key_normalized" => "U",
             "val" => "V",
             "val_is_utf8" => 0,
             "val_normalized" => "V",
             "xor_val" => "4294967293"
           },
           {
             "h0" => "8257329964001049281",
             "h1_keys" => 2,
             "idx" => 6,
             "key" => "O",
             "key_is_utf8" => 0,
             "key_normalized" => "O",
             "val" => "P",
             "val_is_utf8" => 0,
             "val_normalized" => "P",
             "xor_val" => 1
           },
           {
             "h0" => "5518171323424817881",
             "h1_keys" => 1,
             "idx" => 7,
             "key" => "E",
             "key_is_utf8" => 0,
             "key_normalized" => "E",
             "val" => "F",
             "val_is_utf8" => 0,
             "val_normalized" => "F",
             "xor_val" => "4294967291"
           },
           {
             "h0" => "3591181703942984702",
             "h1_keys" => 1,
             "idx" => 8,
             "key" => "Q",
             "key_is_utf8" => 0,
             "key_normalized" => "Q",
             "val" => "R",
             "val_is_utf8" => 0,
             "val_normalized" => "R",
             "xor_val" => "4294967288"
           },
           {
             "h0" => "8458370515337648683",
             "h1_keys" => 1,
             "idx" => 9,
             "key" => "W",
             "key_is_utf8" => 0,
             "key_normalized" => "W",
             "val" => "X",
             "val_is_utf8" => 0,
             "val_normalized" => "X",
             "xor_val" => "4294967286"
           },
           {
             "h0" => "18153270191496908466",
             "idx" => 10,
             "key" => "C",
             "key_is_utf8" => 0,
             "key_normalized" => "C",
             "val" => "D",
             "val_is_utf8" => 0,
             "val_normalized" => "D"
           },
           {
             "h0" => "5043040936135718210",
             "h1_keys" => 2,
             "idx" => 11,
             "key" => "A",
             "key_is_utf8" => 0,
             "key_normalized" => "A",
             "val" => "B",
             "val_is_utf8" => 0,
             "val_normalized" => "B",
             "xor_val" => 2
           },
           {
             "h0" => "15354065489600908969",
             "h1_keys" => 1,
             "idx" => 12,
             "key" => "I",
             "key_is_utf8" => 0,
             "key_normalized" => "I",
             "val" => "J",
             "val_is_utf8" => 0,
             "val_normalized" => "J",
             "xor_val" => "4294967285"
           }
    ],
    "simple hash A-Z",
) or diag Dumper($buckets);
 
%hash=map {
    my $key= chr($_);
    utf8::upgrade($key) if $_ % 2;
    $key => $key
} 250 .. 260;
$buckets= $o->compute(\%hash);
is_deeply($buckets,
    [
           {
             "h0" => "7329038856428266488",
             "h1_keys" => 1,
             "idx" => 0,
             "key" => "\376",
             "key_is_utf8" => 0,
             "key_normalized" => "\376",
             "val" => "\376",
             "val_is_utf8" => 0,
             "val_normalized" => "\376",
             "xor_val" => "4294967293"
           },
           {
             "h0" => "7406429479659263986",
             "h1_keys" => 2,
             "idx" => 1,
             "key" => "\x{101}",
             "key_is_utf8" => 1,
             "key_normalized" => "\304\201",
             "val" => "\x{101}",
             "val_is_utf8" => 1,
             "val_normalized" => "\304\201",
             "xor_val" => 2
           },
           {
             "h0" => "7026079938176527097",
             "h1_keys" => 1,
             "idx" => 2,
             "key" => "\x{103}",
             "key_is_utf8" => 1,
             "key_normalized" => "\304\203",
             "val" => "\x{103}",
             "val_is_utf8" => 1,
             "val_normalized" => "\304\203",
             "xor_val" => "4294967292"
           },
           {
             "h0" => "2560445542346638988",
             "idx" => 3,
             "key" => "\x{100}",
             "key_is_utf8" => 1,
             "key_normalized" => "\304\200",
             "val" => "\x{100}",
             "val_is_utf8" => 1,
             "val_normalized" => "\304\200"
           },
           {
             "h0" => "15075586565050556610",
             "h1_keys" => 3,
             "idx" => 4,
             "key" => "\x{102}",
             "key_is_utf8" => 1,
             "key_normalized" => "\304\202",
             "val" => "\x{102}",
             "val_is_utf8" => 1,
             "val_normalized" => "\304\202",
             "xor_val" => 1
           },
           {
             "h0" => "10520228168695442556",
             "h1_keys" => 1,
             "idx" => 5,
             "key" => "\x{ff}",
             "key_is_utf8" => 2,
             "key_normalized" => "\377",
             "val" => "\x{ff}",
             "val_is_utf8" => 1,
             "val_normalized" => "\303\277",
             "xor_val" => "4294967290"
           },
           {
             "h0" => "10697593882658072482",
             "h1_keys" => 1,
             "idx" => 6,
             "key" => "\372",
             "key_is_utf8" => 0,
             "key_normalized" => "\372",
             "val" => "\372",
             "val_is_utf8" => 0,
             "val_normalized" => "\372",
             "xor_val" => "4294967286"
           },
           {
             "h0" => "12905152200806003791",
             "idx" => 7,
             "key" => "\374",
             "key_is_utf8" => 0,
             "key_normalized" => "\374",
             "val" => "\374",
             "val_is_utf8" => 0,
             "val_normalized" => "\374"
           },
           {
             "h0" => "11318277185996573588",
             "idx" => 8,
             "key" => "\x{fd}",
             "key_is_utf8" => 2,
             "key_normalized" => "\375",
             "val" => "\x{fd}",
             "val_is_utf8" => 1,
             "val_normalized" => "\303\275"
           },
           {
             "h0" => "11837163118808456557",
             "idx" => 9,
             "key" => "\x{104}",
             "key_is_utf8" => 1,
             "key_normalized" => "\304\204",
             "val" => "\x{104}",
             "val_is_utf8" => 1,
             "val_normalized" => "\304\204"
           },
           {
             "h0" => "13939891494320893160",
             "h1_keys" => 2,
             "idx" => 10,
             "key" => "\x{fb}",
             "key_is_utf8" => 2,
             "key_normalized" => "\373",
             "val" => "\x{fb}",
             "val_is_utf8" => 1,
             "val_normalized" => "\303\273",
             "xor_val" => 2
           }
    ],
    "hash with utf8 works as expected",
) or diag Dumper($buckets);

t/OnDisk.pl  view on Meta::CPAN

41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
ok(defined($srand),"srand as expected: $srand");
my $eval_ok= eval {
    tie my(%fail), $class => $0;
    1;
};
my $error= !$eval_ok && $@;
ok($error,"it failed: $@");
 
my $tmpdir= File::Temp->newdir();
 
my $not_utf8= "not utf8: \x{DF}";
my $utf8_can_be_downgraded= "was utf8: \x{DF}";
utf8::upgrade($utf8_can_be_downgraded);
my $must_be_utf8= "is utf8: \x{100}"; # this can ONLY be represented as utf8
my @source_hashes= (
    simple => {
        foo => "bar",
        baz => "bop",
        fiz => "shmang",
        plop => "shwoosh",
    },
    large => { map { $_ => $_ } 1 .. 50000 },
    mixed_utf8 => {
        $not_utf8 => $not_utf8,
        $utf8_can_be_downgraded => $utf8_can_be_downgraded,
        $must_be_utf8 => $must_be_utf8,
        map { chr($_) => chr($_) } 240 .. 270,
    },
    pow2_08 =>
    { map { $_ => $_ } 1 .. 8 },
    pow2_16 =>
    { map { $_ => $_ } 1 .. 16 },
    pow2_32 =>
    { map { $_ => $_ } 1 .. 32 },
    pow2_64 =>
    { map { $_ => $_ } 1 .. 64 },
 
    chr_chr_utf8 =>
    { map { chr($_) => chr($_) } 256 .. 270 },
    chr_num_utf8 =>
    { map { chr($_) => $_ } 256 .. 270 },
    num_chr_utf8 =>
    { map { $_ => chr($_) } 256 .. 270 },
    mix_mix_utf8 =>
    { map { ($_ % 2 ? chr($_) : $_) => ($_ % 2 ? $_ : chr($_)) } 256 .. 270 },
    chr_mix_utf8 =>
    { map { chr($_)                 => ($_ % 2 ? $_ : chr($_)) } 256 .. 270 },
    num_mix_utf8 =>
    { map { $_                      => ($_ % 2 ? $_ : chr($_)) } 256 .. 270 },
    mix_num_utf8 =>
    { map { ($_ % 2 ? chr($_) : $_) => $_                      } 256 .. 270 },
    mix_chr_utf8 =>
    { map { ($_ % 2 ? chr($_) : $_) => chr($_)                 } 256 .. 270 },
 
);
 
my $rand_seed= join("",map chr(rand 256), 1..16);
foreach my $seed ("1234567812345678", undef, $rand_seed) {
    foreach my $idx (0 .. (@source_hashes/2)-1) {
        my $name= $source_hashes[$idx*2];
        my $source_hash= $source_hashes[$idx*2+1];
        foreach my $variant (defined($ENV{VARIANT}) ? ($ENV{VARIANT}) : (MIN_VARIANT .. MAX_VARIANT)) {

t/OnDisk.pl  view on Meta::CPAN

203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
                    is_deeply(\@got_keys,\@want_keys,"keys in both are the same ($title)");
                    is_deeply(\@got_each_keys,\@want_keys,"got_keys and got_each_keys agree ($title)");
 
                    is_deeply(\@got_values,\@want_values,"got_values and got_each_values agree ($title)");
                    is_deeply(\@got_fetch_values,\@want_values,"values in both are same ($title)");
                    is_deeply(\@got_each_values,\@want_values,"values in both are same ($title)");
 
                    {
                        my @bad;
                        foreach my $idx (0..$#got_keys) {
                            if (utf8::is_utf8($got_keys[$idx]) != utf8::is_utf8($want_keys[$idx])) {
                                push @bad, [ $got_keys[$idx], $want_keys[$idx] ];
                            }
                        }
                        is(0+@bad,0,"no keys with differing utf8 flags ($title)")
                            or diag Dumper($bad[0]);
                    }
 
                } else {
                    ok(0,"test cannot pass if make_file dies") for 1..17;
                }
            }
        }
    }
}



( run in 1.577 second using v1.01-cache-2.11-cpan-3b35f9de6a3 )