ClickHouse-Encoder

 view release on metacpan or  search on metacpan

decode.c  view on Meta::CPAN

#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include <string.h>
#include <stdint.h>

#include "types.h"
#include "decimal.h"
#include "datetime.h"
#include "json_kind.h"
#include "decode.h"


/* ===== DECODER ============================================================
 * Symmetric counterpart to encode_column. Reads raw Native bytes through
 * a (cursor, end) pair, recursively building SVs. parse_type() returns
 * the same TypeInfo* used on the encode side, so the type tree is shared. */

/* Bounds guard for the (p, end) cursor pair: croaks unless at least
 * `needed` bytes remain between *p and end.
 *
 * The test compares `needed` against the remaining length (`end - *p`)
 * rather than advancing the pointer (`*p + needed > end`): `needed` can
 * come straight from a crafted wire varint (CH varints can encode up to
 * ~2^63), and adding that to a pointer could overflow it. Every call
 * site maintains `*p <= end`, so the pointer difference is non-negative
 * and converts to UV safely.
 *
 * Expects `p` (pointer to the cursor) and `end` in the caller's scope. */
#define DEC_NEED(needed)                                                     \
    do {                                                                     \
        const UV dec_need_want_  = (UV)(needed);                             \
        const UV dec_need_avail_ = (UV)(end - (*p));                         \
        if (dec_need_avail_ < dec_need_want_)                                \
            croak("decode: buffer truncated (need %lu more bytes)",          \
                  (unsigned long)dec_need_want_);                            \
    } while (0)

/* Read a little-endian multi-byte unsigned integer from a byte buffer.
 * Endianness-portable replacement for `memcpy(&v, ptr, N)`, which would
 * read big-endian values on a BE host and silently misdecode the wire
 * (CH Native is LE everywhere). For signed and floating-point reads,
 * the caller bit-casts via memcpy from the unsigned result. */
/* Assemble a 16-bit little-endian value: b[0] is the low byte, b[1] the
 * high byte. Reads exactly two bytes; the caller guarantees they exist. */
static inline uint16_t dec_le16(const unsigned char *b) {
    const unsigned lo = b[0];
    const unsigned hi = b[1];
    return (uint16_t)((hi << 8) | lo);
}
/* Assemble a 32-bit little-endian value from four bytes. Walks from the
 * most-significant byte (b[3]) down to b[0], shifting the accumulator up
 * by one byte per step. */
static inline uint32_t dec_le32(const unsigned char *b) {
    uint32_t acc = 0;
    int idx;
    for (idx = 3; idx >= 0; idx--)
        acc = (acc << 8) | (uint32_t)b[idx];
    return acc;
}
/* Assemble a 64-bit little-endian value from eight bytes. Walks from the
 * most-significant byte (b[7]) down to b[0], shifting the accumulator up
 * by one byte per step. */
static inline uint64_t dec_le64(const unsigned char *b) {
    uint64_t acc = 0;
    int idx;
    for (idx = 7; idx >= 0; idx--)
        acc = (acc << 8) | (uint64_t)b[idx];
    return acc;
}

/* Decode one unsigned varint (7 payload bits per byte, low group first,
 * high bit = "more bytes follow") from the cursor *p, advancing *p past
 * the bytes consumed.
 *
 *   p   - in/out cursor; must satisfy *p <= end on entry.
 *   end - one past the last readable byte.
 *
 * Returns the decoded value. Croaks if the buffer ends mid-varint or if
 * the encoded value does not fit in a UV. */
UV dec_varint(pTHX_ const unsigned char **p, const unsigned char *end) {
    /* Width of the accumulator. Normally 64; bounding by it keeps the
     * shifts below defined even if UV is narrower on this perl. */
    const int uv_bits = (int)(sizeof(UV) * 8);
    UV v = 0;
    int shift = 0;   /* invariant at loop top: 0 <= shift < uv_bits */
    while (1) {
        DEC_NEED(1);
        unsigned char b = *(*p)++;
        UV chunk = (UV)(b & 0x7f);
        /* The last admissible group only partially fits (for a 64-bit
         * UV: 1 of its 7 bits at shift 63). Reject payload bits that the
         * shift would discard instead of silently truncating the value. */
        if (shift + 7 > uv_bits && (chunk >> (uv_bits - shift)) != 0)
            croak("decode: varint exceeds 64 bits");
        v |= chunk << shift;
        if (!(b & 0x80)) break;
        shift += 7;
        /* A continuation bit with no room left for another group. */
        if (shift >= uv_bits) croak("decode: varint exceeds 64 bits");
    }
    return v;
}

/* Decode a varint-length-prefixed byte string at the cursor *p.
 *
 * On return *out_s points at the string's first byte inside the input
 * buffer (no copy is made and no NUL terminator is added), *out_len
 * holds its length, and *p has been advanced past the string. Croaks
 * if the length prefix or the payload runs past `end`. */
void dec_lenpfx_string(pTHX_ const unsigned char **p,
                              const unsigned char *end,
                              const char **out_s, STRLEN *out_len) {
    const UV nbytes = dec_varint(aTHX_ p, end);
    DEC_NEED(nbytes);

    const unsigned char *payload = *p;
    *out_s   = (const char *)payload;
    *out_len = (STRLEN)nbytes;
    *p       = payload + nbytes;
}

/* Common entry sequence for the block-decoding XSUBs.
 *
 * Validates `bytes`, obtains its byte-string view, positions a cursor at
 * `start_offset`, reads the two header varints (column count, then row
 * count) and sanity-checks both against the bytes that remain.
 *
 *   bytes        - SV holding the encoded block; must be defined.
 *   start_offset - byte offset of the block inside `bytes`.
 *   fname        - caller's name, used as the prefix of croak messages.
 *   out_start    - receives the address of the block's first byte.
 *   out_p        - receives the cursor, positioned just after the header.
 *   out_end      - receives one-past-the-end of the buffer.
 *   out_ncols / out_nrows - receive the header counts.
 *
 * Out-params are written only after every check has passed; on any
 * failure the function croaks and leaves them untouched. */
void decode_block_prologue(pTHX_ SV *bytes, UV start_offset,
                                  const char *fname,
                                  const unsigned char **out_start,
                                  const unsigned char **out_p,
                                  const unsigned char **out_end,
                                  UV *out_ncols, UV *out_nrows) {
    STRLEN total_len;
    const unsigned char *base;
    const unsigned char *block_start;
    const unsigned char *limit;
    const unsigned char *cursor;
    UV col_count;
    UV row_count;
    UV bytes_left;

    /* Run get-magic first so that lvalue / magical SVs (e.g. the PVLV
     * returned by 2-arg substr) report their real definedness to SvOK.
     * Only a true undef is rejected; an empty string is allowed through
     * and simply yields total_len == 0. */
    SvGETMAGIC(bytes);
    if (!SvOK(bytes))
        croak("%s: bytes argument is undef", fname);

    base = (const unsigned char *)SvPVbyte(bytes, total_len);
    if (start_offset > total_len)
        croak("%s: offset %lu past end of buffer (%lu bytes)",
              fname, (unsigned long)start_offset, (unsigned long)total_len);

    block_start = base + start_offset;
    limit       = base + total_len;
    cursor      = block_start;

    col_count = dec_varint(aTHX_ &cursor, limit);
    row_count = dec_varint(aTHX_ &cursor, limit);

    /* Both counts come off the wire and may be arbitrarily large in a
     * malicious or corrupted block. A count larger than the number of
     * bytes left after the header cannot be satisfied, so refuse it up
     * front rather than let a later stage allocate for it. */
    bytes_left = (UV)(limit - cursor);
    if (col_count > bytes_left)
        croak("%s: ncols=%lu exceeds remaining buffer (%lu bytes)",
              fname, (unsigned long)col_count, (unsigned long)bytes_left);
    if (row_count > bytes_left)
        croak("%s: nrows=%lu exceeds remaining buffer (%lu bytes)",
              fname, (unsigned long)row_count,
              (unsigned long)bytes_left);

    *out_start = block_start;
    *out_p     = cursor;
    *out_end   = limit;
    *out_ncols = col_count;
    *out_nrows = row_count;
}

/* Return a new boolean object in the JSON::PP style: a reference to an
 * integer scalar holding 0 or 1, blessed into 'JSON::PP::Boolean'
 * (the stash is created if it does not exist yet).
 *
 * Decoded JSON/Dynamic Bool variant slots use this instead of a plain
 * newSViv(0|1) so that re-encoding the round-tripped value selects the
 * Bool wire variant again rather than widening to Int64; the package
 * name matches what json_pkg_is_bool() recognizes on the encode side.
 * Any non-zero `b` is treated as true. */
static SV *make_json_bool_sv(pTHX_ int b) {
    SV *const flag_ref   = newRV_noinc(newSViv(b != 0));
    HV *const bool_stash = gv_stashpv("JSON::PP::Boolean", GV_ADD);
    return sv_bless(flag_ref, bool_stash);
}

decode.c  view on Meta::CPAN

            uint64_t u = dec_le64(*p); *p += 8;
            int64_t v;  memcpy(&v, &u, 8);
            av_store(sub, k, newSViv((IV)v));
        }
        return;
    }
    if (kind == JV_FLOAT64) {
        if ((UV)nv_rows > (UV)(end - *p) / 8)
            croak("decode: buffer truncated (need %lu more bytes)",
                  (unsigned long)((UV)nv_rows * 8));
        for (k = 0; k < nv_rows; k++) {
            uint64_t u = dec_le64(*p); *p += 8;
            double v;  memcpy(&v, &u, 8);
            av_store(sub, k, newSVnv(v));
        }
        return;
    }
    if (kind == JV_BOOL) {
        if ((UV)nv_rows > (UV)(end - *p))
            croak("decode: buffer truncated (need %lu more bytes)",
                  (unsigned long)nv_rows);
        for (k = 0; k < nv_rows; k++) {
            unsigned char b = *(*p)++;
            av_store(sub, k, make_json_bool_sv(aTHX_ b));
        }
        return;
    }
    if (kind == JV_ARRAY_BOOL || kind == JV_ARRAY_FLOAT64
     || kind == JV_ARRAY_INT64 || kind == JV_ARRAY_STRING) {
        /* Array variant column: N UInt64 offsets, then offsets[N-1]
         * inner-type elements concatenated. */
        if (nv_rows == 0) return;
        if ((UV)nv_rows > (UV)(end - *p) / 8)
            croak("decode: buffer truncated (need %lu more bytes)",
                  (unsigned long)((UV)nv_rows * 8));
        uint64_t *offs;
        Newx(offs, nv_rows, uint64_t);
        SAVEFREEPV(offs);
        uint64_t prev_o = 0;
        for (k = 0; k < nv_rows; k++) {
            offs[k] = dec_le64(*p); *p += 8;
            /* Per-offset overflow + monotonicity; protects later casts to
             * SSize_t (e.g. av_extend) from negative-wrap. */
            if (offs[k] > (uint64_t)SSize_t_MAX || offs[k] < prev_o)
                croak("decode JSON: Array variant offset[%ld]=%lu invalid "
                      "(prev=%lu)",
                      (long)k, (unsigned long)offs[k], (unsigned long)prev_o);
            prev_o = offs[k];
        }
        uint64_t total = offs[nv_rows - 1];
        /* Defensive: total elements must fit into the remaining buffer
         * (1+ bytes per element minimum). Catches corrupted offset
         * lists before they trigger huge AV allocations. */
        if (total > (uint64_t)(end - *p))
            croak("decode JSON: Array variant total=%lu exceeds remaining "
                  "buffer (%lu bytes)",
                  (unsigned long)total, (unsigned long)(end - *p));
        for (k = 0; k < nv_rows; k++)
            av_store(sub, k, newRV_noinc((SV*)newAV()));

        /* Inner cursor walks through elements while row_idx advances
         * each time we hit the cumulative offset boundary. */
        uint64_t prev = 0;
        SSize_t row_idx = 0;
        AV *inner = (AV*)SvRV(*av_fetch(sub, 0, 0));
        if (offs[0] > 0) av_extend(inner, (SSize_t)offs[0] - 1);
        SSize_t inner_cursor = 0;

        STRLEN per_elem = (kind == JV_ARRAY_BOOL) ? 1 : 8;
        /* `total` is attacker-controlled (sum of wire offsets); use the
         * division-form check to avoid the multiplication overflowing. */
        if (kind != JV_ARRAY_STRING
            && total > (uint64_t)(end - *p) / per_elem)
            croak("decode: buffer truncated (need %lu more bytes)",
                  (unsigned long)(per_elem * total));

        uint64_t i;
        for (i = 0; i < total; i++) {
            while (inner_cursor >= (SSize_t)(offs[row_idx] - prev)) {
                prev = offs[row_idx];
                row_idx++;
                /* If a corrupted offset list has trailing zero-length
                 * rows that the outer total didn't cover, row_idx
                 * could walk past the populated entries. Bail before
                 * av_fetch returns NULL and we deref it. */
                if (row_idx >= nv_rows)
                    croak("decode: array variant offsets advanced past "
                          "nv_rows=%ld (corrupted block)", (long)nv_rows);
                inner_cursor = 0;
                inner = (AV*)SvRV(*av_fetch(sub, row_idx, 0));
                uint64_t n2 = offs[row_idx] - prev;
                if (n2 > 0) av_extend(inner, (SSize_t)n2 - 1);
            }
            SV *ev;
            switch (kind) {
                case JV_ARRAY_BOOL: {
                    unsigned char b = *(*p)++;
                    ev = make_json_bool_sv(aTHX_ b);
                    break;
                }
                case JV_ARRAY_INT64: {
                    uint64_t u = dec_le64(*p); *p += 8;
                    int64_t v;  memcpy(&v, &u, 8);
                    ev = newSViv((IV)v);
                    break;
                }
                case JV_ARRAY_FLOAT64: {
                    uint64_t u = dec_le64(*p); *p += 8;
                    double v;  memcpy(&v, &u, 8);
                    ev = newSVnv(v);
                    break;
                }
                case JV_ARRAY_STRING: {
                    const char *vs; STRLEN vl;
                    dec_lenpfx_string(aTHX_ p, end, &vs, &vl);
                    ev = newSVpvn(vs, vl);
                    break;
                }
                default: ev = newSV(0);  /* unreachable */
            }
            av_store(inner, inner_cursor++, ev);
        }
        return;
    }
    croak("decode %s: internal: unknown kind %d", ctx, kind);
}

/* Fill `av` with `nrows` freshly decoded scalars: for each row index
 * 0..nrows-1, evaluate `sv_expr` once and av_store() the result at that
 * index. `sv_expr` is expected to read from (and advance) the cursor.
 * Kept as one tight loop because per-row dispatch overhead would dwarf
 * the cost of the data read itself.
 *
 * Notes for callers:
 *  - The macro performs no bounds checking of its own; the uses visible
 *    in this file run DEC_NEED for the whole run first.
 *  - The loop counter is a macro-local `r` that shadows any `r` in the
 *    enclosing scope, so an `sv_expr` mentioning `r` sees this counter.
 *  - `av` is substituted without parentheses; pass a plain identifier. */
#define DEC_SCALAR_LOOP(av, nrows, sv_expr) do {                             \
    SSize_t r;                                                               \
    for (r = 0; r < (nrows); r++) av_store(av, r, (sv_expr));                \
} while (0)

SV *decode_column(pTHX_ const unsigned char **p,
                         const unsigned char *end,
                         TypeInfo *t, SSize_t nrows) {
    AV *av = newAV();
    if (nrows > 0) av_extend(av, nrows - 1);
    SSize_t r;

    switch (t->code) {
        case T_INT8: {
            DEC_NEED((STRLEN)nrows);
            DEC_SCALAR_LOOP(av, nrows, newSViv((IV)(int8_t)*(*p)++));
            break;
        }
        case T_UINT8: case T_BOOL: case T_ENUM8: {
            DEC_NEED((STRLEN)nrows);
            DEC_SCALAR_LOOP(av, nrows, newSVuv((UV)*(*p)++));
            break;
        }
        case T_INT16: {
            DEC_NEED((STRLEN)(2 * nrows));
            for (r = 0; r < nrows; r++) {
                /* Use dec_le16 + memcpy bit-cast (same pattern as INT32
                 * and INT64) to keep the signed conversion well-defined
                 * across compilers; the inline (int16_t)(...) cast on
                 * a promoted-int high-bit value is implementation-
                 * defined in C99/C11. */
                uint16_t u = dec_le16(*p);
                int16_t  v;  memcpy(&v, &u, 2);
                av_store(av, r, newSViv((IV)v));
                *p += 2;
            }
            break;
        }
        case T_UINT16: case T_DATE: case T_ENUM16: {
            DEC_NEED((STRLEN)(2 * nrows));
            for (r = 0; r < nrows; r++) {
                uint16_t v = dec_le16(*p);
                av_store(av, r, newSVuv((UV)v));
                *p += 2;
            }
            break;
        }
        case T_INT32: case T_DATE32: case T_DECIMAL32: {
            DEC_NEED((STRLEN)(4 * nrows));
            for (r = 0; r < nrows; r++) {
                uint32_t u = dec_le32(*p);
                int32_t v;  memcpy(&v, &u, 4);

decode.c  view on Meta::CPAN

                croak("decode: LowCardinality index count (%lu) != nrows (%ld)",
                      (unsigned long)idx_n, (long)nrows);
            size_t idx_bytes = (idx_type == 0) ? 1 :
                               (idx_type == 1) ? 2 :
                               (idx_type == 2) ? 4 : 8;
            DEC_NEED((STRLEN)(idx_bytes * idx_n));
            for (r = 0; r < nrows; r++) {
                uint64_t i = 0;
                switch (idx_bytes) {
                    case 1: i = (uint64_t)(*p)[0]; break;
                    case 2: i = (uint64_t)dec_le16(*p); break;
                    case 4: i = (uint64_t)dec_le32(*p); break;
                    case 8: i = dec_le64(*p); break;
                }
                *p += idx_bytes;
                if (is_null && i == 0) {
                    av_store(av, r, newSV(0));
                } else {
                    if (i >= dict_n)
                        croak("decode: LowCardinality index %lu out of range "
                              "(dict_n=%lu) at row %ld",
                              (unsigned long)i, (unsigned long)dict_n, (long)r);
                    SV **elem = av_fetch(dict, (SSize_t)i, 0);
                    av_store(av, r, elem ? SvREFCNT_inc(*elem) : newSV(0));
                }
            }
            SvREFCNT_dec(dict_rv);
            break;
        }
        case T_VARIANT: {
            DEC_NEED(8);
            uint64_t mode = dec_le64(*p);
            *p += 8;
            if (mode != 0) croak("decode: Variant mode != 0 (got %lu)", (unsigned long)mode);
            DEC_NEED((STRLEN)nrows);
            unsigned char *wire_disc;
            Newx(wire_disc, nrows, unsigned char);
            SAVEFREEPV(wire_disc);
            for (r = 0; r < nrows; r++) wire_disc[r] = *(*p)++;
            int nvar = t->tuple_len;
            SSize_t *counts;
            Newxz(counts, nvar, SSize_t);
            SAVEFREEPV(counts);
            for (r = 0; r < nrows; r++) {
                unsigned char w = wire_disc[r];
                if (w != 255) {
                    if (w >= nvar) croak("decode: Variant wire idx %u out of range", w);
                    counts[w]++;
                }
            }
            /* Decode each sub-column in wire (alphabetical) order; the
             * decl index of wire position w is t->variant_wire_to_decl[w]. */
            SV **subcols;
            Newx(subcols, nvar, SV*);
            SAVEFREEPV(subcols);
            int w;
            for (w = 0; w < nvar; w++) {
                int decl = t->variant_wire_to_decl[w];
                subcols[w] = decode_column(aTHX_ p, end, t->tuple[decl], counts[w]);
            }
            SSize_t *cursors;
            Newxz(cursors, nvar, SSize_t);
            SAVEFREEPV(cursors);
            for (r = 0; r < nrows; r++) {
                unsigned char wd = wire_disc[r];
                if (wd == 255) {
                    av_store(av, r, newSV(0));
                } else {
                    int decl = t->variant_wire_to_decl[wd];
                    SV **elem = av_fetch((AV *)SvRV(subcols[wd]), cursors[wd]++, 0);
                    AV *pair = newAV();
                    av_extend(pair, 1);
                    av_store(pair, 0, newSViv(decl));
                    av_store(pair, 1, elem ? SvREFCNT_inc(*elem) : newSV(0));
                    av_store(av, r, newRV_noinc((SV*)pair));
                }
            }
            for (w = 0; w < nvar; w++) SvREFCNT_dec(subcols[w]);
            break;
        }

        case T_JSON: {
            /* Object structure prefix. Versions: V1=0, V2=2, V3=4. */
            DEC_NEED(8);
            uint64_t obj_ver = dec_le64(*p); *p += 8;
            if (obj_ver != 0 && obj_ver != 2 && obj_ver != 4)
                croak("decode JSON: unsupported Object version %lu "
                      "(known: 0, 2, 4); upgrade ClickHouse::Encoder",
                      (unsigned long)obj_ver);
            if (obj_ver == 0) {
                /* V1: extra max_dynamic_paths varint before count. */
                (void)dec_varint(aTHX_ p, end);
            }
            UV num_paths = dec_varint(aTHX_ p, end);
            /* Defensive: each path takes at least 2 bytes (1-byte varint
             * length prefix + 1-byte name). Reject obviously-impossible
             * counts before allocating arrays sized by num_paths. */
            if (num_paths > (UV)(end - *p))
                croak("decode JSON: num_paths=%lu exceeds remaining buffer "
                      "(%lu bytes)",
                      (unsigned long)num_paths, (unsigned long)(end - *p));

            char **paths = NULL;
            STRLEN *path_lens = NULL;
            if (num_paths > 0) {
                Newx(paths, num_paths, char*);
                SAVEFREEPV(paths);
                Newx(path_lens, num_paths, STRLEN);
                SAVEFREEPV(path_lens);
            }
            UV pi;
            for (pi = 0; pi < num_paths; pi++) {
                const char *ps;
                STRLEN pl;
                dec_lenpfx_string(aTHX_ p, end, &ps, &pl);
                paths[pi]     = (char*)ps;  /* aliases input buffer */
                path_lens[pi] = pl;
            }
            if (obj_ver == 4) {
                /* V3 adds shared_data_serialization_version, and a
                 * buckets count when that version is MAP_WITH_BUCKETS
                 * (=1) or ADVANCED (=2). Native format with statistics
                 * disabled (the default) skips the stats afterwards. */
                UV shared_ver = dec_varint(aTHX_ p, end);
                if (shared_ver == 1 || shared_ver == 2)
                    (void)dec_varint(aTHX_ p, end);
            }

            /* Per-path Dynamic prefix: collect type-name lists. */
            int **path_kind_list = NULL;  /* path_kind_list[p][i] = JsonValueKind */

decode.c  view on Meta::CPAN

                }
            }

            /* Per-path Variant data: discs + per-variant values. */
            for (pi = 0; pi < num_paths; pi++) {
                /* Read N disc bytes. */
                DEC_NEED((STRLEN)nrows);
                unsigned char *discs;
                Newx(discs, nrows, unsigned char);
                SAVEFREEPV(discs);
                for (r = 0; r < nrows; r++) discs[r] = *(*p)++;

                /* Compute per-variant row counts and lex-position table.
                 * The wire variant list has (kind_count + 1) entries (the
                 * +1 is SharedVariant inserted at its lex position 7).
                 * Rebuild the kind mask from the type-name list we just
                 * parsed and reuse the same lex-table helper as encode. */
                int nv = path_kind_count[pi];
                int wire_slots = nv + 1;
                SSize_t *var_counts;
                Newxz(var_counts, wire_slots, SSize_t);
                SAVEFREEPV(var_counts);

                int slot_to_kind_or_shared[JSON_LEX_SLOTS];
                {
                    unsigned mask = 0;
                    int i;
                    for (i = 0; i < nv; i++)
                        mask |= 1u << path_kind_list[pi][i];
                    (void)json_build_lex_table(mask, slot_to_kind_or_shared);
                }

                for (r = 0; r < nrows; r++) {
                    unsigned char d = discs[r];
                    if (d == 0xff) continue;
                    if (d >= wire_slots)
                        croak("decode JSON: path '%.*s' disc %u out of range "
                              "(wire_slots=%d)",
                              (int)path_lens[pi], paths[pi], d, wire_slots);
                    var_counts[d]++;
                }

                /* Decode each wire-slot's column data. SharedVariant
                 * (kind=-1) is a String column on the wire, which our
                 * encoder never populates (0 rows here in practice). */
                AV **var_avs;
                Newxz(var_avs, wire_slots, AV*);
                SAVEFREEPV(var_avs);
                int slot;
                for (slot = 0; slot < wire_slots; slot++) {
                    SSize_t nv_rows = var_counts[slot];
                    AV *sub = newAV();
                    var_avs[slot] = sub;
                    sv_2mortal((SV*)sub);
                    if (nv_rows > 0) av_extend(sub, nv_rows - 1);
                    decode_dynamic_variant_slot(aTHX_ p, end, sub,
                        slot_to_kind_or_shared[slot], nv_rows, "JSON");
                }

                /* Distribute values into per-row hashes. */
                SSize_t *cursors;
                Newxz(cursors, wire_slots, SSize_t);
                SAVEFREEPV(cursors);
                for (r = 0; r < nrows; r++) {
                    unsigned char d = discs[r];
                    if (d == 0xff) continue;
                    SV **e = av_fetch(var_avs[d], cursors[d]++, 0);
                    if (!e) continue;
                    SV *row_rv = *av_fetch(av, r, 0);
                    HV *row_hv = (HV*)SvRV(row_rv);
                    SV *val = SvREFCNT_inc(*e);
                    hv_store(row_hv, paths[pi], (I32)path_lens[pi], val, 0);
                }
            }

            /* Trailing shared data: N UInt64 offsets, then if final
             * offset > 0, offsets[N-1] key strings + value strings.
             * Only the last offset determines downstream parsing; skip
             * the rest with a single pointer bump. */
            uint64_t last_offset = 0;
            if (nrows > 0) {
                DEC_NEED((STRLEN)(8 * nrows));
                last_offset = dec_le64(*p + 8 * (nrows - 1));
                *p += 8 * nrows;
            }
            if (last_offset > 0) {
                /* Each string is a length varint + bytes (>= 1 byte
                 * total). A corrupted last_offset must not let us spin
                 * calling dec_lenpfx_string 2^32 times before each one
                 * croaks on the truncated buffer. */
                if (last_offset > (uint64_t)(end - *p))
                    croak("decode JSON: shared-data last_offset=%lu "
                          "exceeds remaining buffer (%lu bytes)",
                          (unsigned long)last_offset,
                          (unsigned long)(end - *p));
                uint64_t i;
                for (i = 0; i < last_offset; i++) {
                    const char *s; STRLEN l;
                    dec_lenpfx_string(aTHX_ p, end, &s, &l);
                }
                for (i = 0; i < last_offset; i++) {
                    const char *s; STRLEN l;
                    dec_lenpfx_string(aTHX_ p, end, &s, &l);
                }
            }

            /* Unflatten dotted-path keys into nested hashes. Symmetric to the
             * encoder which flattens nested hashrefs into dotted paths on the
             * wire. Collision-safe: if an intermediate hop is already a
             * non-HV (some path emitted a leaf at "a" while another emitted
             * "a.b"), the dotted form is left intact at the top level. */
            for (r = 0; r < nrows; r++) {
                SV *row_rv = *av_fetch(av, r, 0);
                HV *row_hv = (HV*)SvRV(row_rv);
                /* Snapshot keys: we may mutate row_hv during iteration. */
                AV *keys = (AV*)sv_2mortal((SV*)newAV());
                hv_iterinit(row_hv);
                HE *he;
                while ((he = hv_iternext(row_hv))) {
                    I32 klen;
                    char *kstr = hv_iterkey(he, &klen);
                    if (memchr(kstr, '.', klen))
                        av_push(keys, newSVpvn(kstr, klen));
                }
                SSize_t nk = av_len(keys) + 1;
                SSize_t ki;
                for (ki = 0; ki < nk; ki++) {

decode.c  view on Meta::CPAN

            }
            UV ti;
            for (ti = 0; ti < ntypes; ti++) {
                const char *ts; STRLEN tl;
                dec_lenpfx_string(aTHX_ p, end, &ts, &tl);
                int k = json_kind_from_type_name(ts, tl);
                if (k < 0)
                    croak("decode Dynamic: unsupported variant type '%.*s' "
                          "(supported: Bool, Float64, Int64, String, "
                          "Array(...) of those)",
                          (int)tl, ts);
                kinds_in_order[ti] = k;
            }

            DEC_NEED(8);
            uint64_t var_mode = dec_le64(*p); *p += 8;
            if (var_mode != 0)
                croak("decode Dynamic: only BASIC variant mode supported "
                      "(got %lu)", (unsigned long)var_mode);

            int nv = (int)ntypes;
            int wire_slots = nv + 1;
            unsigned mask = 0;
            int i;
            for (i = 0; i < nv; i++) mask |= 1u << kinds_in_order[i];

            int slot_to_kind[JSON_LEX_SLOTS];
            (void)json_build_lex_table(mask, slot_to_kind);

            DEC_NEED((STRLEN)nrows);
            unsigned char *discs;
            Newx(discs, nrows, unsigned char);
            SAVEFREEPV(discs);
            for (r = 0; r < nrows; r++) discs[r] = *(*p)++;

            SSize_t *var_counts;
            Newxz(var_counts, wire_slots, SSize_t);
            SAVEFREEPV(var_counts);
            for (r = 0; r < nrows; r++) {
                if (discs[r] == 0xff) continue;
                if (discs[r] >= wire_slots)
                    croak("decode Dynamic: disc %u out of range "
                          "(wire_slots=%d)", discs[r], wire_slots);
                var_counts[discs[r]]++;
            }

            AV **var_avs;
            Newxz(var_avs, wire_slots, AV*);
            SAVEFREEPV(var_avs);
            int slot;
            for (slot = 0; slot < wire_slots; slot++) {
                SSize_t nv_rows = var_counts[slot];
                AV *sub = newAV();
                var_avs[slot] = sub;
                sv_2mortal((SV*)sub);
                if (nv_rows > 0) av_extend(sub, nv_rows - 1);
                decode_dynamic_variant_slot(aTHX_ p, end, sub,
                    slot_to_kind[slot], nv_rows, "Dynamic");
            }

            SSize_t *cursors;
            Newxz(cursors, wire_slots, SSize_t);
            SAVEFREEPV(cursors);
            for (r = 0; r < nrows; r++) {
                unsigned char d = discs[r];
                if (d == 0xff) { av_store(av, r, newSV(0)); continue; }
                SV **e = av_fetch(var_avs[d], cursors[d]++, 0);
                av_store(av, r, e ? SvREFCNT_inc(*e) : newSV(0));
            }
            break;
        }

        default:
            croak("decode: unhandled type code %d", t->code);
    }
    return newRV_noinc((SV *)av);
}

#undef DEC_NEED
#undef DEC_SCALAR_LOOP



( run in 0.676 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )