DMS-XS-Parser

 view release on metacpan or  search on metacpan

Parser.xs  view on Meta::CPAN

    }
    return newRV_noinc((SV *)h);
}

/* Build the per-entry lit hashref. The Emitter checks for the presence
 * of `integer_lit` vs `string_form` keys to dispatch — exactly one is
 * populated per entry, matching the C struct's `is_string_form` flag. */
static SV *original_lit_to_sv(pTHX_ const dms_original_literal *lit) {
    HV *h = newHV();
    if (lit->is_string_form) {
        if (lit->string_form) {
            hv_store(h, "string_form", 11, string_form_to_sv(aTHX_ lit->string_form), 0);
        }
    } else {
        if (lit->integer_lit) {
            SV *s = newSVpv(lit->integer_lit, 0);
            /* integer_lit is ASCII (digits + 0x/0o/0b prefixes + underscores)
             * — no UTF-8 decode needed. */
            hv_store(h, "integer_lit", 11, s, 0);
        }
    }
    return newRV_noinc((SV *)h);
}

/* Convert the C original-forms array to the Perl `[[path, lit], ...]`
 * shape DMS::Emitter expects. Returns an empty arrayref (not undef) when
 * `n == 0` so the Emitter's `|| []` guard is the only fallback path. */
static SV *original_forms_to_sv(pTHX_ const dms_original_form_entry *items, size_t n) {
    AV *av = newAV();
    if (n > 0) av_extend(av, (SSize_t)n - 1);
    for (size_t i = 0; i < n; i++) {
        const dms_original_form_entry *e = &items[i];
        AV *pair = newAV();
        av_extend(pair, 1);
        av_push(pair, path_segs_to_sv(aTHX_ e->path, e->path_len));
        av_push(pair, original_lit_to_sv(aTHX_ &e->lit));
        av_push(av, newRV_noinc((SV *)pair));
    }
    return newRV_noinc((SV *)av);
}

static SV *comments_to_sv(pTHX_ const dms_attached_comment *items, size_t n) {
    AV *av = newAV();
    if (n > 0) av_extend(av, (SSize_t)n - 1);
    for (size_t i = 0; i < n; i++) {
        const dms_attached_comment *ac = &items[i];
        HV *h = newHV();
        hv_store(h, "comment",  7, comment_to_sv(aTHX_ ac), 0);
        const char *pos =
            (ac->position == DMS_COMMENT_LEADING)  ? "leading"  :
            (ac->position == DMS_COMMENT_INNER)    ? "inner"    :
            (ac->position == DMS_COMMENT_TRAILING) ? "trailing" :
                                                     "floating";
        hv_store(h, "position", 8, newSVpv(pos, 0), 0);
        hv_store(h, "path",     4, path_to_sv(aTHX_ ac), 0);
        av_push(av, newRV_noinc((SV *)h));
    }
    return newRV_noinc((SV *)av);
}

/* --- Direct DMS -> conformance JSON streaming emit -------------------------
 *
 * For workloads where the only consumer of the parse tree is a JSON-emit
 * step (e.g. the conformance encoder, dms-tests harness), building the
 * full Perl SV/HV/AV/Tie::IxHash tree just to walk it once is pure waste.
 * `parse_to_json_bytes(src)` skips that round trip: it parses, then
 * serializes the dms_value tree directly into a single Perl string buffer
 * in C. No blessed sentinels, no IxHash tied magic, no per-leaf Perl call
 * frame.
 *
 * Output shape matches encoder.pl's `encode_json_value` byte-for-byte at
 * the structural level (the conformance runner re-parses the JSON, so
 * exact whitespace doesn't matter, only key order in objects). Tagged
 * scalars look like { "type": "...", "value": "..." }; tables are objects
 * preserving source key order; lists are arrays. Front matter, when
 * present, is wrapped as { "_meta": ..., "_body": ... } per the spec. */

typedef struct {
    char *buf;
    size_t len;
    size_t cap;
} jbuf;

/* XSUB.h #defines `realloc`/`free` as Perl's PerlMem_* macros which
 * require a thread-context argument. Our jbuf doesn't need that
 * indirection — it's a leaf C buffer with no Perl interaction — so we
 * #undef the macros and route through small libc-direct wrappers. The
 * Perl-side allocator is irrelevant here: jbuf memory lives only across
 * one XS call and is freed before returning. */
#ifdef realloc
#  undef realloc
#endif
#ifdef free
#  undef free
#endif

static void *libc_realloc(void *p, size_t n) { return realloc(p, n); }
static void  libc_free(void *p) { free(p); }

static void jbuf_grow(jbuf *j, size_t need) {
    size_t want = j->len + need;
    if (want <= j->cap) return;
    size_t cap = j->cap ? j->cap : 4096;
    while (cap < want) cap *= 2;
    j->buf = (char *)libc_realloc(j->buf, cap);
    j->cap = cap;
}

static inline void jbuf_putc(jbuf *j, char c) {
    if (j->len + 1 > j->cap) jbuf_grow(j, 1);
    j->buf[j->len++] = c;
}

static inline void jbuf_puts(jbuf *j, const char *s, size_t n) {
    if (j->len + n > j->cap) jbuf_grow(j, n);
    memcpy(j->buf + j->len, s, n);
    j->len += n;
}

static inline void jbuf_putcstr(jbuf *j, const char *s) {
    jbuf_puts(j, s, strlen(s));



( run in 0.691 second using v1.01-cache-2.11-cpan-140bd7fdf52 )