Log-Fmt-XS

 view release on metacpan or  search on metacpan

XS.xs  view on Meta::CPAN

is_okchr(unsigned char c)
{
    /*
     * Accept a byte only if it is printable ASCII that is safe to emit
     * unquoted. The four accepted ranges cover 0x21..0x7E with three holes:
     *   0x22 '"'   (hole between 0x21 and 0x23)
     *   0x3D '='   (hole between 0x3C and 0x3E)
     *   0x5C '\\'  (hole between 0x5B and 0x5D)
     * Everything at or below 0x20 (controls and space) and everything at or
     * above 0x7F (DEL and non-ASCII bytes) is rejected as well.
     * Returns 1 for an accepted byte, 0 otherwise.
     */
    return (c == 0x21 ||
            (c >= 0x23 && c <= 0x3C) ||
            (c >= 0x3E && c <= 0x5B) ||
            (c >= 0x5D && c <= 0x7E));
}

/*
 * Decide whether a byte string may be written as a bare (unquoted) logfmt
 * value.
 *
 *   s   - pointer to the first byte of the candidate string
 *   len - number of bytes to examine
 *
 * Returns 1 when the string is non-empty and every byte passes is_okchr();
 * returns 0 for the empty string or as soon as one byte is rejected.
 */
static int
is_bare_value(const char *s, STRLEN len)
{
    const char *cursor;
    const char *stop;

    /* An empty value can never be bare; it has to be quoted. */
    if (len == 0)
        return 0;

    stop = s + len;
    for (cursor = s; cursor != stop; cursor++) {
        /* Cast first so that high-bit bytes are not sign-extended. */
        unsigned char byte = (unsigned char)*cursor;

        if (!is_okchr(byte))
            return 0;
    }

    return 1;
}

/*
 * Check if a Unicode codepoint needs \x{XX} escaping.
 * This is called AFTER \, ", \t, \n, \r are handled.
 *
 * The goal is to match Perl's [\pC\v]:
 *   - \pC is General Category C: Cc (control), Cf (format), Co (private
 *     use), Cs (surrogate) and Cn (unassigned / noncharacter).
 *   - \v is vertical whitespace: U+000A..U+000D, U+0085, U+2028, U+2029.
 *
 * Cc, Cf, Co, Cs and \v are covered exhaustively by the explicit ranges
 * below. Cn is only approximated: noncharacters and everything above
 * U+10FFFF are caught, but unassigned code points inside the Unicode range
 * are not, because that would require a full Unicode table.
 *
 *   cp - the code point to classify
 *
 * Returns 1 if the code point must be escaped, 0 if it may be emitted as is.
 */
static int
needs_escape(UV cp)
{
    /* Cc: C0 controls (0x00-0x1F) — includes \t, \n, \r but those are
     * already handled before this function is called */
    if (cp <= 0x1F)
        return 1;

    /* DEL */
    if (cp == 0x7F)
        return 1;

    /* C1 controls (0x80-0x9F), includes NEL (0x85) */
    if (cp >= 0x80 && cp <= 0x9F)
        return 1;

    /* Vertical whitespace not in Cc: LINE SEPARATOR, PARAGRAPH SEPARATOR */
    if (cp == 0x2028 || cp == 0x2029)
        return 1;

    /* Cf (format) characters */
    if (cp == 0x00AD)   return 1;  /* SOFT HYPHEN */
    if (cp >= 0x0600 && cp <= 0x0605) return 1;
    if (cp == 0x061C)   return 1;
    if (cp == 0x06DD)   return 1;
    if (cp == 0x070F)   return 1;
    /* ARABIC POUND/PIASTRE MARK ABOVE: Cf since Unicode 14, and unassigned
     * (Cn) before that, so \pC matches them on every Perl version. */
    if (cp >= 0x0890 && cp <= 0x0891) return 1;
    if (cp == 0x08E2)   return 1;
    if (cp == 0x180E)   return 1;
    if (cp >= 0x200B && cp <= 0x200F) return 1;  /* includes ZWJ (0x200D) */
    if (cp >= 0x202A && cp <= 0x202E) return 1;
    if (cp >= 0x2060 && cp <= 0x2064) return 1;
    if (cp >= 0x2066 && cp <= 0x206F) return 1;
    if (cp == 0xFEFF)   return 1;  /* BOM */
    if (cp >= 0xFFF9 && cp <= 0xFFFB) return 1;

    /* Co (private use) */
    if (cp >= 0xE000 && cp <= 0xF8FF) return 1;

    /* Cs (surrogates) — shouldn't appear in valid strings */
    if (cp >= 0xD800 && cp <= 0xDFFF) return 1;

    /* Higher plane Cf */
    if (cp == 0x110BD || cp == 0x110CD) return 1;
    if (cp >= 0x13430 && cp <= 0x1343F) return 1;
    if (cp >= 0x1BCA0 && cp <= 0x1BCA3) return 1;
    if (cp >= 0x1D173 && cp <= 0x1D17A) return 1;
    if (cp == 0xE0001)  return 1;
    if (cp >= 0xE0020 && cp <= 0xE007F) return 1;

    /* Higher plane Co (private use) */
    if (cp >= 0xF0000 && cp <= 0xFFFFD) return 1;
    if (cp >= 0x100000 && cp <= 0x10FFFD) return 1;

    /* Above-Unicode code points: Perl treats them as unassigned (Cn), so
     * \pC matches all of them, not only those ending in FFFE/FFFF. */
    if (cp > 0x10FFFF)
        return 1;

    /* Noncharacters (subset of Cn): U+nFFFE / U+nFFFF in every plane,
     * plus the U+FDD0..U+FDEF block */
    if ((cp & 0xFFFE) == 0xFFFE) return 1;
    if (cp >= 0xFDD0 && cp <= 0xFDEF) return 1;

    return 0;
}

/*
 * Quote a string value for logfmt output.
 * Input: a Perl SV (character string, may have UTF8 flag).
 * Output: a new SV containing the quoted byte string (no UTF8 flag),
 *         wrapped in double quotes, with proper escaping.
 */
static SV *
quote_string_xs(pTHX_ SV *input)
{
    STRLEN len;
    const char *s = SvPV(input, len);
    bool is_utf8 = cBOOL(SvUTF8(input));
    const char *end = s + len;
    SV *out;

    /* Optimistic pre-allocate: most chars pass through or become 2-char escapes */
    out = newSV(len * 2 + 3);
    SvPOK_on(out);
    sv_catpvn(out, "\"", 1);

    while (s < end) {
        UV cp;
        STRLEN char_len;

        if (is_utf8) {
            cp = utf8_to_uvchr_buf((const U8 *)s, (const U8 *)end, &char_len);
            if (char_len == 0) {
                /* Malformed UTF-8, skip byte */
                s++;
                continue;
            }
        } else {
            cp = (UV)(unsigned char)*s;



( run in 0.542 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )