Log-Fmt-XS
view release on metacpan or search on metacpan
is_okchr(unsigned char c)
{
return (c == 0x21 ||
(c >= 0x23 && c <= 0x3C) ||
(c >= 0x3E && c <= 0x5B) ||
(c >= 0x5D && c <= 0x7E));
}
/*
 * Decide whether a string may be written as a bare (unquoted) logfmt value.
 *
 *   s   - pointer to the first byte of the string
 *   len - number of bytes to examine
 *
 * Returns 1 when the string is non-empty and every byte is accepted by
 * is_okchr(); returns 0 otherwise.  (The original note describes this as
 * matching KEY_RE.)
 */
static int
is_bare_value(const char *s, STRLEN len)
{
    const char *stop;

    /* The empty string always has to be quoted. */
    if (!len)
        return 0;

    /* Walk the bytes; the first one is_okchr() rejects settles the answer. */
    stop = s + len;
    for (; s != stop; ++s) {
        if (!is_okchr((unsigned char)*s))
            return 0;
    }

    return 1;
}
/*
 * Check if a Unicode codepoint needs \x{XX} escaping.
 * This is called AFTER \, ", \t, \n, \r are handled.
 *
 * Intended to match Perl's [\pC\v]: General Category C plus the vertical
 * whitespace characters.  Covered here are Cc, Cf, Co, Cs, the Unicode
 * noncharacters (a subset of Cn) and every code point above U+10FFFF.
 * Other unassigned (Cn) code points inside the Unicode range are NOT
 * tabulated and are reported as not needing an escape.
 *
 *   cp - the code point to classify
 *
 * Returns 1 if the code point must be escaped, 0 otherwise.
 */
static int
needs_escape(UV cp)
{
    /* Cc: C0 controls (0x00-0x1F) - includes \t, \n, \r but those are
     * already handled before this function is called */
    if (cp <= 0x1F)
        return 1;
    /* DEL */
    if (cp == 0x7F)
        return 1;
    /* C1 controls (0x80-0x9F), includes NEL (0x85) */
    if (cp >= 0x80 && cp <= 0x9F)
        return 1;
    /* Vertical whitespace not in Cc: LINE SEPARATOR, PARAGRAPH SEPARATOR */
    if (cp == 0x2028 || cp == 0x2029)
        return 1;

    /* Cf (format) characters, Basic Multilingual Plane */
    if (cp == 0x00AD) return 1;                  /* SOFT HYPHEN */
    if (cp >= 0x0600 && cp <= 0x0605) return 1;
    if (cp == 0x061C) return 1;
    if (cp == 0x06DD) return 1;
    if (cp == 0x070F) return 1;
    /* ARABIC POUND MARK ABOVE / ARABIC PIASTRE MARK ABOVE: Cf since
     * Unicode 14, unassigned (Cn) before that - \pC matches either way */
    if (cp >= 0x0890 && cp <= 0x0891) return 1;
    if (cp == 0x08E2) return 1;
    if (cp == 0x180E) return 1;
    if (cp >= 0x200B && cp <= 0x200F) return 1;  /* includes ZWJ (0x200D) */
    if (cp >= 0x202A && cp <= 0x202E) return 1;
    if (cp >= 0x2060 && cp <= 0x2064) return 1;
    if (cp >= 0x2066 && cp <= 0x206F) return 1;
    if (cp == 0xFEFF) return 1;                  /* BOM */
    if (cp >= 0xFFF9 && cp <= 0xFFFB) return 1;

    /* Co (private use) */
    if (cp >= 0xE000 && cp <= 0xF8FF) return 1;
    /* Cs (surrogates) - shouldn't appear in valid strings */
    if (cp >= 0xD800 && cp <= 0xDFFF) return 1;

    /* Higher plane Cf */
    if (cp == 0x110BD || cp == 0x110CD) return 1;
    if (cp >= 0x13430 && cp <= 0x1343F) return 1;
    if (cp >= 0x1BCA0 && cp <= 0x1BCA3) return 1;
    if (cp >= 0x1D173 && cp <= 0x1D17A) return 1;
    if (cp == 0xE0001) return 1;
    if (cp >= 0xE0020 && cp <= 0xE007F) return 1;

    /* Higher plane Co (private use) */
    if (cp >= 0xF0000 && cp <= 0xFFFFD) return 1;
    if (cp >= 0x100000 && cp <= 0x10FFFD) return 1;

    /* Noncharacters (subset of Cn): the last two code points of every
     * plane, plus the U+FDD0..U+FDEF block */
    if ((cp & 0xFFFE) == 0xFFFE) return 1;
    if (cp >= 0xFDD0 && cp <= 0xFDEF) return 1;

    /* Above-Unicode code points: Perl (5.20+) treats them as unassigned,
     * so \pC matches all of them, not only those ending in FFFE/FFFF */
    if (cp > 0x10FFFF) return 1;

    return 0;
}
/*
* Quote a string value for logfmt output.
* Input: a Perl SV (character string, may have UTF8 flag).
* Output: a new SV containing the quoted byte string (no UTF8 flag),
* wrapped in double quotes, with proper escaping.
*/
static SV *
quote_string_xs(pTHX_ SV *input)
{
STRLEN len;
const char *s = SvPV(input, len);
bool is_utf8 = cBOOL(SvUTF8(input));
const char *end = s + len;
SV *out;
/* Optimistic pre-allocate: most chars pass through or become 2-char escapes */
out = newSV(len * 2 + 3);
SvPOK_on(out);
sv_catpvn(out, "\"", 1);
while (s < end) {
UV cp;
STRLEN char_len;
if (is_utf8) {
cp = utf8_to_uvchr_buf((const U8 *)s, (const U8 *)end, &char_len);
if (char_len == 0) {
/* Malformed UTF-8, skip byte */
s++;
continue;
}
} else {
cp = (UV)(unsigned char)*s;
( run in 0.542 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )