JSON-YY
view release on metacpan or search on metacpan
/*
 Single-variable compiler barriers.

 When YYJSON_IS_REAL_GCC is true, each macro expands to an inline-asm statement
 with an empty template, so no instruction is emitted. The operand constraints
 are what matter:
   - load barrier:  `val` is a memory input ("m"), i.e. the asm "reads" it.
   - store barrier: `val` is a memory output ("=m"), i.e. the asm "writes" it.
   - full barrier:  `val` is both a memory output and a memory input.
 The asm is marked volatile so the compiler keeps it even though it has no
 visible effect.

 On every other compiler the macros expand to nothing.
 */
#if YYJSON_IS_REAL_GCC
# define gcc_load_barrier(val) __asm__ volatile(""::"m"(val))
# define gcc_store_barrier(val) __asm__ volatile("":"=m"(val))
# define gcc_full_barrier(val) __asm__ volatile("":"=m"(val):"m"(val))
#else
# define gcc_load_barrier(val)
# define gcc_store_barrier(val)
# define gcc_full_barrier(val)
#endif
/*==============================================================================
* MARK: - Constants (Private)
*============================================================================*/
/* Common error messages (plain string literals, grouped by failure kind). */
/* File I/O and memory allocation failures. */
#define MSG_FOPEN "failed to open file"
#define MSG_FREAD "failed to read file"
#define MSG_FWRITE "failed to write file"
#define MSG_FCLOSE "failed to close file"
#define MSG_MALLOC "failed to allocate memory"
/* Malformed `true` / `false` / `null` literal, or no valid value at all. */
#define MSG_CHAR_T "invalid literal, expected 'true'"
#define MSG_CHAR_F "invalid literal, expected 'false'"
#define MSG_CHAR_N "invalid literal, expected 'null'"
#define MSG_CHAR "unexpected character, expected a JSON value"
/* Structural errors inside arrays and objects. */
#define MSG_ARR_END "unexpected character, expected ',' or ']'"
#define MSG_OBJ_KEY "unexpected character, expected a string key"
#define MSG_OBJ_SEP "unexpected character, expected ':' after key"
#define MSG_OBJ_END "unexpected character, expected ',' or '}'"
/* Document boundaries, comments, trailing commas and nan/inf numbers. */
#define MSG_GARBAGE "unexpected content after document"
#define MSG_NOT_END "unexpected end of data"
#define MSG_COMMENT "unclosed multiline comment"
#define MSG_COMMA "trailing comma is not allowed"
#define MSG_NAN_INF "nan or inf number is not allowed"
/* Value type and text encoding errors. */
#define MSG_ERR_TYPE "invalid JSON value type"
#define MSG_ERR_BOM "UTF-8 byte order mark (BOM) is not supported"
#define MSG_ERR_UTF8 "invalid utf-8 encoding in string"
#define MSG_ERR_UTF16 "UTF-16 encoding is not supported"
#define MSG_ERR_UTF32 "UTF-32 encoding is not supported"
/*
 64-bit and size limits.
 Each name is #undef'ed first, so any definition already visible at this point
 is replaced by the one below.
 */
#undef U64_MAX
#define U64_MAX U64(0xFFFFFFFF, 0xFFFFFFFF)
#undef I64_MAX
#define I64_MAX U64(0x7FFFFFFF, 0xFFFFFFFF)
#undef USIZE_MAX
#define USIZE_MAX ((usize)(~(usize)0)) /* all bits of `usize` set */
/*
 Maximum number of decimal digits that can always be read into a
 u32/u64/usize without overflow: one digit fewer than the type's maximum
 value has, so every digit string of that length fits.
 */
#undef U32_SAFE_DIG
#define U32_SAFE_DIG 9 /* u32 max is 4294967295, 10 digits */
#undef U64_SAFE_DIG
#define U64_SAFE_DIG 19 /* u64 max is 18446744073709551615, 20 digits */
#undef USIZE_SAFE_DIG
/* uses the u64 limit when `usize` is 8 bytes wide, the u32 limit otherwise */
#define USIZE_SAFE_DIG (sizeof(usize) == 8 ? U64_SAFE_DIG : U32_SAFE_DIG)
/* Bit pattern of positive infinity: exponent field all ones, significand 0. */
#define F64_BITS_INF U64(0x7FF00000, 0x00000000)
/*
 Bit pattern of a positive quiet NaN.
 HPPA, and MIPS without __mips_nan2008, get a different pattern: there the top
 significand bit is left clear and all lower significand bits are set.
 NOTE(review): this matches the legacy NaN encoding of those targets, where the
 meaning of the top significand bit is inverted -- confirm against the ABI docs.
 */
#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
#define F64_BITS_NAN U64(0x7FF7FFFF, 0xFFFFFFFF)
#else
#define F64_BITS_NAN U64(0x7FF80000, 0x00000000)
#endif
/* maximum number of significant decimal digits kept when reading a double */
#define F64_MAX_DEC_DIG 768
/* maximum decimal exponent of a double (max is 1.7976931348623157e308) */
#define F64_MAX_DEC_EXP 308
/* minimum decimal exponent of a double (min is 4.9406564584124654e-324) */
#define F64_MIN_DEC_EXP (-324)
/* maximum binary exponent of a double */
#define F64_MAX_BIN_EXP 1024
/* minimum binary exponent of a double */
#define F64_MIN_BIN_EXP (-1021)
/* total width of a float/double in bits */
#define F32_BITS 32
#define F64_BITS 64
/* width of the exponent field in bits */
#define F32_EXP_BITS 8
#define F64_EXP_BITS 11
/* width of the stored significand field in bits */
#define F32_SIG_BITS 23
#define F64_SIG_BITS 52
/* width of the significand including the 1 hidden bit */
#define F32_SIG_FULL_BITS 24
#define F64_SIG_FULL_BITS 53
/* mask selecting the stored significand field */
#define F32_SIG_MASK U32(0x007FFFFF)
#define F64_SIG_MASK U64(0x000FFFFF, 0xFFFFFFFF)
/* mask selecting the exponent field (sign bit excluded) */
#define F32_EXP_MASK U32(0x7F800000)
#define F64_EXP_MASK U64(0x7FF00000, 0x00000000)
/* exponent bias: stored exponent = actual exponent + bias */
#define F32_EXP_BIAS 127
#define F64_EXP_BIAS 1023
/* number of significant decimal digits used for a float/double */
#define F32_DEC_DIG 9
#define F64_DEC_DIG 17
/* buffer length (in bytes) required by the float/double number writer */
#define FP_BUF_LEN 40
/* maximum length of a number in incremental parsing */
#define INCR_NUM_MAX_LEN 1024
/**
 Read `false` literal.
 Only the four bytes after `*ptr[0]` are compared (against "alse"); the first
 byte itself is not examined here.
 On success `val` is tagged as boolean false, `*ptr` is advanced by 5 bytes and
 true is returned. On mismatch nothing is modified and false is returned.
 */
static_inline bool read_false(u8 **ptr, yyjson_val *val) {
    u8 *pos = *ptr;

    if (!likely(byte_match_4(pos + 1, "alse"))) {
        return false;
    }
    *ptr = pos + 5;
    val->tag = YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE;
    return true;
}
/**
 Read `null` literal, `*ptr[0]` should be `n`.
 All four bytes at `*ptr` are compared against "null".
 On success `val` is tagged as null, `*ptr` is advanced by 4 bytes and true is
 returned. On mismatch nothing is modified and false is returned.
 */
static_inline bool read_null(u8 **ptr, yyjson_val *val) {
    u8 *pos = *ptr;

    if (!likely(byte_match_4(pos, "null"))) {
        return false;
    }
    *ptr = pos + 4;
    val->tag = YYJSON_TYPE_NULL;
    return true;
}
/**
 Read `Inf` or `Infinity` literal (ignoring case), with an optional sign.

 A leading `+` is only accepted when EXT_NUMBER is allowed. Text that starts
 with "infi" must continue as "infinity"; anything else after "infi" is
 rejected instead of being accepted as the short form.

 On success `*ptr` is moved past the literal and:
   - with NUMBER_AS_RAW, the previous raw string (at `*pre`) is
     null-terminated, `*pre` is set to the end of this literal, and `val`
     becomes a raw value covering the sign and the literal;
   - otherwise `val` becomes a real number holding `f64_bits_inf(sign)`.
 On failure false is returned and nothing is written.
 */
static_inline bool read_inf(u8 **ptr, u8 **pre,
                            yyjson_read_flag flg, yyjson_val *val) {
    u8 *start = *ptr;
    u8 *p = start;
    bool negative = (*p == '-');

    if (*p == '+' && !has_allow(EXT_NUMBER)) return false;
    p += char_is_sign(*p);

    /* mandatory "inf" prefix */
    if (char_to_lower(p[0]) != 'i' ||
        char_to_lower(p[1]) != 'n' ||
        char_to_lower(p[2]) != 'f') {
        return false;
    }

    if (char_to_lower(p[3]) != 'i') {
        /* short form: "inf" */
        p += 3;
    } else if (char_to_lower(p[4]) == 'n' &&
               char_to_lower(p[5]) == 'i' &&
               char_to_lower(p[6]) == 't' &&
               char_to_lower(p[7]) == 'y') {
        /* long form: "infinity" */
        p += 8;
    } else {
        return false;
    }

    *ptr = p;
    if (has_flg(NUMBER_AS_RAW)) {
        **pre = '\0'; /* terminate the previous raw string */
        *pre = p; /* remember where the current raw string ends */
        val->tag = ((u64)(p - start) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW;
        val->uni.str = (const char *)start;
    } else {
        val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL;
        val->uni.u64 = f64_bits_inf(negative);
    }
    return true;
}
/**
 Read `NaN` literal (ignoring case), with an optional sign.

 A leading `+` is only accepted when EXT_NUMBER is allowed.

 On success `*ptr` is moved past the literal and:
   - with NUMBER_AS_RAW, the previous raw string (at `*pre`) is
     null-terminated, `*pre` is set to the end of this literal, and `val`
     becomes a raw value covering the sign and the literal;
   - otherwise `val` becomes a real number holding `f64_bits_nan(sign)`.
 On failure false is returned and nothing is written.
 */
static_inline bool read_nan(u8 **ptr, u8 **pre,
                            yyjson_read_flag flg, yyjson_val *val) {
    u8 *start = *ptr;
    u8 *p = start;
    bool negative = (*p == '-');

    if (*p == '+' && !has_allow(EXT_NUMBER)) return false;
    p += char_is_sign(*p);

    if (char_to_lower(p[0]) != 'n' ||
        char_to_lower(p[1]) != 'a' ||
        char_to_lower(p[2]) != 'n') {
        return false;
    }
    p += 3;

    *ptr = p;
    if (has_flg(NUMBER_AS_RAW)) {
        **pre = '\0'; /* terminate the previous raw string */
        *pre = p; /* remember where the current raw string ends */
        val->tag = ((u64)(p - start) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW;
        val->uni.str = (const char *)start;
    } else {
        val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL;
        val->uni.u64 = f64_bits_nan(negative);
    }
    return true;
}
/**
 Read `Inf`, `Infinity` or `NaN` literal (ignoring case).
 Tries read_inf() first and falls back to read_nan() only if that fails.
 Returns true as soon as one of them succeeds, false if neither matches.
 */
static_inline bool read_inf_or_nan(u8 **ptr, u8 **pre,
                                   yyjson_read_flag flg, yyjson_val *val) {
    return read_inf(ptr, pre, flg, val) || read_nan(ptr, pre, flg, val);
}
/** Read a JSON number as raw string. */
static_noinline bool read_num_raw(u8 **ptr, u8 **pre, yyjson_read_flag flg,
yyjson_val *val, const char **msg) {
#define return_err(_pos, _msg) do { \
*msg = _msg; *end = _pos; return false; \
} while (false)
#define return_raw() do { \
val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; \
val->uni.str = (const char *)hdr; \
**pre = '\0'; *pre = cur; *end = cur; return true; \
} while (false)
u8 *hdr = *ptr;
u8 *cur = *ptr;
u8 **end = ptr;
/* skip sign */
cur += (*cur == '-');
/* read first digit, check leading zero */
while (unlikely(!char_is_digit(*cur))) {
if (has_allow(EXT_NUMBER)) {
if (*cur == '+' && cur == hdr) { /* leading `+` sign */
cur++;
continue;
}
if (*cur == '.' && char_is_digit(cur[1])) { /* e.g. '.123' */
goto read_double;
}
}
if (has_allow(INF_AND_NAN)) {
if (read_inf_or_nan(ptr, pre, flg, val)) return true;
}
return_err(cur, "no digit after sign");
}
/* read integral part */
if (*cur == '0') {
cur++;
if (unlikely(char_is_digit(*cur))) {
return_err(cur - 1, "number with leading zero is not allowed");
}
if (!char_is_fp(*cur)) {
if (has_allow(EXT_NUMBER) && char_to_lower(*cur) == 'x') { /* hex */
if (!char_is_hex(*++cur)) return_err(cur, "invalid hex number");
while(char_is_hex(*cur)) cur++;
}
return_raw();
}
} else {
while (char_is_digit(*cur)) cur++;
if (!char_is_fp(*cur)) return_raw();
if (u0_inside != w0_inside) {
*sig_dec = sp * 10 + (w0_inside ? 10 : 0);
*exp_dec = k;
return;
}
}
u1_inside = (lower <= 4 * s);
w1_inside = (upper >= 4 * s + 4);
mid = 4 * s + 2;
round_up = (vb > mid) || (vb == mid && (s & 1) != 0);
*sig_dec = s + ((u1_inside != w1_inside) ? w1_inside : round_up);
*exp_dec = k;
}
/**
 Convert f64 from binary to decimal (fast but not the shortest).
 The input should not be 0, inf, nan.

 @param sig_raw  Raw significand field of the number.
 @param exp_raw  Raw exponent field of the number.
 @param sig_bin  Binary significand to convert.
 @param exp_bin  Binary exponent that goes with `sig_bin`.
 @param sig_dec  Receives the decimal significand (already rounded).
 @param exp_dec  Receives the decimal exponent.
 @param round_up Receives whether the significand was incremented by rounding.
 */
static_inline void f64_bin_to_dec_fast(u64 sig_raw, u32 exp_raw,
                                       u64 sig_bin, i32 exp_bin,
                                       u64 *sig_dec, i32 *exp_dec,
                                       bool *round_up) {
    u64 scaled, pow_hi, pow_lo, prod_hi, prod_lo, threshold;
    i32 dec_exp, shift;
    bool is_irregular, bump;

    /* zero raw significand with a raw exponent above 1 */
    is_irregular = (sig_raw == 0 && exp_raw > 1);

    /* dec_exp = floor(exp_bin * log10(2) +
                       (is_irregular ? log10(3.0 / 4.0) : 0)); */
    dec_exp = (i32)(exp_bin * 315653 - (is_irregular ? 131237 : 0)) >> 20;

    /* shift = exp_bin + floor(log2(10) * -dec_exp) + 1; (1/2/3/4) */
    shift = exp_bin + ((-dec_exp * 217707) >> 16) + 1;

    pow10_table_get_sig(-dec_exp, &pow_hi, &pow_lo);

    /* multiply the shifted significand by the 128-bit power of ten */
    scaled = sig_bin << shift;
    u128_mul(scaled, pow_lo, &prod_hi, &prod_lo);
    u128_mul_add(scaled, pow_hi, prod_hi, &prod_hi, &prod_lo);

    /* round up when the low 64 bits reach the threshold */
    if (is_irregular) {
        threshold = U64(0x55555555, 0x55555555);
    } else {
        threshold = (u64)1 << 63;
    }
    bump = prod_lo >= threshold;

    *sig_dec = prod_hi + bump;
    *exp_dec = dec_exp;
    *round_up = bump;
}
/**
 Write inf/nan if allowed.

 - With INF_AND_NAN_AS_NULL: writes `null` and returns `buf + 4`.
 - Else, when INF_AND_NAN is allowed: a zero `sig_raw` writes `Infinity`
   (preceded by `-` when `sign` is set), a non-zero `sig_raw` writes `NaN`
   (the sign is not written for NaN).
 - Otherwise nothing is written and NULL is returned.

 Returns a pointer just past the written text, or NULL.
 */
static_inline u8 *write_inf_or_nan(u8 *buf, yyjson_write_flag flg,
                                   u64 sig_raw, bool sign) {
    if (has_flg(INF_AND_NAN_AS_NULL)) {
        byte_copy_4(buf, "null");
        return buf + 4;
    }
    if (!has_allow(INF_AND_NAN)) {
        return NULL;
    }
    if (sig_raw != 0) {
        /* copies 4 bytes (including the terminator), only 3 are counted */
        byte_copy_4(buf, "NaN");
        return buf + 3;
    }
    /* the '-' is always stored, then overwritten when `sign` is false */
    buf[0] = '-';
    buf += sign;
    byte_copy_8(buf, "Infinity");
    return buf + 8;
}
/**
Write a float number (requires 40 bytes buffer).
We follow the ECMAScript specification for printing floating-point numbers,
similar to `Number.prototype.toString()`, but with the following changes:
1. Keep the negative sign of `-0.0` to preserve input information.
2. Keep decimal point to indicate the number is floating point.
3. Remove positive sign in the exponent part.
*/
static_noinline u8 *write_f32_raw(u8 *buf, u64 raw_f64,
yyjson_write_flag flg) {
u32 sig_bin, sig_dec, sig_raw;
i32 exp_bin, exp_dec, sig_len, dot_ofs;
u32 exp_raw, raw;
u8 *end;
bool sign;
/* cast double to float */
raw = f32_to_bits(f64_to_f32(f64_from_bits(raw_f64)));
/* decode raw bytes from IEEE-754 double format. */
sign = (bool)(raw >> (F32_BITS - 1));
sig_raw = raw & F32_SIG_MASK;
exp_raw = (raw & F32_EXP_MASK) >> F32_SIG_BITS;
/* return inf or nan */
if (unlikely(exp_raw == ((u32)1 << F32_EXP_BITS) - 1)) {
return write_inf_or_nan(buf, flg, sig_raw, sign);
}
/* add sign for all finite number */
buf[0] = '-';
buf += sign;
/* return zero */
if ((raw << 1) == 0) {
byte_copy_4(buf, "0.0");
return buf + 3;
}
if (likely(exp_raw != 0)) {
/* normal number */
sig_bin = sig_raw | ((u32)1 << F32_SIG_BITS);
exp_bin = (i32)exp_raw - F32_EXP_BIAS - F32_SIG_BITS;
/* fast path for small integer number without fraction */
if ((-F32_SIG_BITS <= exp_bin && exp_bin <= 0) &&
(u64_tz_bits(sig_bin) >= (u32)-exp_bin)) {
sig_dec = sig_bin >> -exp_bin; /* range: [1, 0xFFFFFF] */
buf = write_u32_len_1_to_8(sig_dec, buf);
byte_copy_2(buf, ".0");
return buf + 2;
}
/* binary to decimal */
/* remove trailing zeros */
buf += dot_set_pos + 2;
buf = yyjson_max(buf, num_end);
buf -= *(buf - 1) == '0'; /* branchless for last zero */
buf -= *(buf - 1) == '0'; /* branchless for second last zero */
while (*(buf - 1) == '0') buf--; /* for unlikely more zeros */
buf += *(buf - 1) == '.'; /* keep a zero after dot */
return buf;
} else {
/* binary to decimal */
f64_bin_to_dec(sig_raw, exp_raw, sig_bin, exp_bin,
&sig_dec, &exp_dec);
/* the sig length is 16 or 17 */
sig_len = 16 + (sig_dec >= (u64)100000000 * 100000000);
/* write with scientific notation, e.g. 1.234e56 */
end = write_u64_len_16_to_17_trim(sig_dec, buf + 1);
end -= (end == buf + 2); /* remove '.0', e.g. 2.0e34 -> 2e34 */
exp_dec += sig_len - 1;
buf[0] = buf[1];
buf[1] = '.';
return write_f64_exp(exp_dec, end);
}
} else {
/* subnormal number */
byte_copy_4(buf, "0.0");
return buf + 3;
}
}
#else /* FP_WRITER */
/*
 snprintf_num(buf, len, fmt, dig, val)
 Format one number into `buf` with a printf-style function; `dig` and `val`
 are passed, in that order, as the two arguments consumed by `fmt`.
 The backend is chosen at compile time:
   - YYJSON_MSC_VER >= 1400: sprintf_s(), bounded by `len`.
   - `snprintf` defined as a macro, or C99 and later: snprintf(), bounded by
     `len`.
   - otherwise: sprintf(); `len` is ignored here, so the buffer must be large
     enough on its own.
 `buf` is cast to `char *` in every variant.
 */
#if YYJSON_MSC_VER >= 1400
#define snprintf_num(buf, len, fmt, dig, val) \
sprintf_s((char *)buf, len, fmt, dig, val)
#elif defined(snprintf) || (YYJSON_STDC_VER >= 199901L)
#define snprintf_num(buf, len, fmt, dig, val) \
snprintf((char *)buf, len, fmt, dig, val)
#else
#define snprintf_num(buf, len, fmt, dig, val) \
sprintf((char *)buf, fmt, dig, val)
#endif
/**
 Rewrite, in place, the `len` bytes of printf-style number text at `buf`.

 Non-finite or unexpected text (first byte after an optional `-` is not a
 digit):
   - with INF_AND_NAN_AS_NULL, `null` is written at `buf`;
   - else, when INF_AND_NAN is allowed, text starting with `i` becomes
     `Infinity` (a leading `-` is kept) and text starting with `n` becomes
     `NaN` written at `buf` (a leading `-` is dropped);
   - anything else returns NULL.

 Finite text:
   - every `,` is replaced by `.` (locale-dependent decimal point);
   - if `fixed`, trailing zeros are removed but one digit is kept after the
     decimal point;
   - otherwise `.0` is appended when there is neither a decimal point nor an
     exponent, and in an exponent the `+` sign and leading zeros are removed.

 The result may be longer than `len`: `.0` is written past `buf + len`, and
 `Infinity` needs 8 bytes after the optional sign.

 @return Pointer just past the rewritten text, or NULL if `len < 1` or the
         text cannot be used.
 */
static_noinline u8 *write_fp_reformat(u8 *buf, int len,
                                      yyjson_write_flag flg, bool fixed) {
    u8 *pos = buf;
    u8 *tail, *dot_pos = NULL, *exp_pos = NULL;

    if (unlikely(len < 1)) return NULL;
    pos += (*pos == '-');

    if (unlikely(!char_is_digit(*pos))) {
        /* nan, inf, or bad output */
        if (has_flg(INF_AND_NAN_AS_NULL)) {
            byte_copy_4(buf, "null");
            return buf + 4;
        }
        if (has_allow(INF_AND_NAN)) {
            if (*pos == 'i') {
                byte_copy_8(pos, "Infinity");
                return pos + 8;
            }
            if (*pos == 'n') {
                byte_copy_4(buf, "NaN");
                return buf + 3;
            }
        }
        return NULL;
    }

    /* finite number */
    tail = buf + len;

    /*
     The snprintf() function is locale-dependent. For currently known
     locales, (en, zh, ja, ko, am, he, hi) use '.' as the decimal point,
     while other locales use ',' as the decimal point. We need to replace
     ',' with '.' to avoid the locale setting.
     */
    for (; pos < tail; pos++) {
        u8 ch = *pos;
        if (ch == ',') {
            *pos = '.';
            dot_pos = pos;
        } else if (ch == '.') {
            dot_pos = pos;
        } else if (ch == 'e') {
            exp_pos = pos;
        }
    }

    if (fixed) {
        /* remove trailing zeros, but keep one digit after the dot */
        while (*(tail - 1) == '0') tail--;
        tail += *(tail - 1) == '.';
        return tail;
    }

    if (exp_pos) {
        pos = exp_pos + 1;
        /* remove positive sign in the exponent part */
        if (*pos == '+') {
            memmove(pos, pos + 1, (usize)(tail - pos - 1));
            tail--;
        }
        pos += (*pos == '-');
        /* remove leading zeros in the exponent part */
        if (*pos == '0') {
            u8 *first = pos;
            do {
                pos++;
            } while (*pos == '0');
            memmove(first, pos, (usize)(tail - pos));
            tail -= (usize)(pos - first);
        }
    } else if (!dot_pos) {
        /* add decimal point, e.g. 123 -> 123.0 */
        byte_copy_2(tail, ".0");
        tail += 2;
    }
    return tail;
}
/** Write a double number (requires 40 bytes buffer). */
static_noinline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) {
#if defined(DBL_DECIMAL_DIG) && DBL_DECIMAL_DIG < F64_DEC_DIG
int dig = DBL_DECIMAL_DIG;
#else
int dig = F64_DEC_DIG;
#endif
f64 val = f64_from_bits(raw);
( run in 1.108 second using v1.01-cache-2.11-cpan-39bf76dae61 )