JSON-YY
view release on metacpan or search on metacpan
# define YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS 0
# endif
#endif
/*
Estimated initial ratio of the JSON data (data_size / value_count).
For example:
data: {"id":12345678,"name":"Harry"}
data_size: 30
value_count: 5
ratio: 6
yyjson uses dynamic memory with a growth factor of 1.5 when reading and writing
JSON; the ratios below are used to determine the initial memory size.
A ratio that is too large wastes memory, while a ratio that is too small causes
repeated memory growth and degrades performance. Currently, these ratios are
derived from some commonly used JSON datasets.
*/
/* Reader-side estimates; per the macro names, one for pretty-printed input
   and one for minified input. */
#define YYJSON_READER_ESTIMATED_PRETTY_RATIO 16
#define YYJSON_READER_ESTIMATED_MINIFY_RATIO 6
/* Writer-side estimates; per the macro names, one for pretty-printed output
   and one for minified output. */
#define YYJSON_WRITER_ESTIMATED_PRETTY_RATIO 32
#define YYJSON_WRITER_ESTIMATED_MINIFY_RATIO 18
/*
 The initial and maximum size of the memory pool's chunk in yyjson_mut_doc.
 String pool: 0x100 (256) initially, at most 0x10000000 (268435456);
 the unit is presumably bytes -- confirm against the pool allocator.
 Value pool: room for 0x10 (16) `yyjson_mut_val` initially, at most room for
 0x1000000 (16777216) of them.
 */
#define YYJSON_MUT_DOC_STR_POOL_INIT_SIZE 0x100
#define YYJSON_MUT_DOC_STR_POOL_MAX_SIZE 0x10000000
#define YYJSON_MUT_DOC_VAL_POOL_INIT_SIZE (0x10 * sizeof(yyjson_mut_val))
#define YYJSON_MUT_DOC_VAL_POOL_MAX_SIZE (0x1000000 * sizeof(yyjson_mut_val))
/* The minimum size of the dynamic allocator's chunk: 0x1000 (4096;
   presumably bytes -- confirm against the allocator code). */
#define YYJSON_ALC_DYN_MIN_SIZE 0x1000
/*
 Default value for compile-time options.
 Each option below is defined as 0 only if nothing has defined it earlier, so
 a value supplied before this point (e.g. by the build) is left untouched.
 NOTE(review): judging by the YYJSON_DISABLE_ prefix, 0 presumably keeps the
 corresponding feature enabled -- confirm where each option is tested.
 */
#ifndef YYJSON_DISABLE_READER
#define YYJSON_DISABLE_READER 0
#endif
#ifndef YYJSON_DISABLE_WRITER
#define YYJSON_DISABLE_WRITER 0
#endif
#ifndef YYJSON_DISABLE_INCR_READER
#define YYJSON_DISABLE_INCR_READER 0
#endif
#ifndef YYJSON_DISABLE_UTILS
#define YYJSON_DISABLE_UTILS 0
#endif
#ifndef YYJSON_DISABLE_FAST_FP_CONV
#define YYJSON_DISABLE_FAST_FP_CONV 0
#endif
#ifndef YYJSON_DISABLE_NON_STANDARD
#define YYJSON_DISABLE_NON_STANDARD 0
#endif
#ifndef YYJSON_DISABLE_UTF8_VALIDATION
#define YYJSON_DISABLE_UTF8_VALIDATION 0
#endif
/*==============================================================================
* MARK: - Macros (Private)
*============================================================================*/
/*
 Macros used for loop unrolling and other purposes.
 repeatN(x):        expands to a braced block containing `x` written N times.
 repeatN_incr(x):   expands to a braced block invoking `x(0)` .. `x(N-1)`.
 repeat_in_1_18(x): expands to a braced block invoking `x(1)` .. `x(18)`.
 `x` is pasted without any separator, so it must supply its own terminator
 (for example a trailing semicolon) if one is needed.
 */
#define repeat2(x) { x x }
#define repeat4(x) { x x x x }
#define repeat8(x) { x x x x x x x x }
#define repeat16(x) { x x x x x x x x x x x x x x x x }
#define repeat2_incr(x) { x(0) x(1) }
#define repeat4_incr(x) { x(0) x(1) x(2) x(3) }
#define repeat8_incr(x) { x(0) x(1) x(2) x(3) x(4) x(5) x(6) x(7) }
#define repeat16_incr(x) { x(0) x(1) x(2) x(3) x(4) x(5) x(6) x(7) \
x(8) x(9) x(10) x(11) x(12) x(13) x(14) x(15) }
#define repeat_in_1_18(x) { x(1) x(2) x(3) x(4) x(5) x(6) x(7) x(8) \
x(9) x(10) x(11) x(12) x(13) x(14) x(15) x(16) \
x(17) x(18) }
/*
 Macros used to provide branch prediction information for compiler.
 Any earlier definition of `likely` / `unlikely` is removed first, then the
 short names are mapped onto `yyjson_likely` / `yyjson_unlikely`.
 */
#undef likely
#define likely(x) yyjson_likely(x)
#undef unlikely
#define unlikely(x) yyjson_unlikely(x)
/*
 Macros used to provide inline information for compiler.
 `static_inline` expands to `static yyjson_inline` and `static_noinline` to
 `static yyjson_noinline`; both therefore give the function internal linkage.
 Any earlier definition of either name is removed first.
 */
#undef static_inline
#define static_inline static yyjson_inline
#undef static_noinline
#define static_noinline static yyjson_noinline
/*
 Macros for min and max.
 Both are plain conditional expressions, so one of the two arguments is
 evaluated twice: do not pass expressions with side effects.
 When the arguments compare equal, `y` is the value produced.
 */
#undef yyjson_min
#define yyjson_min(x, y) ((x) < (y) ? (x) : (y))
#undef yyjson_max
#define yyjson_max(x, y) ((x) > (y) ? (x) : (y))
/* Used to write u64 literal for C89 which doesn't support "ULL" suffix. */
#undef U64
#define U64(hi, lo) ((((u64)hi##UL) << 32U) + lo##UL)
#undef U32
#define U32(hi) ((u32)(hi##UL))
/*
 Used to cast away (remove) const qualifier.
 The macro expands to a chain of cast operators with no operand, so it is
 written as a prefix in front of the pointer expression: `constcast(T *)ptr`.
 The value is converted const void * -> size_t -> void * -> type.
 NOTE(review): the detour through size_t is presumably there to keep
 cast-qualifier warnings quiet -- confirm.
 */
#define constcast(type) (type)(void *)(size_t)(const void *)
/*
Compiler barriers for single variables.
These macros inform GCC that a read or write access to the given memory
location will occur, preventing certain compiler optimizations or reordering
around the access to 'val'. They do not emit any actual instructions.
This is useful when GCC's default optimization strategies are suboptimal and
precise control over memory access patterns is required.
These barriers are not needed when using Clang or MSVC.

gcc_load_barrier(val):  empty asm that lists `val` as a memory input ("m").
gcc_store_barrier(val): empty asm that lists `val` as a memory output ("=m").
gcc_full_barrier(val):  empty asm that lists `val` as both memory output and
                        memory input.
When YYJSON_IS_REAL_GCC is false, all three macros expand to nothing.
*/
#if YYJSON_IS_REAL_GCC
# define gcc_load_barrier(val) __asm__ volatile(""::"m"(val))
# define gcc_store_barrier(val) __asm__ volatile("":"=m"(val))
# define gcc_full_barrier(val) __asm__ volatile("":"=m"(val):"m"(val))
#else
# define gcc_load_barrier(val)
# define gcc_store_barrier(val)
# define gcc_full_barrier(val)
#endif
/*==============================================================================
* MARK: - Constants (Private)
*============================================================================*/
/* Common error messages. */
/* File handling and memory allocation failures. */
#define MSG_FOPEN "failed to open file"
#define MSG_FREAD "failed to read file"
#define MSG_FWRITE "failed to write file"
#define MSG_FCLOSE "failed to close file"
#define MSG_MALLOC "failed to allocate memory"
/* Malformed `true` / `false` / `null` literals. */
#define MSG_CHAR_T "invalid literal, expected 'true'"
#define MSG_CHAR_F "invalid literal, expected 'false'"
#define MSG_CHAR_N "invalid literal, expected 'null'"
/* Unexpected character where a value, separator or terminator is required. */
#define MSG_CHAR "unexpected character, expected a JSON value"
#define MSG_ARR_END "unexpected character, expected ',' or ']'"
#define MSG_OBJ_KEY "unexpected character, expected a string key"
#define MSG_OBJ_SEP "unexpected character, expected ':' after key"
#define MSG_OBJ_END "unexpected character, expected ',' or '}'"
/* Document-level problems: trailing content, truncation, unclosed comment. */
#define MSG_GARBAGE "unexpected content after document"
#define MSG_NOT_END "unexpected end of data"
#define MSG_COMMENT "unclosed multiline comment"
/* Constructs that are rejected because they are not allowed. */
#define MSG_COMMA "trailing comma is not allowed"
#define MSG_NAN_INF "nan or inf number is not allowed"
0xF0, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0
};
/**
 Decode the four hex characters at `src[0..3]` into a 16-bit value.
 `src[0]` supplies the most significant nibble and `src[3]` the least.
 `*dst` is always written; its value is only meaningful when the function
 returns true.
 @param src Pointer to at least four readable bytes.
 @param dst Receives the decoded value.
 @return true if all four characters are hex digits, false otherwise.
 */
static_inline bool hex_load_4(const u8 *src, u16 *dst) {
    u16 n0 = hex_conv_table[src[0]];
    u16 n1 = hex_conv_table[src[1]];
    u16 n2 = hex_conv_table[src[2]];
    u16 n3 = hex_conv_table[src[3]];
    /* Pack digits 0/2 and digits 1/3 into separate bytes of two words, so the
       table's invalid marker (high nibble set) stays visible in each byte. */
    u16 even_pair = (u16)((n0 << 8) | n2);
    u16 odd_pair = (u16)((n1 << 8) | n3);
    /* Non-zero if any of the four lookups had bits set in its high nibble. */
    u16 invalid = (u16)((even_pair | odd_pair) & (u16)0xF0F0);
    /* Interleave: the even digits move up one nibble, the odd digits fill in. */
    *dst = (u16)((even_pair << 4) | odd_pair);
    return invalid == 0;
}
/**
 Decode the two hex characters at `src[0..1]` into an 8-bit value.
 `src[0]` supplies the high nibble and `src[1]` the low nibble.
 `*dst` is always written; its value is only meaningful when the function
 returns true.
 @param src Pointer to at least two readable bytes.
 @param dst Receives the decoded value.
 @return true if both characters are hex digits, false otherwise.
 */
static_inline bool hex_load_2(const u8 *src, u8 *dst) {
    u8 hi = hex_conv_table[src[0]];
    u8 lo = hex_conv_table[src[1]];
    /* A lookup with any bit set in its high nibble marks a non-hex character. */
    u8 invalid = (u8)((hi | lo) & 0xF0);
    *dst = (u8)((hi << 4) | lo);
    return invalid == 0;
}
/**
 Test whether `c` is a hexadecimal digit: [0-9a-fA-F].
 A character is rejected when its `hex_conv_table` entry equals the
 sentinel value 0xF0.
 @param c The byte to classify.
 @return true if `c` is a hex digit, false otherwise.
 */
static_inline bool char_is_hex(u8 c) {
    const u8 entry = hex_conv_table[c];
    return !(entry == 0xF0);
}
/*==============================================================================
* MARK: - UTF8 Validation (Private)
* Each Unicode code point is encoded using 1 to 4 bytes in UTF-8.
* Validation is performed using a 4-byte mask and pattern-based approach,
* which requires the input data to be padded with four zero bytes at the end.
*============================================================================*/
/*
 Macro for concatenating four u8 into a u32 and keeping the byte order.
 utf8_seq_def(name, a, b, c, d) defines a file-scope constant for the byte
 sequence a, b, c, d; utf8_seq(name) reads that constant back as a 32-bit
 value. Each of a..d is token-pasted after `0x`, so it must be written as two
 bare hex digits (e.g. `E0`, not `0xE0`).
 - Little endian: the digits are pasted in reverse order (d c b a), which
   places `a` in the least significant byte of the u32.
 - Big endian: the digits are pasted in the given order (a b c d).
 - Otherwise: the bytes initialize a `v32_uni` and the value is read through
   its `.u` member (presumably a union of four bytes and a u32 -- confirm
   against the `v32_uni` declaration).
 */
#if YYJSON_ENDIAN == YYJSON_LITTLE_ENDIAN
# define utf8_seq_def(name, a, b, c, d) \
static const u32 utf8_seq_##name = 0x##d##c##b##a##UL;
# define utf8_seq(name) utf8_seq_##name
#elif YYJSON_ENDIAN == YYJSON_BIG_ENDIAN
# define utf8_seq_def(name, a, b, c, d) \
static const u32 utf8_seq_##name = 0x##a##b##c##d##UL;
# define utf8_seq(name) utf8_seq_##name
#else
# define utf8_seq_def(name, a, b, c, d) \
static const v32_uni utf8_uni_##name = {{ 0x##a, 0x##b, 0x##c, 0x##d }};
# define utf8_seq(name) utf8_uni_##name.u
#endif
/*
 1-byte sequence (U+0000 to U+007F)
 bit min     [.......0] (U+0000)
 bit max     [.1111111] (U+007F)
 bit mask    [x.......] (80)
 bit pattern [0.......] (00)
 */
utf8_seq_def(b1_mask, 80, 00, 00, 00)
utf8_seq_def(b1_patt, 00, 00, 00, 00)
/*
 Non-zero when the first byte held in `uni` matches the 1-byte pattern, i.e.
 its top bit is clear. `uni` is expected to be a 32-bit value holding four
 consecutive input bytes in memory order, matching the layout produced by
 utf8_seq_def. The argument is parenthesized so that an expression with
 lower precedence than `&` (such as `a | b`) is still masked as a whole.
 */
#define is_utf8_seq1(uni) ( \
    (((uni) & utf8_seq(b1_mask)) == utf8_seq(b1_patt)) )
/*
 2-byte sequence (U+0080 to U+07FF)
 bit min     [......10 ..000000] (U+0080)
 bit max     [...11111 ..111111] (U+07FF)
 bit mask    [xxx..... xx......] (E0 C0)
 bit pattern [110..... 10......] (C0 80)
 bit require [...xxxx. ........] (1E 00)
 */
utf8_seq_def(b2_mask, E0, C0, 00, 00)
utf8_seq_def(b2_patt, C0, 80, 00, 00)
utf8_seq_def(b2_requ, 1E, 00, 00, 00)
/*
 Non-zero when the first two bytes held in `uni` match the 2-byte pattern and
 at least one of the "require" bits is set. `uni` is expected to be a 32-bit
 value holding four consecutive input bytes in memory order, matching the
 layout produced by utf8_seq_def. The argument is parenthesized so that an
 expression with lower precedence than `&` is still masked as a whole; note
 that it may be evaluated twice, so it must not have side effects.
 */
#define is_utf8_seq2(uni) ( \
    (((uni) & utf8_seq(b2_mask)) == utf8_seq(b2_patt)) && \
    (((uni) & utf8_seq(b2_requ))) )
/*
3-byte sequence (U+0800 to U+FFFF)
bit min [........ ..100000 ..000000] (U+0800)
bit max [....1111 ..111111 ..111111] (U+FFFF)
bit mask [xxxx.... xx...... xx......] (F0 C0 C0)
bit pattern [1110.... 10...... 10......] (E0 80 80)
bit require [....xxxx ..x..... ........] (0F 20 00)
3-byte invalid sequence, reserved for surrogate halves (U+D800 to U+DFFF)
bit min [....1101 ..100000 ..000000] (U+D800)
bit max [....1101 ..111111 ..111111] (U+DFFF)
bit mask [....xxxx ..x..... ........] (0F 20 00)
bit pattern [....1101 ..1..... ........] (0D 20 00)
*/
/* Constants for the 3-byte case, in the same order as the table above:
   mask and pattern for the marker bits, the "require" bits, and the
   surrogate pattern. The surrogate mask equals the "require" mask
   (0F 20 00), so b3_requ serves for both and only the pattern b3_erro
   gets its own constant. */
utf8_seq_def(b3_mask, F0, C0, C0, 00)
utf8_seq_def(b3_patt, E0, 80, 80, 00)
utf8_seq_def(b3_requ, 0F, 20, 00, 00)
utf8_seq_def(b3_erro, 0D, 20, 00, 00)
#define is_utf8_seq3(uni) ( \
( run in 3.101 seconds using v1.01-cache-2.11-cpan-483215c6ad5 )