Cpanel-JSON-XS
view release on metacpan or search on metacpan
#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#define NEED_load_module
#define NEED_newCONSTSUB
#define NEED_vload_module
#define NEED_vnewSVpvf
#define NEED_warner
#define NEED_grok_number
#define NEED_grok_numeric_radix
#define NEED_newRV_noinc
#define NEED_sv_2pv_flags
#include "ppport.h"
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include <float.h>
#if defined(__BORLANDC__) || defined(_MSC_VER)
# define snprintf _snprintf // C compilers have this in stdio.h
#endif
#ifndef PERL_UNUSED_RESULT
# if defined(__GNUC__) && defined(HASATTRIBUTE_WARN_UNUSED_RESULT)
# define PERL_UNUSED_RESULT(v) STMT_START { __typeof__(v) z = (v); (void)sizeof(z); } STMT_END
# else
# define PERL_UNUSED_RESULT(v) ((void)(v))
# endif
#endif
#if defined(_AIX) && (!defined(HAS_LONG_DOUBLE) || AIX_WORKAROUND)
#define HAVE_NO_POWL
#endif
/* Freebsd 10: It has powl, but it is too bad. strtold is good. RT #101265 */
#if defined(__FreeBSD__) && defined(__clang__) && defined(USE_LONG_DOUBLE)
#define HAVE_BAD_POWL
#endif
#if PERL_VERSION < 22 && defined(HAS_SETLOCALE)
#define NEED_NUMERIC_LOCALE_C
#ifdef I_XLOCALE
#include <xlocale.h>
#endif
#endif
/* FIXME: still a refcount error */
#define HAVE_DECODE_BOM
#define UTF8BOM "\357\273\277" /* EF BB BF */
/* UTF16/32BOM is deprecated, RFC 8259 */
#define UTF16BOM "\377\376" /* FF FE or +UFEFF */
#define UTF16BOM_BE "\376\377" /* FE FF */
#define UTF32BOM "\377\376\000\000" /* FF FE 00 00 or +UFEFF */
#define UTF32BOM_BE "\000\000\376\377" /* 00 00 FE FF */
/* Need to disable broken gcc-9.[0-3] -O1. Assume gcc-10 is also still broken */
/* Only gcc defines __GNUC_PATCHLEVEL__, clang and icc do define __GNUC__ */
#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) && \
(((__GNUC__ == 9) && (__GNUC_MINOR__ <= 3) || \
(__GNUC__ > 9)))
# define BROKEN_GCC_OPT
#endif
/* mingw with USE_LONG_DOUBLE (and implied USE_MINGW_ANSI_STDIO) do use the
non-msvcrt inf/nan stringification in sprintf(). */
#if defined(WIN32) && !defined(__USE_MINGW_ANSI_STDIO) && !defined(USE_LONG_DOUBLE)
/* new ucrtd.dll runtime? We do not probe the runtime or variants in the Makefile.PL yet. */
#define STR_INF "inf"
#define STR_INF2 "inf.0"
#define STR_NAN "nan"
#define STR_QNAN "nan(ind)"
/* old standard msvcrt.dll */
#define STR_INF3 "1.#INF"
#define STR_INF4 "1.#INF.0"
#define STR_NAN2 "1.#IND"
#define STR_QNAN2 "1.#QNAN"
#define HAVE_QNAN
#elif defined(sun) || defined(__sun)
#define STR_INF "Infinity"
#define STR_NAN "NaN"
#elif defined(__hpux)
#define STR_INF "++"
#define STR_NAN "-?"
#define HAVE_NEG_NAN
#define STR_NEG_INF "---"
#define STR_NEG_NAN "?"
#elif defined(_AIX) || defined(_AIX50)
/* xlC compiler: __TOS_AIX__ FIXME: This does not work yet. GH #165 */
#define STR_INF "INF"
#define STR_INF2 "INF.0"
#define HAVE_NEG_NAN
#define STR_NEG_INF "-INF"
#define HAVE_NEG_NAN
#define HAVE_QNAN
#define STR_NAN "NaN"
#define STR_QNAN "NaNQ"
//#define STR_QNAN "NANQ"
#else
#define STR_INF "inf"
#define STR_NAN "nan"
#endif
/* NV_INF compatibility for Perl 5.6 */
#if !defined(NV_INF) && defined(USE_LONG_DOUBLE) && defined(LDBL_INFINITY)
# define NV_INF LDBL_INFINITY
#endif
#if !defined(NV_INF) && defined(DBL_INFINITY)
# define NV_INF (NV)DBL_INFINITY
#endif
#if !defined(NV_INF) && defined(INFINITY)
# define NV_INF (NV)INFINITY
#endif
#if !defined(NV_INF) && defined(INF)
# define NV_INF (NV)INF
dMY_CXT;
dec->cur += 4;
if (typesv)
sv_setiv_mg (typesv, JSON_TYPE_BOOL);
if (dec->json.flags & F_UNBLESSED_BOOL)
return newSVsv (&PL_sv_yes);
return newSVsv(MY_CXT.json_true);
}
else
ERR ("'true' expected");
break;
case 'f':
if (dec->end - dec->cur >= 5 && memEQc(dec->cur, "false"))
{
dMY_CXT;
dec->cur += 5;
if (typesv)
sv_setiv_mg (typesv, JSON_TYPE_BOOL);
if (dec->json.flags & F_UNBLESSED_BOOL)
return newSVsv (&PL_sv_no);
return newSVsv(MY_CXT.json_false);
}
else
ERR ("'false' expected");
break;
case 'n':
if (dec->end - dec->cur >= 4 && memEQc(dec->cur, "null"))
{
dec->cur += 4;
if (typesv)
sv_setiv_mg (typesv, JSON_TYPE_NULL);
return newSVsv(&PL_sv_undef);
}
else
ERR ("'null' expected");
break;
default:
ERR ("malformed JSON string, neither tag, array, object, number, string or atom");
break;
}
fail:
return 0;
}
/* decode UTF32-LE/... to UTF-8:
$utf8 = Encode::decode("UTF-32", $string); */
static SV *
decode_bom(pTHX_ const char* encoding, SV* string, STRLEN offset)
{
dSP;
I32 items;
PERL_UNUSED_ARG(offset);
#ifndef HAVE_DECODE_BOM
croak ("Cannot handle multibyte BOM yet");
return string;
#else
ENTER;
#if PERL_VERSION > 18
/* on older perls (<5.20) this corrupts ax */
Perl_load_module(aTHX_ PERL_LOADMOD_NOIMPORT, newSVpvs("Encode"),
NULL, NULL, NULL);
#else
if (!get_cvs("Encode::decode", GV_NOADD_NOINIT|GV_NO_SVGMAGIC))
croak("Multibyte BOM needs to use Encode before");
#endif
LEAVE;
ENTER;
PUSHMARK(SP);
XPUSHs(newSVpvn(encoding, strlen(encoding)));
XPUSHs(string);
PUTBACK;
/* Calling Encode::Unicode::decode_xs would be faster, but we'd need the blessed
enc hash from find_encoding() then. e.g. $Encode::Encoding{'UTF-16LE'}
bless {Name=>UTF-16,size=>2,endian=>'',ucs2=>undef}, 'Encode::Unicode';
And currenty we enjoy the simplicity of the BOM offset advance by
endianness autodetection.
*/
items = call_sv(MUTABLE_SV(get_cvs("Encode::decode",
GV_NOADD_NOINIT|GV_NO_SVGMAGIC)), G_SCALAR);
SPAGAIN;
if (items >= 0 && SvPOK(TOPs)) {
LEAVE;
SvUTF8_on(TOPs);
return POPs;
} else {
LEAVE;
return string;
}
#endif
}
static SV *
decode_json (pTHX_ SV *string, JSON *json, STRLEN *offset_return, SV *typesv)
{
dec_t dec;
SV *sv;
STRLEN len, offset = 0;
int converted = 0;
/*dMY_CXT;*/
if (!json_validate (json))
croak (NULL);
/* work around bugs in 5.10 where manipulating magic values
* makes perl ignore the magic in subsequent accesses.
* also make a copy of non-PV values, to get them into a clean
* state (SvPV should do that, but it's buggy, see below).
* But breaks decode_prefix with offset.
*/
/*SvGETMAGIC (string);*/
if (SvMAGICAL (string) || !SvPOK (string) || SvIsCOW_shared_hash(string))
string = sv_2mortal (newSVsv (string));
SvUPGRADE (string, SVt_PV);
/* work around a bug in perl 5.10, which causes SvCUR to fail an
* assertion with -DDEBUGGING, although SvCUR is documented to
* return the xpv_cur field which certainly exists after upgrading.
* according to nicholas clark, calling SvPOK fixes this.
* But it doesn't fix it, so try another workaround, call SvPV_nolen
* and hope for the best.
* Damnit, SvPV_nolen still trips over yet another assertion. This
* assertion business is seriously broken, try yet another workaround
* for the broken -DDEBUGGING.
*/
{
#ifdef DEBUGGING
len = SvOK (string) ? sv_len (string) : 0;
#else
len = SvCUR (string);
#endif
if (UNLIKELY(len > json->max_size && json->max_size))
croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu",
(unsigned long)len, (unsigned long)json->max_size);
}
/* Detect BOM and possibly convert to UTF-8 and set UTF8 flag.
https://tools.ietf.org/html/rfc7159#section-8.1
JSON text SHALL be encoded in UTF-8, UTF-16, or UTF-32.
Byte Order Mark - While section 8.1 states "Implementations MUST
NOT add a byte order mark to the beginning of a JSON text",
"implementations (...) MAY ignore the presence of a byte order
mark rather than treating it as an error". */
if (UNLIKELY(len > 2 && SvPOK(string) && !json->incr_pos)) {
U8 *s = (U8*)SvPVX (string);
if (*s >= 0xEF) {
if (len >= 3 && memEQc(s, UTF8BOM)) {
converted = 1 + (json->flags & F_UTF8);
json->flags |= F_UTF8;
offset = 3;
SvPV_set(string, SvPVX_mutable (string) + 3);
SvCUR_set(string, len - 3);
SvUTF8_on(string);
/* omitting the endian name will skip the BOM in the result */
} else if (len >= 4 && memEQc(s, UTF32BOM)) {
string = decode_bom(aTHX_ "UTF-32", string, 4);
converted = 1 + (json->flags & F_UTF8);
json->flags |= F_UTF8;
} else if (memEQc(s, UTF16BOM)) {
string = decode_bom(aTHX_ "UTF-16", string, 2);
converted = 1 + (json->flags & F_UTF8);
json->flags |= F_UTF8;
} else if (memEQc(s, UTF16BOM_BE)) {
string = decode_bom(aTHX_ "UTF-16", string, 2);
converted = 1 + (json->flags & F_UTF8);
json->flags |= F_UTF8;
}
} else if (UNLIKELY(len >= 4 && !*s && memEQc(s, UTF32BOM_BE))) {
string = decode_bom(aTHX_ "UTF-32", string, 4);
converted = 1 + (json->flags & F_UTF8);
json->flags |= F_UTF8;
}
}
if (LIKELY(!converted)) {
if (DECODE_WANTS_OCTETS (json))
sv_utf8_downgrade (string, 0);
else
sv_utf8_upgrade (string);
}
/* should basically be a NOP but needed for 5.6 with undef */
if (!SvPOK(string))
SvGROW (string, SvCUR (string) + 1);
dec.json = *json;
dec.cur = SvPVX (string);
dec.end = SvEND (string);
dec.err = 0;
dec.depth = 0;
if (dec.json.cb_object || dec.json.cb_sk_object)
dec.json.flags |= F_HOOK;
*dec.end = 0; /* this should basically be a nop, too, but make sure it's there */
decode_ws (&dec);
sv = decode_sv (aTHX_ &dec, typesv);
if (offset_return) {
if (dec.cur < SvPVX (string) || dec.cur > SvEND (string))
*offset_return = 0;
else
*offset_return = dec.cur - SvPVX (string);
}
if (!(offset_return || !sv))
{
/* check for trailing garbage */
decode_ws (&dec);
if ((dec.end - dec.cur) || *dec.cur)
{
dec.err = "garbage after JSON object";
SvREFCNT_dec (sv);
sv = NULL;
}
}
/* restore old utf8 string with BOM */
if (UNLIKELY(offset)) {
SvPV_set(string, SvPVX_mutable (string) - offset);
SvCUR_set(string, len);
}
if (!sv)
{
SV *uni = sv_newmortal ();
COP cop = *PL_curcop;
if (dec.cur >= dec.end) // overshoot
{
croak ("%s, at character offset %d",
dec.err,
(int)ptr_to_index (aTHX_ string, dec.cur - SvPVX(string)));
}
#if PERL_VERSION >= 8
/* horrible hack to silence warning inside pv_uni_display */
/* TODO: Can be omitted with newer perls */
cop.cop_warnings = pWARN_NONE;
ENTER;
SAVEVPTR (PL_curcop);
PL_curcop = &cop;
pv_uni_display (uni, (U8*)dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
LEAVE;
#endif
croak ("%s, at character offset %d (before \"%s\")",
dec.err,
(int)ptr_to_index (aTHX_ string, dec.cur - SvPVX(string)),
dec.cur < dec.end ? SvPV_nolen (uni) : "(end of string)");
}
if (!(dec.json.flags & F_ALLOW_NONREF) && json_nonref(aTHX_ sv)) {
SvREFCNT_dec (sv);
croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
}
if (UNLIKELY(converted && !(converted - 1))) /* with BOM, and UTF8 was not set */
json->flags &= ~F_UTF8;
return sv_2mortal (sv);
}
/*/////////////////////////////////////////////////////////////////////////// */
/* incremental parser */
/* Note that our good friend gcc-9.x crashes here, which looks like one of the
well-known internal gcc tree-optimizer bugs. */
#ifdef BROKEN_GCC_OPT
// or __attribute__((optimize("no-tree-vectorize")))
__attribute__((optimize("O0")))
#endif
static void
incr_parse (JSON *self)
{
const char *p = SvPVX (self->incr_text) + self->incr_pos;
/* the state machine here is a bit convoluted and could be simplified a lot */
/* but this would make it slower, so... */
for (;;)
{
/*printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D */
switch (self->incr_mode)
{
/* only used for initial whitespace skipping */
case INCR_M_WS:
for (;;)
{
if (*p > 0x20)
{
if (*p == '#')
{
self->incr_mode = INCR_M_C0;
goto incr_m_c;
}
else
{
self->incr_mode = INCR_M_JSON;
goto incr_m_json;
}
}
else if (!*p)
goto interrupt;
++p;
}
/* skip a single char inside a string (for \\-processing) */
case INCR_M_BS:
if (!*p)
goto interrupt;
++p;
self->incr_mode = INCR_M_STR;
goto incr_m_str;
/* inside #-style comments */
case INCR_M_C0:
( run in 0.428 second using v1.01-cache-2.11-cpan-39bf76dae61 )