Cpanel-JSON-XS

 view release on metacpan or  search on metacpan

XS.xs  view on Meta::CPAN

#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#define NEED_load_module
#define NEED_newCONSTSUB
#define NEED_vload_module
#define NEED_vnewSVpvf
#define NEED_warner
#define NEED_grok_number
#define NEED_grok_numeric_radix
#define NEED_newRV_noinc
#define NEED_sv_2pv_flags
#include "ppport.h"

#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include <float.h>

#if defined(__BORLANDC__) || defined(_MSC_VER)
# define snprintf _snprintf // C compilers have this in stdio.h
#endif

#ifndef PERL_UNUSED_RESULT
#  if defined(__GNUC__) && defined(HASATTRIBUTE_WARN_UNUSED_RESULT)
#    define PERL_UNUSED_RESULT(v) STMT_START { __typeof__(v) z = (v); (void)sizeof(z); } STMT_END
#  else
#    define PERL_UNUSED_RESULT(v) ((void)(v))
#  endif
#endif

#if defined(_AIX) && (!defined(HAS_LONG_DOUBLE) || AIX_WORKAROUND)
#define HAVE_NO_POWL
#endif

/* Freebsd 10: It has powl, but it is too bad. strtold is good. RT #101265 */
#if defined(__FreeBSD__) && defined(__clang__) && defined(USE_LONG_DOUBLE)
#define HAVE_BAD_POWL
#endif

#if PERL_VERSION < 22 && defined(HAS_SETLOCALE)
#define NEED_NUMERIC_LOCALE_C
#ifdef I_XLOCALE
#include <xlocale.h>
#endif
#endif

/* FIXME: still a refcount error */
#define HAVE_DECODE_BOM
#define UTF8BOM     "\357\273\277"      /* EF BB BF */
/* UTF16/32BOM is deprecated, RFC 8259 */
#define UTF16BOM    "\377\376"          /* FF FE or +UFEFF */
#define UTF16BOM_BE "\376\377"          /* FE FF */
#define UTF32BOM    "\377\376\000\000"  /* FF FE 00 00 or +UFEFF */
#define UTF32BOM_BE "\000\000\376\377"  /* 00 00 FE FF */

/* Need to disable broken gcc-9.[0-3] -O1. Assume gcc-10 is also still broken */
/* Only gcc defines __GNUC_PATCHLEVEL__, clang and icc do define __GNUC__ */
#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) && \
    (((__GNUC__ == 9) && (__GNUC_MINOR__ <= 3) || \
      (__GNUC__ > 9)))
#  define BROKEN_GCC_OPT
#endif

/* mingw with USE_LONG_DOUBLE (and implied USE_MINGW_ANSI_STDIO) do use the
   non-msvcrt inf/nan stringification in sprintf(). */
#if defined(WIN32) && !defined(__USE_MINGW_ANSI_STDIO) && !defined(USE_LONG_DOUBLE)
/* new ucrtd.dll runtime? We do not probe the runtime or variants in the Makefile.PL yet. */
#define STR_INF "inf"
#define STR_INF2 "inf.0"
#define STR_NAN "nan"
#define STR_QNAN "nan(ind)"
/* old standard msvcrt.dll */
#define STR_INF3 "1.#INF"
#define STR_INF4 "1.#INF.0"
#define STR_NAN2 "1.#IND"
#define STR_QNAN2 "1.#QNAN"
#define HAVE_QNAN
#elif defined(sun) || defined(__sun)
#define STR_INF "Infinity"
#define STR_NAN "NaN"
#elif defined(__hpux)
#define STR_INF "++"
#define STR_NAN "-?"
#define HAVE_NEG_NAN
#define STR_NEG_INF "---"
#define STR_NEG_NAN "?"
#elif defined(_AIX) || defined(_AIX50)
/* xlC compiler: __TOS_AIX__ FIXME: This does not work yet. GH #165 */
#define STR_INF "INF"
#define STR_INF2 "INF.0"
#define HAVE_NEG_NAN
#define STR_NEG_INF "-INF"
#define HAVE_NEG_NAN
#define HAVE_QNAN
#define STR_NAN "NaN"
#define STR_QNAN "NaNQ"
//#define STR_QNAN "NANQ"
#else
#define STR_INF "inf"
#define STR_NAN "nan"
#endif

/* NV_INF compatibility for Perl 5.6 */
#if !defined(NV_INF) && defined(USE_LONG_DOUBLE) && defined(LDBL_INFINITY)
#  define NV_INF LDBL_INFINITY
#endif
#if !defined(NV_INF) && defined(DBL_INFINITY)
#  define NV_INF (NV)DBL_INFINITY
#endif
#if !defined(NV_INF) && defined(INFINITY)
#  define NV_INF (NV)INFINITY
#endif
#if !defined(NV_INF) && defined(INF)
#  define NV_INF (NV)INF

XS.xs  view on Meta::CPAN

            dMY_CXT;
            dec->cur += 4;
            if (typesv)
              sv_setiv_mg (typesv, JSON_TYPE_BOOL);
            if (dec->json.flags & F_UNBLESSED_BOOL)
              return newSVsv (&PL_sv_yes);
            return newSVsv(MY_CXT.json_true);
          }
        else
          ERR ("'true' expected");

        break;

      case 'f':
        if (dec->end - dec->cur >= 5 && memEQc(dec->cur, "false"))
          {
            dMY_CXT;
            dec->cur += 5;
            if (typesv)
              sv_setiv_mg (typesv, JSON_TYPE_BOOL);
            if (dec->json.flags & F_UNBLESSED_BOOL)
              return newSVsv (&PL_sv_no);
            return newSVsv(MY_CXT.json_false);
          }
        else
          ERR ("'false' expected");

        break;

      case 'n':
        if (dec->end - dec->cur >= 4 && memEQc(dec->cur, "null"))
          {
            dec->cur += 4;
            if (typesv)
              sv_setiv_mg (typesv, JSON_TYPE_NULL);
            return newSVsv(&PL_sv_undef);
          }
        else
          ERR ("'null' expected");

        break;

      default:
        ERR ("malformed JSON string, neither tag, array, object, number, string or atom");
        break;
    }

fail:
  return 0;
}

/* decode UTF32-LE/... to UTF-8:
   $utf8 = Encode::decode("UTF-32", $string); */
static SV *
decode_bom(pTHX_ const char* encoding, SV* string, STRLEN offset)
{
  dSP;
  I32 items;
  PERL_UNUSED_ARG(offset);

#ifndef HAVE_DECODE_BOM
  croak ("Cannot handle multibyte BOM yet");
  return string;
#else
  ENTER;
#if PERL_VERSION > 18
  /* on older perls (<5.20) this corrupts ax */
  Perl_load_module(aTHX_ PERL_LOADMOD_NOIMPORT, newSVpvs("Encode"),
                   NULL, NULL, NULL);
#else
  if (!get_cvs("Encode::decode", GV_NOADD_NOINIT|GV_NO_SVGMAGIC))
    croak("Multibyte BOM needs to use Encode before");
#endif
  LEAVE;
  ENTER;
  PUSHMARK(SP);
  XPUSHs(newSVpvn(encoding, strlen(encoding)));
  XPUSHs(string);
  PUTBACK;
  /* Calling Encode::Unicode::decode_xs would be faster, but we'd need the blessed
     enc hash from find_encoding() then. e.g. $Encode::Encoding{'UTF-16LE'}
     bless {Name=>UTF-16,size=>2,endian=>'',ucs2=>undef}, 'Encode::Unicode';
     And currenty we enjoy the simplicity of the BOM offset advance by 
     endianness autodetection.
   */
  items = call_sv(MUTABLE_SV(get_cvs("Encode::decode",
              GV_NOADD_NOINIT|GV_NO_SVGMAGIC)), G_SCALAR);
  SPAGAIN;
  if (items >= 0 && SvPOK(TOPs)) {
    LEAVE;
    SvUTF8_on(TOPs);
    return POPs;
  } else {
    LEAVE;
    return string;
  }
#endif
}

static SV *
decode_json (pTHX_ SV *string, JSON *json, STRLEN *offset_return, SV *typesv)
{
  dec_t dec;
  SV *sv;
  STRLEN len, offset = 0;
  int converted = 0;
  /*dMY_CXT;*/

  if (!json_validate (json))
      croak (NULL);
  /* work around bugs in 5.10 where manipulating magic values
   * makes perl ignore the magic in subsequent accesses.
   * also make a copy of non-PV values, to get them into a clean
   * state (SvPV should do that, but it's buggy, see below).
   * But breaks decode_prefix with offset.
   */
  /*SvGETMAGIC (string);*/
  if (SvMAGICAL (string) || !SvPOK (string) || SvIsCOW_shared_hash(string))
    string = sv_2mortal (newSVsv (string));

  SvUPGRADE (string, SVt_PV);

  /* work around a bug in perl 5.10, which causes SvCUR to fail an
   * assertion with -DDEBUGGING, although SvCUR is documented to
   * return the xpv_cur field which certainly exists after upgrading.
   * according to nicholas clark, calling SvPOK fixes this.
   * But it doesn't fix it, so try another workaround, call SvPV_nolen
   * and hope for the best.
   * Damnit, SvPV_nolen still trips over yet another assertion. This
   * assertion business is seriously broken, try yet another workaround
   * for the broken -DDEBUGGING.
   */
  {
#ifdef DEBUGGING
    len = SvOK (string) ? sv_len (string) : 0;
#else
    len = SvCUR (string);
#endif

    if (UNLIKELY(len > json->max_size && json->max_size))
      croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu",
             (unsigned long)len, (unsigned long)json->max_size);
  }

  /* Detect BOM and possibly convert to UTF-8 and set UTF8 flag.

     https://tools.ietf.org/html/rfc7159#section-8.1
     JSON text SHALL be encoded in UTF-8, UTF-16, or UTF-32.
     Byte Order Mark - While section 8.1 states "Implementations MUST
     NOT add a byte order mark to the beginning of a JSON text",
     "implementations (...) MAY ignore the presence of a byte order
     mark rather than treating it as an error". */
  if (UNLIKELY(len > 2 && SvPOK(string) && !json->incr_pos)) {
    U8 *s = (U8*)SvPVX (string);
    if (*s >= 0xEF) {
      if (len >= 3 && memEQc(s, UTF8BOM)) {
        converted = 1 + (json->flags & F_UTF8);
        json->flags |= F_UTF8;
        offset = 3;
        SvPV_set(string, SvPVX_mutable (string) + 3);
        SvCUR_set(string, len - 3);
        SvUTF8_on(string);
        /* omitting the endian name will skip the BOM in the result */
      } else if (len >= 4 && memEQc(s, UTF32BOM)) {
        string = decode_bom(aTHX_ "UTF-32", string, 4);
        converted = 1 + (json->flags & F_UTF8);
        json->flags |= F_UTF8;
      } else if (memEQc(s, UTF16BOM)) {
        string = decode_bom(aTHX_ "UTF-16", string, 2);
        converted = 1 + (json->flags & F_UTF8);
        json->flags |= F_UTF8;
      } else if (memEQc(s, UTF16BOM_BE)) {
        string = decode_bom(aTHX_ "UTF-16", string, 2);
        converted = 1 + (json->flags & F_UTF8);
        json->flags |= F_UTF8;
      }
    } else if (UNLIKELY(len >= 4 && !*s && memEQc(s, UTF32BOM_BE))) {
        string = decode_bom(aTHX_ "UTF-32", string, 4);
        converted = 1 + (json->flags & F_UTF8);
        json->flags |= F_UTF8;
   }
  }

  if (LIKELY(!converted)) {
    if (DECODE_WANTS_OCTETS (json))
      sv_utf8_downgrade (string, 0);
    else
      sv_utf8_upgrade (string);
  }

  /* should basically be a NOP but needed for 5.6 with undef */
  if (!SvPOK(string))
    SvGROW (string, SvCUR (string) + 1);

  dec.json  = *json;
  dec.cur   = SvPVX (string);
  dec.end   = SvEND (string);
  dec.err   = 0;
  dec.depth = 0;

  if (dec.json.cb_object || dec.json.cb_sk_object)
    dec.json.flags |= F_HOOK;

  *dec.end = 0; /* this should basically be a nop, too, but make sure it's there */

  decode_ws (&dec);
  sv = decode_sv (aTHX_ &dec, typesv);

  if (offset_return) {
    if (dec.cur < SvPVX (string) || dec.cur > SvEND (string))
      *offset_return = 0;
    else
      *offset_return = dec.cur - SvPVX (string);
  }

  if (!(offset_return || !sv))
    {
      /* check for trailing garbage */
      decode_ws (&dec);

      if ((dec.end - dec.cur) || *dec.cur)
        {
          dec.err = "garbage after JSON object";
          SvREFCNT_dec (sv);
          sv = NULL;
        }
    }
  /* restore old utf8 string with BOM */
  if (UNLIKELY(offset)) {
    SvPV_set(string, SvPVX_mutable (string) - offset);
    SvCUR_set(string, len);
  }

  if (!sv)
    {
      SV *uni = sv_newmortal ();
      COP cop = *PL_curcop;
      if (dec.cur >= dec.end) // overshoot
        {
          croak ("%s, at character offset %d",
                 dec.err,
                 (int)ptr_to_index (aTHX_ string, dec.cur - SvPVX(string)));
        }
#if PERL_VERSION >= 8
      /* horrible hack to silence warning inside pv_uni_display */
      /* TODO: Can be omitted with newer perls */
      cop.cop_warnings = pWARN_NONE;
      ENTER;
      SAVEVPTR (PL_curcop);
      PL_curcop = &cop;
      pv_uni_display (uni, (U8*)dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
      LEAVE;
#endif
      croak ("%s, at character offset %d (before \"%s\")",
             dec.err,
             (int)ptr_to_index (aTHX_ string, dec.cur - SvPVX(string)),
             dec.cur < dec.end ? SvPV_nolen (uni) : "(end of string)");
    }

  if (!(dec.json.flags & F_ALLOW_NONREF) && json_nonref(aTHX_ sv)) {
    SvREFCNT_dec (sv);
    croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
  }

  if (UNLIKELY(converted && !(converted - 1))) /* with BOM, and UTF8 was not set */
    json->flags &= ~F_UTF8;
  return sv_2mortal (sv);
}

/*/////////////////////////////////////////////////////////////////////////// */
/* incremental parser */

/* Note that our good friend gcc-9.x crashes here, which looks like one of the
   well-known internal gcc tree-optimizer bugs. */
#ifdef BROKEN_GCC_OPT
// or __attribute__((optimize("no-tree-vectorize")))
__attribute__((optimize("O0")))
#endif
static void
incr_parse (JSON *self)
{
  const char *p = SvPVX (self->incr_text) + self->incr_pos;

  /* the state machine here is a bit convoluted and could be simplified a lot */
  /* but this would make it slower, so... */

  for (;;)
    {
      /*printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D */
      switch (self->incr_mode)
        {
          /* only used for initial whitespace skipping */
          case INCR_M_WS:
            for (;;)
              {
                if (*p > 0x20)
                  {
                    if (*p == '#')
                      {
                        self->incr_mode = INCR_M_C0;
                        goto incr_m_c;
                      }
                    else
                      {
                        self->incr_mode = INCR_M_JSON;
                        goto incr_m_json;
                      }
                  }
                else if (!*p)
                  goto interrupt;

                ++p;
              }

          /* skip a single char inside a string (for \\-processing) */
          case INCR_M_BS:
            if (!*p)
              goto interrupt;

            ++p;
            self->incr_mode = INCR_M_STR;
            goto incr_m_str;

          /* inside #-style comments */
          case INCR_M_C0:



( run in 0.428 second using v1.01-cache-2.11-cpan-39bf76dae61 )