unicode results from the CPAN

Data-Dumper

view release on metacpan or search on metacpan

static bool
globname_supra_ascii(const char *ss, STRLEN len)
{
    const U8 *s = (const U8 *) ss;
    const U8 *send = s+len;
    while (s < send) {
        if (!isASCII(*s))
            return TRUE;
        s++;
    }
    return FALSE;
}
#endif

/* does a hash key need to be quoted (to the left of => ).
   Previously this used (globname_)needs_quote() which accepted strings
   like '::foo', but these aren't safe as unquoted keys under strict.
*/
static bool
key_needs_quote(const char *s, STRLEN len) {
    const char *send = s+len;

    if (safe_decimal_number(s, len)) {
        return FALSE;
    }
    else if (isIDFIRST(*s)) {
        while (++s<send)
            if (!isWORDCHAR(*s))
                return TRUE;
    }
    else
        return TRUE;

    return FALSE;
}

/* Check that the SV can be represented as a simple decimal integer.
 *
 * The perl code does this by matching against /^(?:0|-?[1-9]\d{0,8})\z/
*/
static bool
safe_decimal_number(const char *p, STRLEN len) {
    if (len == 1 && *p == '0')
        return TRUE;

    if (len && *p == '-') {
        ++p;
        --len;
    }

    if (len == 0 || *p < '1' || *p > '9')
        return FALSE;

    ++p;
    --len;

    if (len > 8)
        return FALSE;

    while (len > 0) {
         /* the perl code checks /\d/ but we don't want unicode digits here */
         if (*p < '0' || *p > '9')
             return FALSE;
         ++p;
         --len;
    }
    return TRUE;
}

/* count the number of "'"s and "\"s in string */
static STRLEN
num_q(const char *s, STRLEN slen)
{
    STRLEN ret = 0;

    while (slen > 0) {
	if (*s == '\'' || *s == '\\')
	    ++ret;
	++s;
	--slen;
    }
    return ret;
}


/* returns number of chars added to escape "'"s and "\"s in s */
/* slen number of characters in s will be escaped */
/* destination must be long enough for additional chars */
static STRLEN
esc_q(char *d, const char *s, STRLEN slen)
{
    STRLEN ret = 0;

    while (slen > 0) {
	switch (*s) {
	case '\'':
	case '\\':
	    *d = '\\';
	    ++d; ++ret;
            /* FALLTHROUGH */
	default:
	    *d = *s;
	    ++d; ++s; --slen;
	    break;
	}
    }
    return ret;
}

/* this function is also misused for implementing $Useqq */
static STRLEN
esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
{
    char *r, *rstart;
    const char *s = src;
    const char * const send = src + slen;
    STRLEN j, cur = SvCUR(sv);
    /* Could count 128-255 and 256+ in two variables, if we want to
       be like &qquote and make a distinction.  */
    STRLEN grow = 0;	/* bytes needed to represent chars 128+ */
    /* STRLEN topbit_grow = 0;	bytes needed to represent chars 128-255 */

Dumper.xs view on Meta::CPAN

        sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
		+ 2*qq_escapables + normal);
        rstart = r = SvPVX(sv) + cur;

        *r++ = '"';

        for (s = src; s < send; s += increment) {
            U8 c0 = *(U8 *)s;
            UV k;

            if (do_utf8 && ! UTF8_IS_INVARIANT(c0)) {

                /* In UTF-8, we output as \x{} all chars that require more than
                 * a single byte in UTF-8 to represent. */
                k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);

                /* treat invalid utf8 byte by byte.  This loop iteration gets the
                * first byte */
                increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);

                r = r + my_sprintf(r, "\\x{%" UVxf "}", k);
                continue;
            }

            /* Here 1) isn't UTF-8; or
             *      2) the current character is ASCII; or
             *      3) it is an EBCDIC platform and is a low ordinal
             *         non-ASCII control.
             * In each case the character occupies just one byte */
            k = *(U8*)s;
            increment = 1;

            if (isPRINT(k)) {
                /* These need a backslash escape */
                if (k == '"' || k == '\\' || k == '$' || k == '@') {
                    *r++ = '\\';
                }

                *r++ = (char)k;
            }
            else if (! useqq) { /* non-qq, non-printable, low-ordinal is
                                 * output raw */
                *r++ = (char)k;
            }
            else {  /* Is qq means use escape sequences */
	        bool next_is_digit;

		*r++ = '\\';
		switch (k) {
		case '\a':  *r++ = 'a'; break;
		case '\b':  *r++ = 'b'; break;
		case '\t':  *r++ = 't'; break;
		case '\n':  *r++ = 'n'; break;
		case '\f':  *r++ = 'f'; break;
		case '\r':  *r++ = 'r'; break;
		case ESC_NATIVE: *r++ = 'e'; break;
		default:

		    /* only ASCII digits matter here, which are invariant,
		     * since we only encode characters \377 and under, or
		     * \x177 and under for a unicode string
		     */
                    next_is_digit = (s + 1 < send && isDIGIT(*(s+1)));

		    /* faster than
		     * r = r + my_sprintf(r, "%o", k);
		     */
		    if (k <= 7 && !next_is_digit) {
			*r++ = (char)k + '0';
		    } else if (k <= 63 && !next_is_digit) {
			*r++ = (char)(k>>3) + '0';
			*r++ = (char)(k&7) + '0';
		    } else {
			*r++ = (char)(k>>6) + '0';
			*r++ = (char)((k&63)>>3) + '0';
			*r++ = (char)(k&7) + '0';
		    }
		}
	    }
        }
        *r++ = '"';
    } else {
        /* Single quotes.  */
        sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
		+ qq_escapables + normal);
        rstart = r = SvPVX(sv) + cur;
        *r++ = '\'';
        for (s = src; s < send; s ++) {
            const char k = *s;
            if (k == '\'' || k == '\\')
                *r++ = '\\';
            *r++ = k;
        }
        *r++ = '\'';
    }
    *r = '\0';
    j = r - rstart;
    SvCUR_set(sv, cur + j);

    return j;
}

/* append a repeated string to an SV */
static SV *
sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
{
    if (!sv)
	sv = newSVpvs("");
#ifdef DEBUGGING
    else
	assert(SvTYPE(sv) >= SVt_PV);
#endif

    if (n > 0) {
	SvGROW(sv, len*n + SvCUR(sv) + 1);
	if (len == 1) {
	    char * const start = SvPVX(sv) + SvCUR(sv);
	    SvCUR_set(sv, SvCUR(sv) + n);
	    start[n] = '\0';
	    while (n > 0)
		start[--n] = str[0];

( run in 1.884 second using v1.01-cache-2.11-cpan-39bf76dae61 )