ShiftJIS-CP932-MapUTF

 view release on metacpan or  search on metacpan

MapUTF.xs  view on Meta::CPAN

    MaxLenToUni,
    MaxLenToU8,
    MaxLenToU16,
    MaxLenToU16,
    MaxLenToU32,
    MaxLenToU32,
};

/*
 * Per-alias length factor handed to STMT_ASSIGN_LENDST() by
 * unicode_to_cp932(); the slot number is that function's ALIAS index ix.
 * NOTE(review): the MaxLenFm* values and the way the macro uses them are
 * defined outside this table -- confirm there.
 */
static STRLEN maxlen_fm[] = {
    MaxLenFmUni,	/* 0: unicode_to_cp932 */
    MaxLenFmU8,		/* 1: utf8_to_cp932    */
    MaxLenFmU16,	/* 2: utf16le_to_cp932 */
    MaxLenFmU16,	/* 3: utf16be_to_cp932 */
    MaxLenFmU32,	/* 4: utf32le_to_cp932 */
    MaxLenFmU32,	/* 5: utf32be_to_cp932 */
    MaxLenFmU16,	/* 6: utf16_to_cp932   */
    MaxLenFmU32,	/* 7: utf32_to_cp932   */
};

/*
 * Encoder selected by cp932_to_unicode() through its ALIAS index ix.
 * Each function takes an output position and a code point and returns a
 * U8*; the caller uses the return value as the position after the
 * written bytes.
 */
static U8* (*app_uv_in[])(U8*, UV) = {
    NULL,		/* 0: unused; the caller calls uvuni_to_utf8() when ix == 0 */
    app_in_utf8,	/* 1: cp932_to_utf8    */
    app_in_utf16le,	/* 2: cp932_to_utf16le */
    app_in_utf16be,	/* 3: cp932_to_utf16be */
    app_in_utf32le,	/* 4: cp932_to_utf32le */
    app_in_utf32be,	/* 5: cp932_to_utf32be */
};

/*
 * Decoder selected by unicode_to_cp932() through its ALIAS index ix.
 * Each function is called with the current position, the number of bytes
 * remaining and a STRLEN out-parameter; the caller advances by that
 * out-parameter and treats 0 as "could not decode here".
 */
static UV (*ord_uv_in[])(U8 *, STRLEN, STRLEN *) = {
    NULL,		/* 0: unused; the caller calls utf8n_to_uvuni() when ix == 0 */
    ord_in_utf8,	/* 1: utf8_to_cp932    */
    ord_in_utf16le,	/* 2: utf16le_to_cp932 */
    ord_in_utf16be,	/* 3: utf16be_to_cp932 */
    ord_in_utf32le,	/* 4: utf32le_to_cp932 */
    ord_in_utf32be,	/* 5: utf32be_to_cp932 */
    ord_in_utf16be, /* 6: utf16_to_cp932; big-endian unless the caller sees an LE BOM */
    ord_in_utf32be, /* 7: utf32_to_cp932; big-endian unless the caller sees an LE BOM */
};


MODULE = ShiftJIS::CP932::MapUTF	PACKAGE = ShiftJIS::CP932::MapUTF

PROTOTYPES: DISABLE

void
cp932_to_unicode(...)
  ALIAS:
    cp932_to_utf8    = 1
    cp932_to_utf16le = 2
    cp932_to_utf16be = 3
    cp932_to_utf32le = 4
    cp932_to_utf32be = 5
  PREINIT:
    SV *src, *dst, *cvref, *mod;
    STRLEN srclen, dstlen, modlen, mblen, ulen;
    U8 *s, *e, *p, *d, *m, *m_e, uni[UTF8_MAXLEN + 1];
    UV uv;
    struct leading lb;
    U8* (*app_uv)(U8*, UV);
    bool mod_g, mod_s, mod_t;
  PPCODE:
    /*
     * Converts a CP932 string to Unicode and pushes the resulting SV.
     * The ALIAS index ix picks the output form: ix == 0 builds a string
     * with the UTF8 flag on using uvuni_to_utf8(); ix 1..5 emit bytes
     * through the encoder stored in app_uv_in[ix].
     *
     * NOTE(review): cvref, src, mod, s, e, dst, mblen and uv are never
     * assigned in the visible code; they are presumably set by the STMT_*
     * macros, whose definitions are outside this view -- confirm there.
     */
    STMT_ASSIGN_CVREF_AND_SRC(funcname_to[ix])
    /* Work on a downgraded mortal copy so the caller's SV is left untouched. */
    if (SvUTF8(src)) {
	src = sv_mortalcopy(src);
	sv_utf8_downgrade(src, 0);
    }
    STMT_ASSIGN_LENDST(maxlen_to[ix])
    if (ix == 0)
	SvUTF8_on(dst);

    /* Validate the option string: only 'g', 's' and 't' are accepted. */
    m = (U8*)SvPV(mod, modlen);
    for (p = m, m_e = m + modlen; p < m_e; p++) {
	if (*p == 'g' || *p == 's' || *p == 't')
	    continue;
    	croak("Unknown option in %s: '%c'", funcname_to[ix], *p);
    }
    /*
     * These flags are not referenced directly below; presumably the
     * STMT_GET_* macros read them -- TODO confirm.
     */
    mod_g = memchr((void*)m, 'g', modlen) != NULL;
    mod_s = memchr((void*)m, 's', modlen) != NULL;
    mod_t = memchr((void*)m, 't', modlen) != NULL;

    /* Slot 0 of the table is NULL; every use below is guarded by ix != 0. */
    app_uv = app_uv_in[ix];

    if (cvref) {
	/* A handler was supplied: append piecewise so its output can be spliced in. */
	for (p = s; p < e; p += mblen) {
	    STMT_GET_MBLEN

	    /*
	     * mblen == 0: hand the single byte value to the handler (flag TRUE)
	     * and step over it; the loop increment then adds 0.
	     */
	    if (!mblen) {
		sv_cat_retcvref(dst, cvref, newSVuv((UV)*p), TRUE);
		p++;
		continue;
	    }

	    STMT_GET_UV_FROM_MB

	    /*
	     * Encode when uv is non-zero, or when the source byte itself is NUL
	     * (presumably uv == 0 otherwise means "no mapping" -- confirm).
	     */
	    if (uv || !*p) {
		ulen = ix ? app_uv(uni, uv) - uni
			  : uvuni_to_utf8(uni, uv) - uni;
		sv_catpvn(dst, (char*)uni, ulen);
	    } else
		/* Otherwise pass the raw mblen source bytes to the handler (flag FALSE). */
		sv_cat_retcvref(dst, cvref, newSVpvn((char*)p, mblen), FALSE);
	}
    }
    else {
	/*
	 * No handler: write straight into dst's buffer through d.
	 * Assumes STMT_ASSIGN_LENDST sized the buffer for the worst case -- confirm.
	 */
	d = (U8*)SvPVX(dst);
	for (p = s; p < e; p += mblen) {
	    STMT_GET_MBLEN

	    /* mblen == 0: skip this byte without producing output. */
	    if (!mblen) {
		p++;
		continue;
	    }

	    STMT_GET_UV_FROM_MB

	    /* Characters failing this test are dropped from the output. */
	    if (uv || !*p) {
		d = ix ? app_uv(d, uv) : uvuni_to_utf8(d, uv);
	    }
	}
	/* Terminate the buffer and record how many bytes were written. */
	*d = '\0';
	SvCUR_set(dst, d - (U8*)SvPVX(dst));
    }
    XPUSHs(dst);


void
unicode_to_cp932(...)
  ALIAS:
    utf8_to_cp932    = 1
    utf16le_to_cp932 = 2
    utf16be_to_cp932 = 3
    utf32le_to_cp932 = 4
    utf32be_to_cp932 = 5
    utf16_to_cp932   = 6
    utf32_to_cp932   = 7
  PREINIT:
    SV *src, *dst, *cvref, *mod;
    STRLEN srclen, dstlen, modlen, retlen;
    U8 *s, *e, *p, *d, *m, *m_e, mbc[3];
    U16 j, *t;
    UV uv;
    UV (*ord_uv)(U8 *, STRLEN, STRLEN *);
    bool mod_g, mod_s, mod_f;
  PPCODE:
    STMT_ASSIGN_CVREF_AND_SRC(funcname_fm[ix])
    if (ix == 0 && !SvUTF8(src)) {
	src = sv_mortalcopy(src);
	sv_utf8_upgrade(src);
    }
    else if (ix && SvUTF8(src)) {
	src = sv_mortalcopy(src);
	sv_utf8_downgrade(src, FALSE);
    }
    STMT_ASSIGN_LENDST(maxlen_fm[ix])

    m = (U8*)SvPV(mod, modlen);
    for (p = m, m_e = m + modlen; p < m_e; p++) {
	if (*p == 'g' || *p == 's' || *p == 'f')
	    continue;
    	croak("Unknown option in %s: '%c'", funcname_fm[ix], *p);
    }
    mod_g = memchr((void*)m, 'g', modlen) != NULL;
    mod_s = memchr((void*)m, 's', modlen) != NULL;
    mod_f = memchr((void*)m, 'f', modlen) != NULL;

    ord_uv = ord_uv_in[ix];

    if (ix == 6 && 2 <= e - s) { /* UTF-16 */
	if (memEQ("\xFF\xFE",s,2)) {
	    s += 2;
	    ord_uv = ord_in_utf16le;
	}
	else if (memEQ("\xFE\xFF",s,2)) {
	    s += 2;
	}
    }
    else if (ix == 7 && 4 <= e - s) {  /* UTF-32 */
	if (memEQ("\xFF\xFE\x00\x00",s,4)) {
	    s += 4;
	    ord_uv = ord_in_utf32le;
	}
	else if (memEQ("\x00\x00\xFE\xFF",s,4)) {
	    s += 4;
	}
    }

    if (cvref) {
	for (p = s; p < e;) {
	    uv = ix
		? ord_uv(p, e - p, &retlen)
		: utf8n_to_uvuni(p, (e - p), &retlen, 0);

	    if (retlen)
		p += retlen;
	    else {
		sv_cat_retcvref(dst, cvref, newSVuv((UV)*p), TRUE);
		p++;
		continue;
	    }

	    if (mod_g && Is_CP932_PUAe(uv)) {
		uv -= CP932_PUA_BASE;
		mbc[0] = (U8)((uv / 188) + 0xF0);
		mbc[1] = (U8)(uv % 188 + (uv % 188 > 0x3E ? 0x41 : 0x40));



( run in 1.586 second using v1.01-cache-2.11-cpan-5511b514fd6 )