ShiftJIS-X0213-MapUTF

 view release on metacpan or  search on metacpan

MapUTF.xs  view on Meta::CPAN

    MaxLenToU32,
};

static STRLEN maxlen_fm[NUM_fromUTF] = {
    MaxLenFmUni,
    MaxLenFmU8,
    MaxLenFmU16,
    MaxLenFmU16,
    MaxLenFmU32,
    MaxLenFmU32,
    MaxLenFmU16,
    MaxLenFmU32,
};

static U8* (*app_uv_in[NUM_toUTF])(U8*, UV) = {
    NULL,
    app_in_utf8,
    app_in_utf16le,
    app_in_utf16be,
    app_in_utf32le,
    app_in_utf32be,
};

static UV (*ord_uv_in[NUM_fromUTF])(U8 *, STRLEN, STRLEN *) = {
    NULL,
    ord_in_utf8,
    ord_in_utf16le,
    ord_in_utf16be,
    ord_in_utf32le,
    ord_in_utf32be,
    ord_in_utf16be, /* w/o BOM*/
    ord_in_utf32be, /* w/o BOM*/
};

MODULE = ShiftJIS::X0213::MapUTF	PACKAGE = ShiftJIS::X0213::MapUTF

PROTOTYPES: DISABLE

void
sjis2004_to_unicode (...)
  ALIAS:
    sjis2004_to_utf8    = 1
    sjis2004_to_utf16le = 2
    sjis2004_to_utf16be = 3
    sjis2004_to_utf32le = 4
    sjis2004_to_utf32be = 5
    sjis0213_to_unicode = 6
    sjis0213_to_utf8    = 7
    sjis0213_to_utf16le = 8
    sjis0213_to_utf16be = 9
    sjis0213_to_utf32le = 10
    sjis0213_to_utf32be = 11
  PREINIT:
    SV *src, *dst, *cvref;
    STRLEN srclen, dstlen, mblen, ulen;
    U8 *s, *e, *p, *d, uni[UTF8_MAXLEN + 1];
    UV uv, u_temp;
    struct leading lb;
    U8* (*app_uv)(U8*, UV);
    int  id_utf, use2004;
  PPCODE:
    use2004 = ix < NUM_toUTF;
    id_utf  = ix % NUM_toUTF;

    STMT_ASSIGN_CVREF_AND_SRC(funcname_to[ix])
    if (SvUTF8(src)) {
	src = sv_mortalcopy(src);
	sv_utf8_downgrade(src, 0);
    }
    STMT_ASSIGN_LENDST(maxlen_to[id_utf])
    if (id_utf == 0)
	SvUTF8_on(dst);

    app_uv = app_uv_in[id_utf];

    if (cvref) {
	for (p = s; p < e; p += mblen) {
	    STMT_GET_MBLEN
	    if (!mblen) {
		sv_cat_retcvref(dst, cvref, newSVuv((UV)*p), TRUE);
		p++;
		continue;
	    }
	    STMT_GET_UV_FROM_MB

	    if (uv || !*p) {
		if (Is_VALID_UTF(uv)) {
		    ulen = id_utf ? app_uv(uni, uv) - uni
				  : uvuni_to_utf8(uni, uv) - uni;
		    sv_catpvn(dst, (char*)uni, ulen);
		}
		else {
		    u_temp = (uv >> 16);
		    ulen = id_utf ? app_uv(uni, u_temp) - uni
				  : uvuni_to_utf8(uni, u_temp) - uni;
		    sv_catpvn(dst, (char*)uni, ulen);

		    u_temp = (uv & 0xFFFF);
		    ulen = id_utf ? app_uv(uni, u_temp) - uni
				  : uvuni_to_utf8(uni, u_temp) - uni;
		    sv_catpvn(dst, (char*)uni, ulen);
		}
	    }
	    else
		sv_cat_retcvref(dst, cvref, newSVpvn((char*)p, mblen), FALSE);
	}
    }
    else {
	d = (U8*)SvPVX(dst);
	for (p = s; p < e; p += mblen) {
	    STMT_GET_MBLEN
	    if (!mblen) {
		p++;
		continue;
	    }
	    STMT_GET_UV_FROM_MB

	    if (uv || !*p) {
		if (Is_VALID_UTF(uv)) {
		    d = id_utf ? app_uv(d, uv) : uvuni_to_utf8(d, uv);
		}
		else {
		    u_temp = (uv >> 16);
		    d = id_utf ? app_uv(d, u_temp) : uvuni_to_utf8(d, u_temp);

		    u_temp = (uv & 0xFFFF);
		    d = id_utf ? app_uv(d, u_temp) : uvuni_to_utf8(d, u_temp);
		}
	    }
	}
	*d = '\0';
	SvCUR_set(dst, d - (U8*)SvPVX(dst));
    }
    XPUSHs(dst);


void
unicode_to_sjis2004 (...)
  ALIAS:
       utf8_to_sjis2004 = 1
    utf16le_to_sjis2004 = 2
    utf16be_to_sjis2004 = 3
    utf32le_to_sjis2004 = 4
    utf32be_to_sjis2004 = 5
      utf16_to_sjis2004 = 6
      utf32_to_sjis2004 = 7
    unicode_to_sjis0213 = 8
       utf8_to_sjis0213 = 9
    utf16le_to_sjis0213 = 10
    utf16be_to_sjis0213 = 11
    utf32le_to_sjis0213 = 12
    utf32be_to_sjis0213 = 13
      utf16_to_sjis0213 = 14
      utf32_to_sjis0213 = 15
  PREINIT:
    SV *src, *dst, *cvref;
    STRLEN srclen, dstlen, retlen;
    U8 *s, *e, *p, *d, mbc[3];
    U16 j, *tbl_row, **tbl_plain;
    UV uv, uv2;
    UV (*ord_uv)(U8 *, STRLEN, STRLEN *);
    int  id_utf, use2004;
  PPCODE:
    use2004 = ix < NUM_fromUTF;
    id_utf  = ix % NUM_fromUTF;

    STMT_ASSIGN_CVREF_AND_SRC(funcname_fm[ix])
    if (id_utf == 0 && !SvUTF8(src)) {
	src = sv_mortalcopy(src);
	sv_utf8_upgrade(src);
    }
    else if (id_utf && SvUTF8(src)) {
	src = sv_mortalcopy(src);
	sv_utf8_downgrade(src, FALSE);
    }
    STMT_ASSIGN_LENDST(maxlen_fm[id_utf])

    ord_uv = ord_uv_in[id_utf];

    if (id_utf == 6 && 2 <= e - s) { /* UTF-16 */
	if (memEQ("\xFF\xFE",s,2)) {
	    s += 2;
	    ord_uv = ord_in_utf16le;
	}
	else if (memEQ("\xFE\xFF",s,2)) {
	    s += 2;
	}
    }
    else if (id_utf == 7 && 4 <= e - s) { /* UTF-32 */
	if (memEQ("\xFF\xFE\x00\x00",s,4)) {
	    s += 4;
	    ord_uv = ord_in_utf32le;
	}
	else if (memEQ("\x00\x00\xFE\xFF",s,4)) {
	    s += 4;
	}
    }

    if (cvref) {
	for (p = s; p < e;) {
	    uv = id_utf
		? ord_uv(p, e - p, &retlen)
		: utf8n_to_uvuni(p, (e - p), &retlen, 0);

	    if (retlen)
		p += retlen;
	    else {
		sv_cat_retcvref(dst, cvref, newSVuv((UV)*p), TRUE);
		p++;
		continue;
	    }

	    STMT_FETCH_FROM_UV_AND_UV2

	    if (j || !uv) {
		if (j >= 256) {
		    mbc[0] = (U8)(j >> 8);
		    mbc[1] = (U8)(j & 0xff);
		    sv_catpvn(dst, (char*)mbc, 2);
		}
		else {
		    mbc[0] = (U8)(j & 0xff);
		    sv_catpvn(dst, (char*)mbc, 1);



( run in 0.848 second using v1.01-cache-2.11-cpan-5511b514fd6 )