ShiftJIS-X0213-MapUTF
view release on metacpan or search on metacpan
MaxLenToU32,
};
/*
 * Per-encoding length factor for the "from Unicode" converters.
 * Indexed by id_utf (= ix % NUM_fromUTF) in unicode_to_sjis2004 and passed
 * to STMT_ASSIGN_LENDST.
 * NOTE(review): presumably the factor used to size the destination buffer
 * from the source length -- confirm against STMT_ASSIGN_LENDST.
 * The slot order must stay in step with the ALIAS numbering of
 * unicode_to_sjis2004 and with ord_uv_in[].
 */
static STRLEN maxlen_fm[NUM_fromUTF] = {
MaxLenFmUni, /* 0: unicode (Perl character string) */
MaxLenFmU8, /* 1: utf8 */
MaxLenFmU16, /* 2: utf16le */
MaxLenFmU16, /* 3: utf16be */
MaxLenFmU32, /* 4: utf32le */
MaxLenFmU32, /* 5: utf32be */
MaxLenFmU16, /* 6: utf16 (byte order decided at run time) */
MaxLenFmU32, /* 7: utf32 (byte order decided at run time) */
};
/*
 * Encoder dispatch table for the "to Unicode" converters, indexed by
 * id_utf (= ix % NUM_toUTF) in sjis2004_to_unicode.
 * Each entry is called as app_uv(buf, uv); the callers use the returned
 * pointer as the position just after the bytes written (they either
 * continue writing from it or subtract buf to get a length).
 * The slot order must stay in step with the ALIAS numbering of
 * sjis2004_to_unicode.
 */
static U8* (*app_uv_in[NUM_toUTF])(U8*, UV) = {
NULL, /* 0: unicode -- never called; the caller uses uvuni_to_utf8() */
app_in_utf8, /* 1: utf8 */
app_in_utf16le, /* 2: utf16le */
app_in_utf16be, /* 3: utf16be */
app_in_utf32le, /* 4: utf32le */
app_in_utf32be, /* 5: utf32be */
};
/*
 * Decoder dispatch table for the "from Unicode" converters, indexed by
 * id_utf (= ix % NUM_fromUTF) in unicode_to_sjis2004.
 * Each entry is called as ord_uv(p, bytes_left, &retlen) and returns the
 * decoded code point; the caller treats retlen == 0 as "could not decode
 * at p" and otherwise advances p by retlen.
 * The slot order must stay in step with the ALIAS numbering of
 * unicode_to_sjis2004 and with maxlen_fm[].
 */
static UV (*ord_uv_in[NUM_fromUTF])(U8 *, STRLEN, STRLEN *) = {
NULL, /* 0: unicode -- never called; the caller uses utf8n_to_uvuni() */
ord_in_utf8, /* 1: utf8 */
ord_in_utf16le, /* 2: utf16le */
ord_in_utf16be, /* 3: utf16be */
ord_in_utf32le, /* 4: utf32le */
ord_in_utf32be, /* 5: utf32be */
ord_in_utf16be, /* 6: utf16 -- big-endian unless the caller finds an LE BOM and swaps in the LE reader */
ord_in_utf32be, /* 7: utf32 -- big-endian unless the caller finds an LE BOM and swaps in the LE reader */
};
MODULE = ShiftJIS::X0213::MapUTF PACKAGE = ShiftJIS::X0213::MapUTF
PROTOTYPES: DISABLE
void
sjis2004_to_unicode (...)
ALIAS:
sjis2004_to_utf8 = 1
sjis2004_to_utf16le = 2
sjis2004_to_utf16be = 3
sjis2004_to_utf32le = 4
sjis2004_to_utf32be = 5
sjis0213_to_unicode = 6
sjis0213_to_utf8 = 7
sjis0213_to_utf16le = 8
sjis0213_to_utf16be = 9
sjis0213_to_utf32le = 10
sjis0213_to_utf32be = 11
PREINIT:
/*
 * Shared body of all sjis2004_to_* / sjis0213_to_* converters: decodes a
 * Shift_JIS byte string and pushes one SV holding the text in the target
 * Unicode encoding. The ALIAS index ix selects both variants:
 *   ix <  NUM_toUTF  -> use2004 is true (the sjis2004_* names)
 *   ix %  NUM_toUTF  -> id_utf, the output encoding
 *                       (0 = Perl character string, 1.. = app_uv_in[] slot)
 * NOTE(review): the alias numbering assumes NUM_toUTF == 6 -- confirm.
 *
 * Locals are referenced by name from the STMT_* macros, so do not rename
 * them. NOTE(review): lb, use2004, srclen and dstlen are not used directly
 * below; presumably the macros consume them -- confirm.
 */
SV *src, *dst, *cvref;
STRLEN srclen, dstlen, mblen, ulen;
/* uni: scratch buffer for one encoded code point (handler path only) */
U8 *s, *e, *p, *d, uni[UTF8_MAXLEN + 1];
UV uv, u_temp;
struct leading lb;
U8* (*app_uv)(U8*, UV);
int id_utf, use2004;
PPCODE:
use2004 = ix < NUM_toUTF;
id_utf = ix % NUM_toUTF;
/* NOTE(review): presumably sets cvref (optional handler) and src from the
 * argument list, using the name only for error messages -- confirm. */
STMT_ASSIGN_CVREF_AND_SRC(funcname_to[ix])
/* The input is treated as raw bytes: a UTF-8 flagged source is converted on
 * a mortal copy so the caller's SV is left untouched. With fail_ok == 0,
 * sv_utf8_downgrade croaks if the string holds a character above 0xFF. */
if (SvUTF8(src)) {
src = sv_mortalcopy(src);
sv_utf8_downgrade(src, 0);
}
/* NOTE(review): presumably sets s/e to the source byte range and creates
 * dst with room for the worst case -- confirm against the macro. */
STMT_ASSIGN_LENDST(maxlen_to[id_utf])
/* Only the "unicode" variant returns a character string; the others return
 * encoded bytes. */
if (id_utf == 0)
SvUTF8_on(dst);
app_uv = app_uv_in[id_utf];
if (cvref) {
/* Handler path: output is appended piecewise to dst, and anything that
 * cannot be converted is handed to sv_cat_retcvref together with cvref.
 * NOTE(review): sv_cat_retcvref presumably calls the handler and appends
 * its return value; ownership of the new SV argument is not visible
 * here -- confirm. */
for (p = s; p < e; p += mblen) {
STMT_GET_MBLEN
if (!mblen) {
/* Byte at p does not start a character: report its value (flag TRUE)
 * and step over it. mblen is 0 here, so the loop increment adds
 * nothing after the p++. */
sv_cat_retcvref(dst, cvref, newSVuv((UV)*p), TRUE);
p++;
continue;
}
STMT_GET_UV_FROM_MB
/* uv == 0 counts as a real mapping only when the source byte itself is
 * NUL; otherwise it means "no mapping" and the raw bytes of the
 * character go to the handler (flag FALSE). */
if (uv || !*p) {
if (Is_VALID_UTF(uv)) {
ulen = id_utf ? app_uv(uni, uv) - uni
: uvuni_to_utf8(uni, uv) - uni;
sv_catpvn(dst, (char*)uni, ulen);
}
else {
/* uv is emitted as two code points: its bits above 15 first, then
 * its low 16 bits.
 * NOTE(review): presumably the table packs a two-code-point
 * sequence into one value -- confirm against the mapping data. */
u_temp = (uv >> 16);
ulen = id_utf ? app_uv(uni, u_temp) - uni
: uvuni_to_utf8(uni, u_temp) - uni;
sv_catpvn(dst, (char*)uni, ulen);
u_temp = (uv & 0xFFFF);
ulen = id_utf ? app_uv(uni, u_temp) - uni
: uvuni_to_utf8(uni, u_temp) - uni;
sv_catpvn(dst, (char*)uni, ulen);
}
}
else
sv_cat_retcvref(dst, cvref, newSVpvn((char*)p, mblen), FALSE);
}
}
else {
/* No handler: encode straight into dst's buffer through the write cursor
 * d; bytes that do not start a character and characters with no mapping
 * are dropped silently. */
d = (U8*)SvPVX(dst);
for (p = s; p < e; p += mblen) {
STMT_GET_MBLEN
if (!mblen) {
/* skip one undecodable byte (the loop increment then adds mblen == 0) */
p++;
continue;
}
STMT_GET_UV_FROM_MB
/* same "mapped" test as in the handler path; there is no else branch, so
 * unmapped characters produce no output */
if (uv || !*p) {
if (Is_VALID_UTF(uv)) {
d = id_utf ? app_uv(d, uv) : uvuni_to_utf8(d, uv);
}
else {
/* two code points from one value: bits above 15, then the low 16 bits */
u_temp = (uv >> 16);
d = id_utf ? app_uv(d, u_temp) : uvuni_to_utf8(d, u_temp);
u_temp = (uv & 0xFFFF);
d = id_utf ? app_uv(d, u_temp) : uvuni_to_utf8(d, u_temp);
}
}
}
/* NUL-terminate and record the number of bytes actually written */
*d = '\0';
SvCUR_set(dst, d - (U8*)SvPVX(dst));
}
/* return the converted string as the single result */
XPUSHs(dst);
void
unicode_to_sjis2004 (...)
ALIAS:
utf8_to_sjis2004 = 1
utf16le_to_sjis2004 = 2
utf16be_to_sjis2004 = 3
utf32le_to_sjis2004 = 4
utf32be_to_sjis2004 = 5
utf16_to_sjis2004 = 6
utf32_to_sjis2004 = 7
unicode_to_sjis0213 = 8
utf8_to_sjis0213 = 9
utf16le_to_sjis0213 = 10
utf16be_to_sjis0213 = 11
utf32le_to_sjis0213 = 12
utf32be_to_sjis0213 = 13
utf16_to_sjis0213 = 14
utf32_to_sjis0213 = 15
PREINIT:
SV *src, *dst, *cvref;
STRLEN srclen, dstlen, retlen;
U8 *s, *e, *p, *d, mbc[3];
U16 j, *tbl_row, **tbl_plain;
UV uv, uv2;
UV (*ord_uv)(U8 *, STRLEN, STRLEN *);
int id_utf, use2004;
PPCODE:
use2004 = ix < NUM_fromUTF;
id_utf = ix % NUM_fromUTF;
STMT_ASSIGN_CVREF_AND_SRC(funcname_fm[ix])
if (id_utf == 0 && !SvUTF8(src)) {
src = sv_mortalcopy(src);
sv_utf8_upgrade(src);
}
else if (id_utf && SvUTF8(src)) {
src = sv_mortalcopy(src);
sv_utf8_downgrade(src, FALSE);
}
STMT_ASSIGN_LENDST(maxlen_fm[id_utf])
ord_uv = ord_uv_in[id_utf];
if (id_utf == 6 && 2 <= e - s) { /* UTF-16 */
if (memEQ("\xFF\xFE",s,2)) {
s += 2;
ord_uv = ord_in_utf16le;
}
else if (memEQ("\xFE\xFF",s,2)) {
s += 2;
}
}
else if (id_utf == 7 && 4 <= e - s) { /* UTF-32 */
if (memEQ("\xFF\xFE\x00\x00",s,4)) {
s += 4;
ord_uv = ord_in_utf32le;
}
else if (memEQ("\x00\x00\xFE\xFF",s,4)) {
s += 4;
}
}
if (cvref) {
for (p = s; p < e;) {
uv = id_utf
? ord_uv(p, e - p, &retlen)
: utf8n_to_uvuni(p, (e - p), &retlen, 0);
if (retlen)
p += retlen;
else {
sv_cat_retcvref(dst, cvref, newSVuv((UV)*p), TRUE);
p++;
continue;
}
STMT_FETCH_FROM_UV_AND_UV2
if (j || !uv) {
if (j >= 256) {
mbc[0] = (U8)(j >> 8);
mbc[1] = (U8)(j & 0xff);
sv_catpvn(dst, (char*)mbc, 2);
}
else {
mbc[0] = (U8)(j & 0xff);
sv_catpvn(dst, (char*)mbc, 1);
( run in 0.848 second using v1.01-cache-2.11-cpan-5511b514fd6 )