ShiftJIS-CP932-MapUTF
view release on metacpan or search on metacpan
MaxLenToUni,
MaxLenToU8,
MaxLenToU16,
MaxLenToU16,
MaxLenToU32,
MaxLenToU32,
};
/*
 * Size factors for the *_to_cp932 XSUB, indexed by its ALIAS number ix.
 * The selected entry is handed to STMT_ASSIGN_LENDST() before conversion.
 * NOTE(review): presumably each value is the worst-case output length per
 * unit of source length, used to size the destination buffer -- confirm
 * against the MaxLenFm* and STMT_ASSIGN_LENDST definitions.
 */
static STRLEN maxlen_fm[] = {
MaxLenFmUni, /* 0: unicode_to_cp932 */
MaxLenFmU8, /* 1: utf8_to_cp932 */
MaxLenFmU16, /* 2: utf16le_to_cp932 */
MaxLenFmU16, /* 3: utf16be_to_cp932 */
MaxLenFmU32, /* 4: utf32le_to_cp932 */
MaxLenFmU32, /* 5: utf32be_to_cp932 */
MaxLenFmU16, /* 6: utf16_to_cp932 (byte order decided at run time) */
MaxLenFmU32, /* 7: utf32_to_cp932 (byte order decided at run time) */
};
/*
 * Code point writers for the cp932_to_* XSUB, indexed by its ALIAS number.
 * Each function is called as app_uv(buf, uv) and its return value is used
 * as the position just past the bytes it wrote (callers subtract the start
 * pointer to get a length, or carry it forward as the next write position).
 */
static U8* (*app_uv_in[])(U8*, UV) = {
NULL, /* 0: cp932_to_unicode -- never called; that alias uses uvuni_to_utf8() */
app_in_utf8, /* 1: cp932_to_utf8 */
app_in_utf16le, /* 2: cp932_to_utf16le */
app_in_utf16be, /* 3: cp932_to_utf16be */
app_in_utf32le, /* 4: cp932_to_utf32le */
app_in_utf32be, /* 5: cp932_to_utf32be */
};
/*
 * Code point readers for the *_to_cp932 XSUB, indexed by its ALIAS number.
 * Each function is called as ord_uv(p, bytes_left, &retlen) and returns the
 * decoded code point; the caller treats retlen == 0 as "could not decode"
 * and otherwise advances the read pointer by retlen.
 */
static UV (*ord_uv_in[])(U8 *, STRLEN, STRLEN *) = {
NULL, /* 0: unicode_to_cp932 -- never called; that alias uses utf8n_to_uvuni() */
ord_in_utf8, /* 1: utf8_to_cp932 */
ord_in_utf16le, /* 2: utf16le_to_cp932 */
ord_in_utf16be, /* 3: utf16be_to_cp932 */
ord_in_utf32le, /* 4: utf32le_to_cp932 */
ord_in_utf32be, /* 5: utf32be_to_cp932 */
ord_in_utf16be, /* 6: utf16_to_cp932 -- big-endian unless the caller finds an FF FE BOM and switches to ord_in_utf16le */
ord_in_utf32be, /* 7: utf32_to_cp932 -- big-endian unless the caller finds an FF FE 00 00 BOM and switches to ord_in_utf32le */
};
MODULE = ShiftJIS::CP932::MapUTF PACKAGE = ShiftJIS::CP932::MapUTF
PROTOTYPES: DISABLE
void
cp932_to_unicode(...)
ALIAS:
cp932_to_utf8 = 1
cp932_to_utf16le = 2
cp932_to_utf16be = 3
cp932_to_utf32le = 4
cp932_to_utf32be = 5
PREINIT:
/*
 * Converts a CP932 byte string to Unicode and pushes the resulting SV.
 * The ALIAS index ix selects the output form: ix == 0 writes with
 * uvuni_to_utf8() and turns the UTF8 flag on for the result; ix 1..5
 * write through app_uv_in[ix] (utf8, utf16le, utf16be, utf32le, utf32be).
 * Arguments are unpacked by STMT_ASSIGN_CVREF_AND_SRC, defined outside
 * this view. NOTE(review): presumably (optional CODEREF, source string,
 * optional option string) -- confirm against the macro.
 * Croaks with "Unknown option in ..." when the option string contains a
 * character other than 'g', 's' or 't'.
 */
SV *src, *dst, *cvref, *mod;
STRLEN srclen, dstlen, modlen, mblen, ulen;
U8 *s, *e, *p, *d, *m, *m_e, uni[UTF8_MAXLEN + 1];
UV uv;
/* lb is not referenced directly below. NOTE(review): presumably used by the STMT_GET_* macros -- confirm. */
struct leading lb;
U8* (*app_uv)(U8*, UV);
bool mod_g, mod_s, mod_t;
PPCODE:
STMT_ASSIGN_CVREF_AND_SRC(funcname_to[ix])
/* CP932 input is processed as bytes: a UTF8-flagged source is copied first
 * so the caller's SV is not modified, then downgraded (second argument 0,
 * i.e. not fail_ok; per perlapi this croaks if a character does not fit
 * in a byte). */
if (SvUTF8(src)) {
src = sv_mortalcopy(src);
sv_utf8_downgrade(src, 0);
}
/* NOTE(review): STMT_ASSIGN_LENDST is expected to set s, e and srclen from
 * src and to create dst sized with maxlen_to[ix]; its definition is not
 * visible here -- confirm. */
STMT_ASSIGN_LENDST(maxlen_to[ix])
if (ix == 0)
SvUTF8_on(dst);
/* Reject the whole call if any option character is not recognised. */
m = (U8*)SvPV(mod, modlen);
for (p = m, m_e = m + modlen; p < m_e; p++) {
if (*p == 'g' || *p == 's' || *p == 't')
continue;
croak("Unknown option in %s: '%c'", funcname_to[ix], *p);
}
/* One flag per option letter. They are not read directly in this body.
 * NOTE(review): presumably consumed inside STMT_GET_UV_FROM_MB -- confirm. */
mod_g = memchr((void*)m, 'g', modlen) != NULL;
mod_s = memchr((void*)m, 's', modlen) != NULL;
mod_t = memchr((void*)m, 't', modlen) != NULL;
/* NULL when ix == 0; every call below is guarded by a test on ix. */
app_uv = app_uv_in[ix];
if (cvref) {
/* Callback mode: input that cannot be converted is handed, together with
 * cvref, to sv_cat_retcvref() (helper defined elsewhere; presumably it
 * appends the callback's return value to dst -- confirm). */
for (p = s; p < e; p += mblen) {
STMT_GET_MBLEN
if (!mblen) {
/* No valid character length at p: report the single byte value (as a UV,
 * last argument TRUE) and step over it. The loop increment then adds
 * mblen == 0, so the net advance is exactly one byte. */
sv_cat_retcvref(dst, cvref, newSVuv((UV)*p), TRUE);
p++;
continue;
}
STMT_GET_UV_FROM_MB
/* A zero uv is emitted only when the source byte itself is NUL; any other
 * zero uv takes the fallback path below. */
if (uv || !*p) {
ulen = ix ? app_uv(uni, uv) - uni
: uvuni_to_utf8(uni, uv) - uni;
sv_catpvn(dst, (char*)uni, ulen);
} else
/* Fallback: pass the raw mblen source bytes (last argument FALSE). */
sv_cat_retcvref(dst, cvref, newSVpvn((char*)p, mblen), FALSE);
}
}
else {
/* No callback: write straight into dst's buffer through d. NOTE(review):
 * relies on STMT_ASSIGN_LENDST having reserved enough room -- confirm. */
d = (U8*)SvPVX(dst);
for (p = s; p < e; p += mblen) {
STMT_GET_MBLEN
if (!mblen) {
/* Byte with no valid character length: skipped without output. */
p++;
continue;
}
STMT_GET_UV_FROM_MB
/* Same acceptance test as in callback mode; rejected characters produce
 * no output here. */
if (uv || !*p) {
d = ix ? app_uv(d, uv) : uvuni_to_utf8(d, uv);
}
}
/* Terminate the buffer and record the number of bytes written. */
*d = '\0';
SvCUR_set(dst, d - (U8*)SvPVX(dst));
}
XPUSHs(dst);
void
unicode_to_cp932(...)
ALIAS:
utf8_to_cp932 = 1
utf16le_to_cp932 = 2
utf16be_to_cp932 = 3
utf32le_to_cp932 = 4
utf32be_to_cp932 = 5
utf16_to_cp932 = 6
utf32_to_cp932 = 7
PREINIT:
SV *src, *dst, *cvref, *mod;
STRLEN srclen, dstlen, modlen, retlen;
U8 *s, *e, *p, *d, *m, *m_e, mbc[3];
U16 j, *t;
UV uv;
UV (*ord_uv)(U8 *, STRLEN, STRLEN *);
bool mod_g, mod_s, mod_f;
PPCODE:
STMT_ASSIGN_CVREF_AND_SRC(funcname_fm[ix])
if (ix == 0 && !SvUTF8(src)) {
src = sv_mortalcopy(src);
sv_utf8_upgrade(src);
}
else if (ix && SvUTF8(src)) {
src = sv_mortalcopy(src);
sv_utf8_downgrade(src, FALSE);
}
STMT_ASSIGN_LENDST(maxlen_fm[ix])
m = (U8*)SvPV(mod, modlen);
for (p = m, m_e = m + modlen; p < m_e; p++) {
if (*p == 'g' || *p == 's' || *p == 'f')
continue;
croak("Unknown option in %s: '%c'", funcname_fm[ix], *p);
}
mod_g = memchr((void*)m, 'g', modlen) != NULL;
mod_s = memchr((void*)m, 's', modlen) != NULL;
mod_f = memchr((void*)m, 'f', modlen) != NULL;
ord_uv = ord_uv_in[ix];
if (ix == 6 && 2 <= e - s) { /* UTF-16 */
if (memEQ("\xFF\xFE",s,2)) {
s += 2;
ord_uv = ord_in_utf16le;
}
else if (memEQ("\xFE\xFF",s,2)) {
s += 2;
}
}
else if (ix == 7 && 4 <= e - s) { /* UTF-32 */
if (memEQ("\xFF\xFE\x00\x00",s,4)) {
s += 4;
ord_uv = ord_in_utf32le;
}
else if (memEQ("\x00\x00\xFE\xFF",s,4)) {
s += 4;
}
}
if (cvref) {
for (p = s; p < e;) {
uv = ix
? ord_uv(p, e - p, &retlen)
: utf8n_to_uvuni(p, (e - p), &retlen, 0);
if (retlen)
p += retlen;
else {
sv_cat_retcvref(dst, cvref, newSVuv((UV)*p), TRUE);
p++;
continue;
}
if (mod_g && Is_CP932_PUAe(uv)) {
uv -= CP932_PUA_BASE;
mbc[0] = (U8)((uv / 188) + 0xF0);
mbc[1] = (U8)(uv % 188 + (uv % 188 > 0x3E ? 0x41 : 0x40));
( run in 1.586 second using v1.01-cache-2.11-cpan-5511b514fd6 )