Lingua-KO-Hangul-Util
view release on metacpan or search on metacpan
t = tmp;
t = uvuni_to_utf8(t, (lindex + Hangul_LBase));
t = uvuni_to_utf8(t, (vindex + Hangul_VBase));
if (tindex)
t = uvuni_to_utf8(t, (tindex + Hangul_TBase));
*t = '\0';
sv_catpvn(sv, (char *)tmp, strlen((char *)tmp));
}
/*
 * Append the decomposition of the jamo `uv` to `sv`, encoded as UTF-8.
 *
 * Nothing is appended unless Hangul_IsJ(uv) holds.  The low byte of `uv`
 * selects a row of LKHU_Decomp; every entry of that row is treated as an
 * offset from U+1100.  Entries are written until LKHU_DecompCnt of them have
 * been emitted or an entry equal to LKHU_DecompEnd is met.
 */
static void sv_cat_decompJamo (SV* sv, UV uv)
{
    U8 row, *out, buf[3 * UTF8_MAXLEN + 1];
    STDCHAR *first, *cur;

    if (! Hangul_IsJ(uv))
        return;

    row = (U8)(uv & 0xFF);
    first = LKHU_Decomp[row];
    cur = first;
    out = buf;

    /*
     * do/while on purpose: the first entry is always emitted without being
     * compared against LKHU_DecompEnd, so at least one character is output
     * (the original author notes this is needed for U+11FF).
     */
    do {
        out = uvuni_to_utf8(out, 0x1100 + (UV)(*cur & 0xFF));
        cur++;
    } while (cur - first < LKHU_DecompCnt && *cur != LKHU_DecompEnd);

    *out = '\0';
    sv_catpvn(sv, (char *)buf, strlen((char *)buf));
}
/*
 * Return the string value of `sv` as UTF-8 bytes and store its byte length
 * in `*lp`.
 *
 * When `sv` already carries the UTF-8 flag its own buffer is returned.
 * Otherwise a mortal copy of `sv` is made, upgraded to UTF-8, and the copy's
 * buffer is returned, so the caller's scalar is never upgraded in place.
 */
static char * sv_2pvunicode(SV *sv, STRLEN *lp)
{
    char *buf;
    STRLEN nbytes;
    SV *upgraded;

    buf = (char*)SvPV(sv, nbytes);

    if (SvUTF8(sv)) {
        *lp = nbytes;
        return buf;
    }

    /* Work on a temporary so the upgrade does not touch the argument. */
    upgraded = sv_mortalcopy(sv);
    if (!SvPOK(upgraded))
        (void)sv_pvn_force(upgraded, &nbytes);
    sv_utf8_upgrade(upgraded);

    buf = (char*)SvPV(upgraded, nbytes);
    *lp = nbytes;
    return buf;
}
MODULE = Lingua::KO::Hangul::Util PACKAGE = Lingua::KO::Hangul::Util
void
composeJamo(src)
    SV * src
  PROTOTYPE: $
  PREINIT:
    SV *dst;
    U8 *s, *p, *e, *d;
    UV uv;
    STRLEN srclen, dstlen, retlen;
    LKHU_CompStruct *i, *complist;
    bool fcomposed;
  PPCODE:
    /*
     * Return a UTF-8 copy of src in which jamo sequences are replaced by
     * their composites.
     *
     * For every decoded character that satisfies Hangul_IsJ and has a
     * non-NULL list in LKHU_Comp (indexed by the low byte of the code
     * point), the list is scanned in order; the first entry whose `trail`
     * bytes match the bytes immediately following the character wins, and
     * `composite` is written in place of the whole sequence.  Any other
     * character is re-encoded unchanged.
     *
     * Croaks with ErrRetlenIsZero if the decoder reports a zero length.
     */
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;
    dstlen = srclen + 1; /* equal or shorter */
    /*
     * PPCODE does not mortalize what it pushes, so the result must be made
     * mortal here; doing it at creation also frees it if we croak below.
     */
    dst = sv_2mortal(newSV(dstlen));
    SvPOK_only(dst);
    SvUTF8_on(dst);
    d = (U8*)SvPVX(dst);

    for (p = s; p < e;) {
        uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
        if (!retlen)
            croak(ErrRetlenIsZero);
        p += retlen;

        fcomposed = FALSE;
        if (Hangul_IsJ(uv) && (complist = LKHU_Comp[uv & 0xFF])) {
            /* the list is terminated by an entry whose len is 0 */
            for (i = complist; i->len; i++) {
                if (i->len <= (e - p) && memEQ(p, i->trail, i->len)) {
                    d = uvuni_to_utf8(d, i->composite);
                    p += i->len;
                    fcomposed = TRUE;
                    break;
                }
            }
        }
        if (!fcomposed)
            d = uvuni_to_utf8(d, uv);
    }
    *d = '\0';
    SvCUR_set(dst, d - (U8*)SvPVX(dst));
    XPUSHs(dst);
void
composeSyllable(src)
    SV * src
  PROTOTYPE: $
  PREINIT:
    SV *dst;
    U8 *s, *p, *e, *d;
    UV lastuv, nextuv, lindex, vindex, tindex;
    STRLEN srclen, dstlen, curlen, retlen;
  PPCODE:
    /*
     * Return a UTF-8 copy of src in which conjoining jamo are combined
     * into Hangul syllables: a character satisfying Hangul_IsL followed by
     * one satisfying Hangul_IsV becomes an LV syllable, and an LV syllable
     * (just composed, or already present in the input) followed by a
     * character satisfying Hangul_IsT absorbs it.  Everything else is
     * re-encoded unchanged.
     *
     * Croaks with ErrRetlenIsZero if the decoder reports a zero length.
     */
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;
    dstlen = srclen + 1; /* equal or shorter */
    /*
     * PPCODE does not mortalize what it pushes, so the result must be made
     * mortal here; doing it at creation also frees it if we croak below.
     */
    dst = sv_2mortal(newSV(dstlen));
    SvPOK_only(dst);
    SvUTF8_on(dst);
    d = (U8*)SvPVX(dst);

    for (p = s; p < e; ) {
        lastuv = utf8n_to_uvuni(p, e - p, &retlen, 0);
        if (!retlen)
            croak(ErrRetlenIsZero);
        p += retlen;

        /* peek at the following character; nextuv is valid only if curlen */
        curlen = e - p;
        if (curlen) {
            nextuv = utf8n_to_uvuni(p, curlen, &retlen, 0);
            if (!retlen)
                croak(ErrRetlenIsZero);
        }

        /* 1. composed if lastuv is L and nextuv is V. */
        if (curlen && Hangul_IsL(lastuv) && Hangul_IsV(nextuv)) {
            lindex = lastuv - Hangul_LBase;
            vindex = nextuv - Hangul_VBase;
            lastuv = Hangul_SBase +
                     (lindex * Hangul_VCount + vindex) * Hangul_TCount;
            p += retlen;

            /* the V was consumed: peek again for a possible T */
            curlen = e - p;
            if (curlen) {
                nextuv = utf8n_to_uvuni(p, curlen, &retlen, 0);
                if (!retlen)
                    croak(ErrRetlenIsZero);
            }
        }

        /* 2. composed if lastuv is LV and nextuv is T. */
        if (curlen && Hangul_IsLV(lastuv) && Hangul_IsT(nextuv)) {
            tindex = nextuv - Hangul_TBase;
            lastuv += tindex;
            p += retlen;
        }
        d = uvuni_to_utf8(d, lastuv);
    }
    *d = '\0';
    SvCUR_set(dst, d - (U8*)SvPVX(dst));
    XPUSHs(dst);
SV*
decomposeSyllable(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    decomposeJamo = 1
  PREINIT:
    UV uv;
    SV *dst;
    STRLEN srclen, retlen;
    U8 *s, *e, *p;
  CODE:
    /*
     * Shared body of decomposeSyllable (ix == 0) and decomposeJamo (ix == 1).
     *
     * Returns a new UTF-8 string built from src character by character:
     *   - as decomposeSyllable, characters satisfying Hangul_IsS are
     *     replaced by the output of sv_cat_decompSyl;
     *   - as decomposeJamo, characters satisfying Hangul_IsJ are replaced
     *     by the output of sv_cat_decompJamo;
     *   - every other character is copied through byte for byte.
     *
     * Croaks with ErrRetlenIsZero if the decoder reports a zero length.
     */
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;
    /*
     * Start from a real empty string rather than newSV(1) + SvPOK_only():
     * newSV() only reserves space, so flagging it POK without writing to it
     * would hand back a string lacking its trailing NUL when src is empty.
     */
    dst = newSVpvn("", 0);
    SvUTF8_on(dst);

    for (p = s; p < e; p += retlen) {
        uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
        if (!retlen)
            croak(ErrRetlenIsZero);

        if (ix == 0 && Hangul_IsS(uv))
            sv_cat_decompSyl(dst, uv);
        else if (ix == 1 && Hangul_IsJ(uv))
            sv_cat_decompJamo(dst, uv);
        else
            sv_catpvn(dst, (char *)p, retlen);
    }
    RETVAL = dst;
  OUTPUT:
    RETVAL
void
decomposeHangul(code)
UV code
PROTOTYPE: $
PREINIT:
UV sindex, lindex, vindex, tindex;
SV *decomp;
U8 *t, temp[3 * UTF8_MAXLEN + 1];
STRLEN tlen;
int notwantarray;
PPCODE:
notwantarray = (GIMME_V != G_ARRAY);
if (! Hangul_IsS(code)) {
if (notwantarray)
XSRETURN_UNDEF;
else
XSRETURN_EMPTY;
}
sindex = code - Hangul_SBase;
lindex = sindex / Hangul_NCount;
vindex = (sindex % Hangul_NCount) / Hangul_TCount;
tindex = sindex % Hangul_TCount;
if (notwantarray) {
t = temp;
t = uvuni_to_utf8(t, (lindex + Hangul_LBase));
t = uvuni_to_utf8(t, (vindex + Hangul_VBase));
if (tindex)
t = uvuni_to_utf8(t, (tindex + Hangul_TBase));
*t = '\0';
tlen = strlen((char*)temp);
decomp = sv_2mortal(newSVpvn((char*)temp, tlen));
SvUTF8_on(decomp);
XPUSHs(decomp);
}
else {
XPUSHs(sv_2mortal(newSVuv(lindex + Hangul_LBase)));
XPUSHs(sv_2mortal(newSVuv(vindex + Hangul_VBase)));
if (tindex)
XPUSHs(sv_2mortal(newSVuv(tindex + Hangul_TBase)));
} /* decomposeHangul */
void
getHangulComposite(uv,uv2)
UV uv
UV uv2
PROTOTYPE: $$
PREINIT:
UV ret_uv;
PPCODE:
ret_uv = compositeHangul(uv, uv2);
if (ret_uv)
XPUSHs(sv_2mortal(newSVuv(ret_uv)));
else
XSRETURN_UNDEF;
SV*
getHangulName(code)
UV code
PROTOTYPE: $
PREINIT:
UV sindex, lindex, vindex, tindex;
char name[Hangul_NameMax + 1];
STRLEN nlen;
CODE:
if (! Hangul_IsS(code))
XSRETURN_UNDEF;
sindex = code - Hangul_SBase;
lindex = sindex / Hangul_NCount;
vindex = (sindex % Hangul_NCount) / Hangul_TCount;
tindex = sindex % Hangul_TCount;
strcpy(name, Hangul_BName);
strcat(name, hangul_JamoL[lindex]);
strcat(name, hangul_JamoV[vindex]);
strcat(name, hangul_JamoT[tindex]);
nlen = strlen(name);
RETVAL = newSVpvn(name, nlen);
OUTPUT:
RETVAL
SV*
parseHangulName(sv)
SV* sv
PROTOTYPE: $
PREINIT:
UV uv;
char jamoL[Hangul_LLenMax];
char jamoV[Hangul_VLenMax];
char jamoT[Hangul_TLenMax];
char *s, *p, *e;
STRLEN slen, llen, vlen, tlen;
CODE:
s = SvPV(sv, slen);
e = s + slen;
if (slen <= Hangul_BNameLen ||
Hangul_NameMax < slen ||
strnNE(s, Hangul_BName, Hangul_BNameLen))
XSRETURN_UNDEF;
s += Hangul_BNameLen;
for (p = s; p < e && p < s + Hangul_LLenMax; p++) {
if (IsHangulNameC(*p))
jamoL[p - s] = *p;
else if (IsHangulNameV(*p))
break;
else
( run in 0.475 second using v1.01-cache-2.11-cpan-5511b514fd6 )