Lingua-KO-Hangul-Util

 view release on metacpan or  search on metacpan

Util.xsX  view on Meta::CPAN

    t = tmp;
    t = uvuni_to_utf8(t, (lindex + Hangul_LBase));
    t = uvuni_to_utf8(t, (vindex + Hangul_VBase));
    if (tindex)
	t = uvuni_to_utf8(t, (tindex + Hangul_TBase));
    *t = '\0';
    sv_catpvn(sv, (char *)tmp, strlen((char *)tmp));
}

/*
 * Append the decomposition of the jamo `uv` to `sv` as UTF-8.
 *
 * The low byte of `uv` selects a row of LKHU_Decomp; every entry of
 * that row is an offset that is added to 0x1100 to obtain the code
 * point to emit.  Nothing is appended when Hangul_IsJ(uv) is false.
 */
static void sv_cat_decompJamo (SV* sv, UV uv)
{
    U8 buf[3 * UTF8_MAXLEN + 1];
    U8 *out = buf;
    STDCHAR *first, *cur;

    if (! Hangul_IsJ(uv))
        return;

    first = LKHU_Decomp[(U8)(uv & 0xFF)];
    cur = first;

    /*
     * The first entry is emitted unconditionally, even when it equals
     * LKHU_DecompEnd: U+11FF must output at least one character.
     * After that, stop at the end marker or after LKHU_DecompCnt entries.
     */
    do {
        out = uvuni_to_utf8(out, 0x1100 + (UV)(*cur & 0xFF));
        cur++;
    } while (cur - first < LKHU_DecompCnt && *cur != LKHU_DecompEnd);

    *out = '\0';
    sv_catpvn(sv, (char *)buf, strlen((char *)buf));
}

/*
 * Return the string buffer of `sv` in UTF-8 form and store its length
 * in bytes in *lp.
 *
 * When `sv` already carries the UTF-8 flag its own buffer is returned.
 * Otherwise a mortal copy is made, forced to a string if necessary and
 * upgraded to UTF-8; the upgrade is applied to the copy, not to `sv`,
 * and the copy's buffer is returned.
 */
static char * sv_2pvunicode(SV *sv, STRLEN *lp)
{
    SV *upgraded;
    STRLEN nbytes;
    char *buf;

    buf = (char*)SvPV(sv,nbytes);
    if (SvUTF8(sv)) {
        *lp = nbytes;
        return buf;
    }

    upgraded = sv_mortalcopy(sv);
    if (!SvPOK(upgraded))
        (void)sv_pvn_force(upgraded,&nbytes);
    sv_utf8_upgrade(upgraded);
    buf = (char*)SvPV(upgraded,nbytes);

    *lp = nbytes;
    return buf;
}

MODULE = Lingua::KO::Hangul::Util	PACKAGE = Lingua::KO::Hangul::Util


void
composeJamo(src)
    SV * src
  PROTOTYPE: $
  PREINIT:
    SV *dst;
    U8 *s, *p, *e, *d;
    UV uv;
    STRLEN srclen, dstlen, retlen;
    LKHU_CompStruct *i, *complist;
    bool fcomposed;
  PPCODE:
    /*
     * Returns a new UTF-8 string built from src: whenever a jamo has a
     * composition list in LKHU_Comp and the bytes that follow it match
     * the `trail` of a list entry, the jamo plus that trail are replaced
     * by the entry's `composite`.  The first matching entry wins.
     * Every other character is copied through re-encoded.
     * Croaks with ErrRetlenIsZero if a character cannot be consumed.
     */
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;

    dstlen = srclen + 1; /* equal or shorter */
    /*
     * PPCODE does not mortalize what we push, so the result must be
     * made mortal here; otherwise each call (and each croak below)
     * leaks the SV.
     */
    dst = sv_2mortal(newSV(dstlen));
    SvPOK_only(dst);
    SvUTF8_on(dst);
    d = (U8*)SvPVX(dst);

    for (p = s; p < e;) {
	uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
	if (!retlen)
	    croak(ErrRetlenIsZero);
	p += retlen;

	fcomposed = FALSE;
	if (Hangul_IsJ(uv) && (complist = LKHU_Comp[uv & 0xFF])) {
	    /* the list is terminated by an entry whose len is zero */
	    for (i = complist; i->len; i++) {
		if (i->len <= (e - p) && memEQ(p, i->trail, i->len)) {
		    d = uvuni_to_utf8(d, i->composite);
		    p += i->len;
		    fcomposed = TRUE;
		    break;
		}
	    }
	}
	if (!fcomposed)
	    d = uvuni_to_utf8(d, uv);
    }
    *d = '\0';
    SvCUR_set(dst, d - (U8*)SvPVX(dst));
    XPUSHs(dst);



void
composeSyllable(src)
    SV * src
  PROTOTYPE: $
  PREINIT:
    SV *dst;
    U8 *s, *p, *e, *d;
    UV lastuv, nextuv, lindex, vindex, tindex;
    STRLEN srclen, dstlen, curlen, retlen;
  PPCODE:
    /*
     * Returns a new UTF-8 string built from src in which an L jamo
     * followed by a V jamo is combined into an LV syllable, and an LV
     * syllable (just composed or already present) followed by a T jamo
     * is combined into an LVT syllable.  Everything else is copied
     * through re-encoded.
     * Croaks with ErrRetlenIsZero if a character cannot be consumed.
     */
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;

    dstlen = srclen + 1; /* equal or shorter */
    /*
     * PPCODE does not mortalize what we push, so the result must be
     * made mortal here; otherwise each call (and each croak below)
     * leaks the SV.
     */
    dst = sv_2mortal(newSV(dstlen));
    SvPOK_only(dst);
    SvUTF8_on(dst);
    d = (U8*)SvPVX(dst);

    for (p = s; p < e; ) {
	lastuv = utf8n_to_uvuni(p, e - p, &retlen, 0);
	if (!retlen)
	    croak(ErrRetlenIsZero);

	p += retlen;
	curlen = e - p;
	/* peek at the following character; nextuv is valid only if curlen */
	if (curlen) {
	    nextuv = utf8n_to_uvuni(p, curlen, &retlen, 0);
	    if (!retlen)
		croak(ErrRetlenIsZero);
	}

     /* 1. composed if lastuv is L and nextuv is V. */
	if (curlen && Hangul_IsL(lastuv) && Hangul_IsV(nextuv)) {
	    lindex = lastuv - Hangul_LBase;
	    vindex = nextuv - Hangul_VBase;
	    lastuv = Hangul_SBase +
		(lindex * Hangul_VCount + vindex) * Hangul_TCount;
	    p += retlen;
	    curlen = e - p;
	    /* peek again so that step 2 can see a trailing T */
	    if (curlen) {
		nextuv = utf8n_to_uvuni(p, curlen, &retlen, 0);
		if (!retlen)
		    croak(ErrRetlenIsZero);
	    }
	}

     /* 2. composed if lastuv is LV and nextuv is T. */
	if (curlen && Hangul_IsLV(lastuv) && Hangul_IsT(nextuv)) {
	    tindex = nextuv - Hangul_TBase;
	    lastuv += tindex;
	    p += retlen;
	}

	d = uvuni_to_utf8(d, lastuv);
    }
    *d = '\0';
    SvCUR_set(dst, d - (U8*)SvPVX(dst));
    XPUSHs(dst);


SV*
decomposeSyllable(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    decomposeJamo = 1
  PREINIT:
    SV *out;
    U8 *cur, *end;
    UV cp;
    STRLEN nbytes, cplen;
  CODE:
    /*
     * Shared body of decomposeSyllable (ix == 0) and decomposeJamo
     * (ix == 1).  Walks src character by character: called as
     * decomposeSyllable, characters satisfying Hangul_IsS are expanded
     * with sv_cat_decompSyl; called as decomposeJamo, characters
     * satisfying Hangul_IsJ are expanded with sv_cat_decompJamo.
     * All other characters are appended byte for byte.
     * Croaks with ErrRetlenIsZero if a character cannot be consumed.
     */
    cur = (U8*)sv_2pvunicode(src,&nbytes);
    end = cur + nbytes;

    out = newSV(1);
    SvPOK_only(out);
    SvUTF8_on(out);

    while (cur < end) {
        cp = utf8n_to_uvuni(cur, end - cur, &cplen, 0);
        if (!cplen)
            croak(ErrRetlenIsZero);

        if (ix == 0 && Hangul_IsS(cp))
            sv_cat_decompSyl(out, cp);
        else if (ix == 1 && Hangul_IsJ(cp))
            sv_cat_decompJamo(out, cp);
        else
            sv_catpvn(out, (char *)cur, cplen);

        cur += cplen;
    }
    RETVAL = out;
  OUTPUT:
    RETVAL


void
decomposeHangul(code)
    UV code
  PROTOTYPE: $
  PREINIT:
    UV sindex, lcode, vcode, tindex;
    SV *decomp;
    U8 *end, buf[3 * UTF8_MAXLEN + 1];
    int wantlist;
  PPCODE:
    /*
     * Split the syllable `code` into its jamo.
     * In list context the jamo code points are returned as integers;
     * otherwise a single UTF-8 string holding the jamo is returned.
     * The trailing (T) jamo is included only when its index is nonzero.
     * If Hangul_IsS(code) is false the result is the empty list in list
     * context and undef otherwise.
     */
    wantlist = (GIMME_V == G_ARRAY);
    if (! Hangul_IsS(code)) {
        if (wantlist) {
            XSRETURN_EMPTY;
        }
        else {
            XSRETURN_UNDEF;
        }
    }

    sindex = code - Hangul_SBase;
    lcode  = Hangul_LBase + sindex / Hangul_NCount;
    vcode  = Hangul_VBase + (sindex % Hangul_NCount) / Hangul_TCount;
    tindex = sindex % Hangul_TCount;

    if (wantlist) {
        XPUSHs(sv_2mortal(newSVuv(lcode)));
        XPUSHs(sv_2mortal(newSVuv(vcode)));
        if (tindex) {
            XPUSHs(sv_2mortal(newSVuv(tindex + Hangul_TBase)));
        }
    }
    else {
        end = uvuni_to_utf8(buf, lcode);
        end = uvuni_to_utf8(end, vcode);
        if (tindex) {
            end = uvuni_to_utf8(end, (tindex + Hangul_TBase));
        }
        *end = '\0';
        decomp = sv_2mortal(newSVpvn((char*)buf, strlen((char*)buf)));
        SvUTF8_on(decomp);
        XPUSHs(decomp);
    }


void
getHangulComposite(uv,uv2)
    UV uv
    UV uv2
  PROTOTYPE: $$
  PREINIT:
    UV composite;
  PPCODE:
    /*
     * Return the value of compositeHangul(uv, uv2) as an unsigned
     * integer, or undef when that value is zero.
     */
    composite = compositeHangul(uv, uv2);
    if (!composite) {
        XSRETURN_UNDEF;
    }
    XPUSHs(sv_2mortal(newSVuv(composite)));


SV*
getHangulName(code)
    UV code
  PROTOTYPE: $
  PREINIT:
    UV syl;
    char buf[Hangul_NameMax + 1];
  CODE:
    /*
     * Build the name of the syllable `code`: the prefix Hangul_BName
     * followed by the hangul_JamoL, hangul_JamoV and hangul_JamoT
     * strings selected by the syllable's L, V and T indices.
     * Returns undef if Hangul_IsS(code) is false.
     */
    if (! Hangul_IsS(code)) {
        XSRETURN_UNDEF;
    }

    syl = code - Hangul_SBase;

    strcpy(buf, Hangul_BName);
    strcat(buf, hangul_JamoL[syl / Hangul_NCount]);
    strcat(buf, hangul_JamoV[(syl % Hangul_NCount) / Hangul_TCount]);
    strcat(buf, hangul_JamoT[syl % Hangul_TCount]);

    RETVAL = newSVpvn(buf, strlen(buf));
  OUTPUT:
    RETVAL


SV*
parseHangulName(sv)
    SV* sv
  PROTOTYPE: $
  PREINIT:
    UV uv;
    char jamoL[Hangul_LLenMax];
    char jamoV[Hangul_VLenMax];
    char jamoT[Hangul_TLenMax];
    char *s, *p, *e;
    STRLEN slen, llen, vlen, tlen;
  CODE:
    s = SvPV(sv, slen);
    e = s + slen;

    if (slen <= Hangul_BNameLen ||
	Hangul_NameMax < slen ||
	strnNE(s, Hangul_BName, Hangul_BNameLen))
	XSRETURN_UNDEF;

    s += Hangul_BNameLen;
    for (p = s; p < e && p < s + Hangul_LLenMax; p++) {
	if (IsHangulNameC(*p))
	    jamoL[p - s] = *p;
	else if (IsHangulNameV(*p))
	    break;
	else



( run in 0.475 second using v1.01-cache-2.11-cpan-5511b514fd6 )