#include "EXTERN.h" #include "perl.h" #include "XSUB.h" #define PkgName "Unicode::Transform" /* Some functions are defined in this. */ #include "unitrans.h" #define Num_UTFs_here (8) /* in the range of valid Unicode (0..10FFFF) */ static STDCHAR MaxLenAmplUni[Num_UTFs_here * Num_UTFs_here] = { 1, 2, 2, 4, 4, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 4, 4, 1, 2, 2, 2, 2, 2, 4, 4, 2, 1, 1, 2, 2, 2, 4, 4, 2, 1, 1, }; static UV (*ord_uv_in[Num_UTFs_here])(U8 *, STRLEN, STRLEN *) = { ord_in_unicode, ord_in_utf16le, ord_in_utf16be, ord_in_utf32le, ord_in_utf32be, ord_in_utf8, ord_in_utf8mod, ord_in_utfcp1047, }; static U8* (*app_uv_in[Num_UTFs_here])(U8 *, UV) = { app_in_unicode, app_in_utf16le, app_in_utf16be, app_in_utf32le, app_in_utf32be, app_in_utf8, app_in_utf8mod, app_in_utfcp1047, }; static void sv_cat_retcvref (SV *dst, SV *cv, SV *sv) { dSP; int count; ENTER; SAVETMPS; PUSHMARK(SP); XPUSHs(sv_2mortal(sv)); PUTBACK; count = call_sv(cv, G_EVAL|G_SCALAR); SPAGAIN; if (SvTRUE(ERRSV) || count != 1) { croak("died in subroutine call from XS, " PkgName "\n"); } sv_catsv(dst,POPs); PUTBACK; FREETMPS; LEAVE; } MODULE = Unicode::Transform PACKAGE = Unicode::Transform SV* unicode_to_unicode (arg1, arg2=0) SV* arg1 SV* arg2 PROTOTYPE: $;$ ALIAS: unicode_to_utf16le = 1 unicode_to_utf16be = 2 unicode_to_utf32le = 3 unicode_to_utf32be = 4 unicode_to_utf8 = 5 unicode_to_utf8mod = 6 unicode_to_utfcp1047 = 7 utf16le_to_unicode = 8 utf16le_to_utf16le = 9 utf16le_to_utf16be = 10 utf16le_to_utf32le = 11 utf16le_to_utf32be = 12 utf16le_to_utf8 = 13 utf16le_to_utf8mod = 14 utf16le_to_utfcp1047 = 15 utf16be_to_unicode = 16 utf16be_to_utf16le = 17 utf16be_to_utf16be = 18 utf16be_to_utf32le = 19 utf16be_to_utf32be = 20 utf16be_to_utf8 = 21 utf16be_to_utf8mod = 22 utf16be_to_utfcp1047 = 23 utf32le_to_unicode = 24 utf32le_to_utf16le = 25 utf32le_to_utf16be = 26 utf32le_to_utf32le = 27 utf32le_to_utf32be = 28 utf32le_to_utf8 = 29 utf32le_to_utf8mod = 30 utf32le_to_utfcp1047 = 31 utf32be_to_unicode = 32 utf32be_to_utf16le = 33 utf32be_to_utf16be = 34 utf32be_to_utf32le = 35 utf32be_to_utf32be = 36 utf32be_to_utf8 = 37 utf32be_to_utf8mod = 38 utf32be_to_utfcp1047 = 39 utf8_to_unicode = 40 utf8_to_utf16le = 41 utf8_to_utf16be = 42 utf8_to_utf32le = 43 utf8_to_utf32be = 44 utf8_to_utf8 = 45 utf8_to_utf8mod = 46 utf8_to_utfcp1047 = 47 utf8mod_to_unicode = 48 utf8mod_to_utf16le = 49 utf8mod_to_utf16be = 50 utf8mod_to_utf32le = 51 utf8mod_to_utf32be = 52 utf8mod_to_utf8 = 53 utf8mod_to_utf8mod = 54 utf8mod_to_utfcp1047 = 55 utfcp1047_to_unicode = 56 utfcp1047_to_utf16le = 57 utfcp1047_to_utf16be = 58 utfcp1047_to_utf32le = 59 utfcp1047_to_utf32be = 60 utfcp1047_to_utf8 = 61 utfcp1047_to_utf8mod = 62 utfcp1047_to_utfcp1047 = 63 PREINIT: SV *src, *dst, *cvref; STRLEN srclen, dstlen, retlen, ulen; U8 *s, *e, *p, *d, ubuf[UTF8_MAXLEN + 1]; UV uv; UV (*ord_uv)(U8 *, STRLEN, STRLEN *); U8* (*app_uv)(U8*, UV); int from_utf_num, to_utf_num; bool from_unicode, to_unicode; CODE: cvref = NULL; if (SvROK(arg1)) { if (SvTYPE(SvRV(arg1)) == SVt_PVCV) cvref = SvRV(arg1); else croak(PkgName " CALLBACK is not a CODEREF"); } src = cvref ? items == 1 ? &PL_sv_undef : arg2 : arg1; from_utf_num = ix / Num_UTFs_here; to_utf_num = ix % Num_UTFs_here; from_unicode = from_utf_num == 0; to_unicode = to_utf_num == 0; if (!from_unicode && SvUTF8(src)) { src = sv_mortalcopy(src); sv_utf8_downgrade(src, 0); } else if (from_unicode && !SvUTF8(src)) { src = sv_mortalcopy(src); sv_utf8_upgrade(src); } s = (U8*)SvPV(src,srclen); e = s + srclen; dstlen = srclen * MaxLenAmplUni[ix] + 1; dst = newSV(dstlen); (void)SvPOK_only(dst); if (to_unicode) { SvUTF8_on(dst); } ord_uv = ord_uv_in[from_utf_num]; app_uv = app_uv_in[to_utf_num]; if (cvref) { for (p = s; p < e;) { uv = ord_uv(p, e - p, &retlen); if (retlen) p += retlen; else uv = (UV)*p++; if (retlen && Is_VALID_UTF(uv)) { ulen = app_uv(ubuf, uv) - ubuf; sv_catpvn(dst, (char*)ubuf, ulen); } else sv_cat_retcvref(dst, cvref, newSVuv(uv)); } } else { d = (U8*)SvPVX(dst); for (p = s; p < e;) { uv = ord_uv(p, e - p, &retlen); if (retlen) p += retlen; else { p++; continue; } if (Is_VALID_UTF(uv)) d = app_uv(d, uv); } *d = '\0'; SvCUR_set(dst, d - (U8*)SvPVX(dst)); } RETVAL = dst; OUTPUT: RETVAL SV* chr_unicode (uv) UV uv PROTOTYPE: $ ALIAS: chr_utf16le = 1 chr_utf16be = 2 chr_utf32le = 3 chr_utf32be = 4 chr_utf8 = 5 chr_utf8mod = 6 chr_utfcp1047 = 7 PREINIT: SV *dst; U8 *u, ubuf[UTF8_MAXLEN + 1]; U8* (*app_uv)(U8*, UV); CODE: dst = newSVpvn("", 0); (void)SvPOK_only(dst); if (ix == 0) { SvUTF8_on(dst); } app_uv = app_uv_in[ix]; u = app_uv(ubuf, uv); if (u == ubuf) XSRETURN_UNDEF; sv_catpvn(dst, (char*)ubuf, u - ubuf); RETVAL = dst; OUTPUT: RETVAL SV* ord_unicode (src) SV* src PROTOTYPE: $ ALIAS: ord_utf16le = 1 ord_utf16be = 2 ord_utf32le = 3 ord_utf32be = 4 ord_utf8 = 5 ord_utf8mod = 6 ord_utfcp1047 = 7 PREINIT: STRLEN srclen, retlen; U8 *s; UV uv; UV (*ord_uv)(U8 *, STRLEN, STRLEN *); CODE: if (ix != 0 && SvUTF8(src)) { src = sv_mortalcopy(src); sv_utf8_downgrade(src, 0); } else if (ix == 0 && !SvUTF8(src)) { src = sv_mortalcopy(src); sv_utf8_upgrade(src); } s = (U8*)SvPV(src,srclen); if (!srclen) XSRETURN_UNDEF; ord_uv = ord_uv_in[ix]; uv = ord_uv(s, srclen, &retlen); RETVAL = retlen ? newSVuv(uv) : &PL_sv_undef; OUTPUT: RETVAL