/* * $Id: uni.c,v 2.0 2005/05/16 19:08:16 dankogai Exp $ * (c) 1999-2003 Dan Kogai * This library is free software; you can redistribute it and/or * modify it under the same terms as Perl itself. */ #include #include #include "uni2euc.h" #include "euc2uni.h" #ifndef U8 #define U8 unsigned char #endif #ifndef U16 #define U16 unsigned short #endif #ifndef U32 #define U32 unsigned long #endif U32 _ucs2_euc(U8 *dst, U8 *src, U32 nchar){ U32 result = 0; U32 len; char *offset; for (nchar /= 2; nchar > 0; nchar--, src += 2){ offset = uni2euc[src[0]] + src[1]*4; strncpy((char *)dst, offset, 4); len = strlen(offset); dst += len; result += len; } return result; } # define FB_UNI 0xFFFd # define CHKUTEN(x) (0 <= (x) && (x) < 94*94) U32 _euc_ucs2(U8 *dst, U8 *src){ U32 result = 0; U32 kuten; U16 ucs2; for (result = 0; *src != '\0'; src++, dst += 2, result += 2){ if (*src <= 0x7F){ /* ASCII */ ucs2 = src[0]; }else if (*src == 0x8e){ /* jisx0201 */ if (src[1]){ ucs2 = j01_uni[src[1]]; src += 1; }else{ ucs2 = FB_UNI; } }else if (*src == 0x8f){ /* jisx0212 */ if (src[1] && src[2]){ kuten = (src[1] - 0xa1)*94 + (src[2] - 0xa1); ucs2 = CHKUTEN(kuten) ? j12_uni[kuten] : FB_UNI; src += 2; }else{ ucs2 = FB_UNI; if (src[1]) src++; } }else{ /* jisx0208 */ if (src[1]){ kuten = (src[0] - 0xa1)*94 + (src[1] - 0xa1); ucs2 = CHKUTEN(kuten) ? j08_uni[kuten] : FB_UNI; src += 1; }else{ ucs2 = FB_UNI; } } dst[0] = ucs2/256; dst[1] = ucs2%256; } return result; } U32 _ucs2_utf8(U8 *dst, U8 *src, U32 nchar){ U32 ucs2; U32 result = 0; for (nchar /= 2; nchar > 0; nchar--, src += 2) { ucs2 = src[0]*256 + src[1]; if (ucs2 < 0x80){ /* 1 byte */ *dst++ = ucs2; result += 1; }else if (ucs2 < 0x800){ /* 2 bytes */ *dst++ = (0xC0 | (ucs2 >> 6)); *dst++ = (0x80 | (ucs2 & 0x3F)); result += 2; }else{ /* 3 bytes */ *dst++ = (0xE0 | (ucs2 >> 12)); *dst++ = (0x80 | ((ucs2 >> 6) & 0x3F)); *dst++ = (0x80 | (ucs2 & 0x3F)); result += 3; } } *dst = '\0'; return result; } U32 _utf8_ucs2(U8 *dst, U8 *src){ U32 ucs2; U8 c1, c2, c3; U32 result = 0; for(; *src != '\0'; src++, result++){ if (*src < 0x80) { /* 1 byte */ ucs2 = *src; }else if (*src < 0xE0){ /* 2 bytes */ if (src[1]){ c1 = *src++; c2 = *src; ucs2 = ((c1 & 0x1F) << 6) | (c2 & 0x3F); }else{ ucs2 = FB_UNI; } }else{ /* 3 bytes */ if (src[1] && src[2]){ c1 = *src++; c2 = *src++; c3 = *src; ucs2 = ((c1 & 0x0F) << 12) | ((c2 & 0x3F) << 6)| (c3 & 0x3F); }else{ ucs2 = FB_UNI; if (src[1]) src++; } } *dst++ = (ucs2 & 0xff00) >> 8; /* 1st byte */ *dst++ = (ucs2 & 0xff); /* 2nd byte */; } return result * 2; } U32 _euc_utf8(U8 *dst, U8 *src){ U32 result = 0; U32 kuten; U16 ucs2; for (result = 0; *src != '\0'; src++){ if (*src <= 0x7F){ /* ASCII */ ucs2 = src[0]; }else if (*src == 0x8e){ /* jisx0201 */ if (src[1]){ ucs2 = j01_uni[src[1]]; src += 1; }else{ ucs2 = FB_UNI; } }else if (*src == 0x8f){ /* jisx0212 */ if (src[1] && src[2]){ kuten = (src[1] - 0xa1)*94 + (src[2] - 0xa1); ucs2 = CHKUTEN(kuten) ? j12_uni[kuten] : FB_UNI; src += 2; }else{ ucs2 = FB_UNI; if (src[1]) src++; } }else{ /* jisx0208 */ if (src[1]){ kuten = (src[0] - 0xa1)*94 + (src[1] - 0xa1); ucs2 = CHKUTEN(kuten) ? j08_uni[kuten] : FB_UNI; src += 1; }else{ ucs2 = FB_UNI; } } if (ucs2 < 0x80){ /* 1 byte */ *dst++ = ucs2; result += 1; }else if (ucs2 < 0x800){ /* 2 bytes */ *dst++ = (0xC0 | (ucs2 >> 6)); *dst++ = (0x80 | (ucs2 & 0x3F)); result += 2; }else{ /* 3 bytes */ *dst++ = (0xE0 | (ucs2 >> 12)); *dst++ = (0x80 | ((ucs2 >> 6) & 0x3F)); *dst++ = (0x80 | (ucs2 & 0x3F)); result += 3; } } *dst = '\0'; return result; } U32 _utf8_euc(U8 *dst, U8 *src){ U32 result = 0; U32 len; U16 ucs2; U8 c1, c2, c3; char *offset; for(; *src != '\0'; src++){ if (*src < 0x80) { /* 1 byte */ ucs2 = *src; }else if (*src < 0xE0){ /* 2 bytes */ if (src[1]){ c1 = *src++; c2 = *src; ucs2 = ((c1 & 0x1F) << 6) | (c2 & 0x3F); }else{ ucs2 = FB_UNI; } }else{ /* 3 bytes */ if (src[1] && src[2]){ c1 = *src++; c2 = *src++; c3 = *src; ucs2 = ((c1 & 0x0F) << 12) | ((c2 & 0x3F) << 6)| (c3 & 0x3F); }else{ ucs2 = FB_UNI; if (src[1]){ src++; } } } offset = uni2euc[ucs2/256] + (ucs2%256)*4; strncpy((char *)dst, offset, 4); len = strlen(offset); dst += len; result += len; } return result; } #ifndef PERL_XS #include int main(int argc, char **argv){ U8 buf1[1024], buf2[1024]; int result; FILE *IN; if (argc > 1){ IN = fopen(argv[1], "r"); if (IN == NULL){ fprintf(stderr, "Can't open %s; %s\n", argv[1], strerror(errno)); exit(-1); } }else{ IN = stdin; } #ifdef EUC_UTF8 while(fgets(buf2, 256, IN)){ result = _euc_utf8(buf1, buf2); fputs(buf1, stdout); } #else while(fgets(buf1, 256, IN)){ result = _utf8_euc(buf2, buf1); fputs(buf2, stdout); } #endif } #endif