unicode_helper.c - metacpan.org

/*
 * $Id: unicode_helper.c 9700 2007-07-04 14:12:58Z mjevans $
 */
#ifdef WITH_UNICODE

#include "ODBC.h"
#include <stdio.h>
#include "ConvertUTF.h"

/*
 * Selects what _dosvwv() does with the converted text: create a new SV,
 * append to an existing SV, or replace the value of an existing SV.
 */
typedef enum {
    do_new = 1,
    do_cat = 2,
    do_set = 3
} new_cat_set_t;

/* forward declarations of the file-local helpers */
static SV *_dosvwv(SV *sv, UTF16 *wp, STRLEN len, new_cat_set_t mode);

static unsigned short utf16_len(UTF16 *wp);
static void utf16_copy(UTF16 *d, UTF16 *s);



/*
 * Convert a UTF-16 string to UTF-8 and deliver it into an SV.
 *
 * sv   - target SV for do_cat and do_set; ignored (overwritten) for do_new
 * wp   - UTF-16 input
 * len  - number of UTF-16 code units in wp (no termination character
 *        required), or (STRLEN)-1 if wp is a null-terminated wide string
 * mode - do_new: create and return a new SV holding the text
 *        do_cat: append the text to sv
 *        do_set: replace the value of sv with the text
 *
 * Returns the SV that received the text.  The UTF8 flag is switched on
 * whenever the converted text is non-empty; for an empty result it is
 * switched off, except in do_cat mode where it is left untouched.
 * Croaks if the conversion fails or mode is not one of the three values.
 */
static SV * _dosvwv(SV * sv, UTF16 * wp, STRLEN len, new_cat_set_t mode)
{
    char * p=NULL;
    STRLEN svlen;

#ifdef WIN32
    int bytes;
    bytes=WideCharToMultiByte(CP_UTF8,0,wp,len,NULL,0,NULL,NULL);
    Newz(0,p,1+bytes,char);	/* zero-filled, one spare byte keeps p NUL-terminated */
    if (bytes!=0) {
        if(!WideCharToMultiByte(CP_UTF8,0,wp,len,p,bytes,NULL,NULL)) {
            int err=GetLastError();
            switch (err) {
              case ERROR_INSUFFICIENT_BUFFER:
                croak("_dosvwv: WideCharToMultiByte() failed: insufficient buffer");
              case ERROR_INVALID_FLAGS:
                croak("_dosvwv: WideCharToMultiByte() failed: invalid flags");
              case ERROR_INVALID_PARAMETER:
                croak("_dosvwv: WideCharToMultiByte() failed: invalid parameter");
              default:
                croak("_dosvwv: WideCharToMultiByte() failed: error code %i",err);
            }
        }
    }
    /* for a null-terminated input the size reported by the API counts the
     * terminator as well, so measure the text itself in that case */
    svlen=(len==(STRLEN)-1 ? strlen(p) : bytes);
#else
    unsigned int bytes;
    if (len == (STRLEN)-1) {
        len = utf16_len(wp);
    }
    if (len > 0) {
        ConversionResult ret;
        UTF16 *source_start = wp;
        UTF16 *source_end = source_start + len;
        UTF8 *target_start;
        UTF8 *target_end;

        /* Dry run without a target buffer: only learn the size needed */
        ret = ConvertUTF16toUTF8((const UTF16 **)&source_start, source_end,
                                 NULL, NULL, strictConversion, &bytes);
        if (ret != conversionOK) {
            if (ret == sourceExhausted) {
                croak("_dosvwv: Partial character in input");
            } else if (ret == targetExhausted) {
                croak("_dosvwv: target buffer exhausted");
            } else if (ret == sourceIllegal) {
                croak("_dosvwv: malformed/illegal source sequence");
            } else {
                croak("_dosvwv: unknown ConvertUTF16toUTF8 error");
            }
        }
        Newz(0, p, bytes + 1, char);

        /* Real conversion; the first call advanced source_start, so reset */
        target_start = (UTF8 *)p;
        target_end = (UTF8 *)p + bytes;
        source_start = wp;
        source_end = source_start + len;
        ret = ConvertUTF16toUTF8((const UTF16 **)&source_start, source_end,
                                 &target_start, target_end,
                                 strictConversion, &bytes);
        if (ret != conversionOK) {
            croak("_dosvwv: second call to ConvertUTF16toUTF8 failed (%d)", ret);
        }
        svlen = bytes;
    } else {
        /* Empty input: hand Perl a real empty string instead of a NULL
         * pointer, which is what the WIN32 branch above produces too. */
        Newz(0, p, 1, char);
        svlen = 0;
    }
#endif

    switch (mode) {
      case do_new:
        sv=newSVpvn(p,svlen);
        break;
      case do_cat:
        sv_catpvn(sv,p,svlen);
        break;
      case do_set:
        sv_setpvn(sv,p,svlen);
        break;
      default:
        croak("_dosvwv called with bad mode value");
    }

    /* Decide on the length, not on the first byte: text that starts with
     * U+0000 is not empty and still needs the UTF8 flag. */
    if (svlen > 0) {
        SvUTF8_on(sv);
    } else if (mode!=do_cat) {
        /* Don't switch off UTF8 just because we *APPENDED* an empty
         * string! sv may still be UTF8. */
        SvUTF8_off(sv);
    }
    if (p != NULL) Safefree(p);
    return sv;
}

/*
 * UTF-16 counterpart of sv_setpvn(): store a wide string in an SV.
 * On return the SV holds the UTF-8 form of wp, with the UTF8 flag on
 * except for empty strings.
 *
 * wp is an array of <len> wide characters without a termination character.
 * A NULL wp is passed straight on to sv_setpvn(); a zero len stores the
 * empty string without running any conversion.
 */
void sv_setwvn(SV * sv, UTF16 * wp, STRLEN len)
{
    if (NULL == wp) {
        sv_setpvn(sv, NULL, len);
        return;
    }

    if (0 == len) {
        sv_setpvn(sv, "", 0);
        return;
    }

    _dosvwv(sv, wp, len, do_set);
}

/*
 * Get a UTF16 * representation of a char *
 * The representation is a converted copy, so the result needs to be freed
 * usng WVfree().
 * char * s == NULL is handled properly
 *
 * Does not handle byte arrays, only null-terminated strings.
 */
UTF16 * WValloc(char * s)
{
    UTF16 * buf=NULL;
    if (NULL!=s) {
#ifdef WIN32
        int widechars=MultiByteToWideChar(CP_UTF8,0,s,-1,NULL,0);
        Newz(0,buf,widechars+1,UTF16);
        if (widechars!=0) {
            MultiByteToWideChar(CP_UTF8,0,s,-1,buf,widechars);
        }
#else
        unsigned int widechrs, bytes;
        size_t slen;
        ConversionResult ret;
        UTF8 *source_start, *source_end;
        UTF16 *target_start, *target_end;

        slen = strlen(s);
        /*fprintf(stderr, "utf8 string \\%s\\ is %ld bytes long\n", s, strlen(s));*/
        
        source_start = s;
        source_end = s + slen + 1;              /* include NUL terminator */

        ret = ConvertUTF8toUTF16(
            (const UTF8 **)&source_start, source_end,
            NULL, NULL, strictConversion, &bytes);
        if (ret != conversionOK) {
            if (ret == sourceExhausted) {
                croak("WValloc: Partial character in input");
            } else if (ret == targetExhausted) {
                croak("WValloc: target buffer exhausted");
            } else if (ret == sourceIllegal) {
                croak("WValloc: malformed/illegal source sequence");
            } else {
                croak("WValloc: unknown ConvertUTF16toUTF8 error");
            }
        }
        /*fprintf(stderr,"utf8 -> utf16 requires %d bytes\n", bytes);*/
        
        widechrs = bytes / sizeof(UTF16);
        /*fprintf(stderr, "Allocating %d wide chrs\n", widechrs);*/
        
        Newz(0,buf,widechrs+1,UTF16);
        if (widechrs != 0) {
            source_start = s;
            source_end = s + slen + 1;
            target_start = buf;
            target_end = buf + widechrs + 1;
            /*fprintf(stderr, "%p %p %p %p\n", source_start, source_end, target_start, target_end);*/
            
            ret = ConvertUTF8toUTF16(
                (const UTF8 **)&source_start, source_end,
                &target_start, target_end, strictConversion, &bytes);
            if (ret != conversionOK) {
                croak("WValloc: second call to ConvertUTF8toUTF16 failed (%d)", ret);
            }
            /*fprintf(stderr, "Second returned %d bytes\n", bytes);*/
            
        }
#endif
    }
    return buf;
}


/*
 * Release a UTF16 * buffer obtained from WValloc().
 * Passing NULL is allowed and does nothing.
 */
void WVfree(UTF16 * wp)
{
    if (NULL == wp) {
        return;
    }
    Safefree(wp);
}


/*
 * Get a char * (UTF-8) representation of a UTF16 *
 * The representation is a converted, NUL-terminated copy, so the result
 * needs to be freed using PVfreeW().
 * wp == NULL is handled properly (NULL is returned).
 *
 * Does not handle byte arrays, only null-terminated strings.
 * Croaks if wp cannot be converted.
 */

char * PVallocW(UTF16 * wp)
{
    char * p=NULL;
    if (wp!=NULL) {

#ifdef WIN32
        int bytes=WideCharToMultiByte(CP_UTF8,0,wp,-1,NULL,0,NULL,NULL);
        Newz(0,p,bytes,char);
        if (!WideCharToMultiByte(CP_UTF8,0,wp,-1,p,bytes,NULL,NULL)) {
            croak("WideCharToMultiByte() failed");
        }
#else
        ConversionResult ret;
        UTF16 *source_start;
        UTF16 *source_end;
        unsigned int bytes = 0;
        UTF8 *target_start;
        UTF8 *target_end;
        /* code units before the terminator; the terminator itself is not
         * part of the converted range */
        unsigned int len = utf16_len(wp);

        /* Dry run without a target buffer: only learn the size needed */
        source_start = wp;
        source_end = source_start + len;
        ret = ConvertUTF16toUTF8((const UTF16 **)&source_start, source_end,
                                 NULL, NULL, strictConversion, &bytes);
        if (ret != conversionOK) {
            if (ret == sourceExhausted) {
                croak("PVallocW: Partial character in input");
            } else if (ret == targetExhausted) {
                croak("PVallocW: target buffer exhausted");
            } else if (ret == sourceIllegal) {
                croak("PVallocW: malformed/illegal source sequence");
            } else {
                croak("PVallocW: unknown ConvertUTF16toUTF8 error");
            }
        }

        /* The conversion fills exactly <bytes> bytes, so allocate one more:
         * Newz() zeroes the buffer and that extra byte is the terminator. */
        Newz(0,p,bytes + 1,char);
        target_start = (UTF8 *)p;
        target_end = (UTF8 *)p + bytes;
        /* the first call advanced source_start, so start over */
        source_start = wp;
        source_end = source_start + len;
        ret = ConvertUTF16toUTF8((const UTF16 **)&source_start, source_end,
                                 &target_start, target_end,
                                 strictConversion, &bytes);
        if (ret != conversionOK) {
            croak("PVallocW: second call to ConvertUTF16toUTF8 failed (%d)", ret);
        }
#endif
    }
    return p;
}


/*
 * Release a char * buffer obtained from PVallocW().
 * Passing NULL is allowed and does nothing.
 */
void PVfreeW(char * s)
{
    if (NULL == s) {
        return;
    }
    Safefree(s);
}


/*
 * Mutate an SV's PV INPLACE to contain UTF-16. Does not handle byte arrays,
 * only null-terminated strings.
 * Turns the UTF8 flag OFF unconditionally, because SV becomes a byte array
 * (for Perl).
 * An undefined SV is left untouched.
 */
void SV_toWCHAR(SV * sv)
{
    STRLEN len;
    UTF16 * wp;
    char * p;
    if (!SvOK(sv)) {
        /* warn("SV_toWCHAR called for undef"); */
        return;
    }
    p=SvPVutf8_force(sv,len);
    /* _force makes sure SV is only a string */
    wp=WValloc(p);

    /* Count the code units with a STRLEN here.  utf16_len() returns an
     * unsigned short, so for more than 65535 units the buffer below would
     * be sized from a wrapped value while the copy still writes them all. */
    for (len=0; wp[len]!=0; len++) {
        /* counting only */
    }

    p=SvGROW(sv,sizeof(UTF16)*(1+len));
    utf16_copy((UTF16 *)p,wp);
    /* make sure the UTF-16 text is terminated; the extra unit was reserved
     * by the SvGROW() above */
    ((UTF16 *)p)[len]=0;
    SvCUR_set(sv,sizeof(UTF16)*len);
    WVfree(wp);
    SvPOK_only(sv); /* sv is nothing but a non-UTF8 string -- for Perl ;-) */
}

/*
 * Count the UTF-16 code units in wp that precede the first zero unit.
 * A NULL wp counts as 0.
 * The counter is an unsigned short, so the result wraps around for
 * strings longer than that type can represent.
 */
static unsigned short utf16_len(UTF16 *wp)
{
    unsigned short count;

    if (wp == NULL) {
        return 0;
    }

    for (count = 0; *wp != 0; wp++) {
        count++;
    }
    return count;
}
/*
 * Copy the null-terminated UTF-16 string s to d, terminator included.
 * d must have room for every code unit of s plus the terminating zero.
 */
static void utf16_copy(UTF16 *d, UTF16 *s)
{
    while(*s) {
        *d++ = *s++;
    }
    *d = 0;     /* the loop stops at the terminator without copying it */
}
#endif /* WITH_UNICODE */
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)