Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
--HG-- extra : convert_revision : svn%3Ac70aab31-4412-0410-b14c-859654838e24/trunk%401505
This commit is contained in:
parent
5f71e5a774
commit
3750cb6c4d
8 changed files with 958 additions and 5 deletions
|
@ -94,7 +94,7 @@ if test x$enable_libc = xyes; then
|
|||
|
||||
dnl Check for C library headers
|
||||
AC_HEADER_STDC
|
||||
AC_CHECK_HEADERS(sys/types.h stdio.h stdlib.h stddef.h stdarg.h malloc.h memory.h string.h strings.h inttypes.h stdint.h ctype.h math.h signal.h)
|
||||
AC_CHECK_HEADERS(sys/types.h stdio.h stdlib.h stddef.h stdarg.h malloc.h memory.h string.h strings.h inttypes.h stdint.h ctype.h math.h iconv.h signal.h)
|
||||
|
||||
dnl Check for typedefs, structures, etc.
|
||||
AC_TYPE_SIZE_T
|
||||
|
@ -116,7 +116,7 @@ if test x$enable_libc = xyes; then
|
|||
if test x$ac_cv_func_strtod = xyes; then
|
||||
AC_DEFINE(HAVE_STRTOD)
|
||||
fi
|
||||
AC_CHECK_FUNCS(malloc calloc realloc free getenv putenv unsetenv qsort abs bcopy memset memcpy memmove strlen strlcpy strlcat strdup _strrev _strupr _strlwr strchr strrchr strstr itoa _ltoa _uitoa _ultoa strtol strtoul _i64toa _ui64toa strtoll strtoull atoi atof strcmp strncmp stricmp strcasecmp sscanf snprintf vsnprintf sigaction setjmp nanosleep)
|
||||
AC_CHECK_FUNCS(malloc calloc realloc free getenv putenv unsetenv qsort abs bcopy memset memcpy memmove strlen strlcpy strlcat strdup _strrev _strupr _strlwr strchr strrchr strstr itoa _ltoa _uitoa _ultoa strtol strtoul _i64toa _ui64toa strtoll strtoull atoi atof strcmp strncmp stricmp strcasecmp strncasecmp sscanf snprintf vsnprintf iconv sigaction setjmp nanosleep)
|
||||
|
||||
AC_CHECK_LIB(m, pow, [BUILD_LDFLAGS="$BUILD_LDFLAGS -lm"])
|
||||
fi
|
||||
|
|
|
@ -68,6 +68,7 @@
|
|||
#undef HAVE_STDINT_H
|
||||
#undef HAVE_CTYPE_H
|
||||
#undef HAVE_MATH_H
|
||||
#undef HAVE_ICONV_H
|
||||
#undef HAVE_SIGNAL_H
|
||||
#undef HAVE_ALTIVEC_H
|
||||
|
||||
|
@ -118,9 +119,11 @@
|
|||
#undef HAVE_STRNCMP
|
||||
#undef HAVE_STRICMP
|
||||
#undef HAVE_STRCASECMP
|
||||
#undef HAVE_STRNCASECMP
|
||||
#undef HAVE_SSCANF
|
||||
#undef HAVE_SNPRINTF
|
||||
#undef HAVE_VSNPRINTF
|
||||
#undef HAVE_ICONV
|
||||
#undef HAVE_SIGACTION
|
||||
#undef HAVE_SETJMP
|
||||
#undef HAVE_NANOSLEEP
|
||||
|
|
|
@ -70,6 +70,9 @@
|
|||
#if HAVE_CTYPE_H
|
||||
# include <ctype.h>
|
||||
#endif
|
||||
#if HAVE_ICONV_H
|
||||
# include <iconv.h>
|
||||
#endif
|
||||
|
||||
/* The number of elements in an array */
|
||||
#define SDL_arraysize(array) (sizeof(array)/sizeof(array[0]))
|
||||
|
@ -518,6 +521,12 @@ extern DECLSPEC int SDLCALL SDL_strncmp(const char *str1, const char *str2, size
|
|||
extern DECLSPEC int SDLCALL SDL_strcasecmp(const char *str1, const char *str2);
|
||||
#endif
|
||||
|
||||
#if HAVE_STRNCASECMP
|
||||
#define SDL_strncasecmp strncasecmp
|
||||
#else
|
||||
extern DECLSPEC int SDLCALL SDL_strncasecmp(const char *str1, const char *str2, size_t maxlen);
|
||||
#endif
|
||||
|
||||
#if HAVE_SSCANF
|
||||
#define SDL_sscanf sscanf
|
||||
#else
|
||||
|
@ -536,6 +545,32 @@ extern DECLSPEC int SDLCALL SDL_snprintf(char *text, size_t maxlen, const char *
|
|||
extern DECLSPEC int SDLCALL SDL_vsnprintf(char *text, size_t maxlen, const char *fmt, va_list ap);
|
||||
#endif
|
||||
|
||||
/* The SDL implementation of iconv() returns these error codes */
|
||||
#define SDL_ICONV_ERROR (size_t)-1
|
||||
#define SDL_ICONV_E2BIG (size_t)-2
|
||||
#define SDL_ICONV_EILSEQ (size_t)-3
|
||||
#define SDL_ICONV_EINVAL (size_t)-4
|
||||
|
||||
#if HAVE_ICONV
|
||||
#define SDL_iconv_t iconv_t
|
||||
#define SDL_iconv_open iconv_open
|
||||
#define SDL_iconv_close iconv_close
|
||||
extern DECLSPEC size_t SDLCALL SDL_iconv(SDL_iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
|
||||
#else
|
||||
typedef struct _SDL_iconv_t *SDL_iconv_t;
|
||||
extern DECLSPEC SDL_iconv_t SDLCALL SDL_iconv_open(const char *tocode, const char *fromcode);
|
||||
extern DECLSPEC int SDLCALL SDL_iconv_close(SDL_iconv_t cd);
|
||||
extern DECLSPEC size_t SDLCALL SDL_iconv(SDL_iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
|
||||
#endif
|
||||
/* This function converts a string between encodings in one pass, returning a
|
||||
string that must be freed with SDL_free() or NULL on error.
|
||||
*/
|
||||
extern DECLSPEC char * SDLCALL SDL_iconv_string(const char *tocode, const char *fromcode, char *inbuf, size_t inbytesleft);
|
||||
#define SDL_iconv_utf8_ascii(S) SDL_iconv_string("ASCII", "UTF-8", S, SDL_strlen(S)+1)
|
||||
#define SDL_iconv_utf8_latin1(S) SDL_iconv_string("LATIN1", "UTF-8", S, SDL_strlen(S)+1)
|
||||
#define SDL_iconv_utf8_ucs2(S) (Uint16 *)SDL_iconv_string("UCS-2", "UTF-8", S, SDL_strlen(S)+1)
|
||||
#define SDL_iconv_utf8_ucs4(S) (Uint32 *)SDL_iconv_string("UCS-4", "UTF-8", S, SDL_strlen(S)+1)
|
||||
|
||||
/* Ends C function definitions when using C++ */
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
809
src/stdlib/SDL_iconv.c
Normal file
809
src/stdlib/SDL_iconv.c
Normal file
|
@ -0,0 +1,809 @@
|
|||
/*
|
||||
SDL - Simple DirectMedia Layer
|
||||
Copyright (C) 1997-2006 Sam Lantinga
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
Sam Lantinga
|
||||
slouken@libsdl.org
|
||||
*/
|
||||
#include "SDL_config.h"
|
||||
|
||||
/* This file contains portable iconv functions for SDL */
|
||||
|
||||
#include "SDL_stdinc.h"
|
||||
#include "SDL_endian.h"
|
||||
|
||||
#ifdef HAVE_ICONV
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
/* Forward a conversion request to the C library's iconv() and translate a
   failure (a (size_t)-1 result plus errno) into the SDL_ICONV_* error codes.
   Successful results are returned unchanged.
 */
size_t SDL_iconv(SDL_iconv_t cd,
                 char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft)
{
    const size_t result = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
    int err;

    if ( result != (size_t)-1 ) {
        return result;
    }

    /* Capture errno right away, before anything else can overwrite it */
    err = errno;
    if ( err == E2BIG ) {
        return SDL_ICONV_E2BIG;
    }
    if ( err == EILSEQ ) {
        return SDL_ICONV_EILSEQ;
    }
    if ( err == EINVAL ) {
        return SDL_ICONV_EINVAL;
    }
    return SDL_ICONV_ERROR;
}
|
||||
|
||||
#else
|
||||
|
||||
#define UNICODE_BOM 0xFEFF
|
||||
|
||||
#define UNKNOWN_ASCII '?'
|
||||
#define UNKNOWN_UNICODE 0xFFFD
|
||||
|
||||
/* Identifiers for every text encoding the portable converter understands */
enum {
    ENCODING_UNKNOWN,
    ENCODING_ASCII,
    ENCODING_LATIN1,
    ENCODING_UTF8,
    ENCODING_UTF16,     /* Needs byte order marker */
    ENCODING_UTF16BE,
    ENCODING_UTF16LE,
    ENCODING_UTF32,     /* Needs byte order marker */
    ENCODING_UTF32BE,
    ENCODING_UTF32LE,
    ENCODING_UCS2,      /* Native byte order assumed */
    ENCODING_UCS4       /* Native byte order assumed */
};

/* The explicit-endian formats that match the byte order of the host */
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE    ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE    ENCODING_UTF32BE
#else
#define ENCODING_UTF16NATIVE    ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE    ENCODING_UTF32LE
#endif

/* A conversion descriptor: the ENCODING_* values of both ends */
struct _SDL_iconv_t
{
    int src_fmt;    /* encoding of the input bytes */
    int dst_fmt;    /* encoding of the output bytes */
};

/* Accepted encoding names (with and without the dash) and their formats */
static struct {
    const char *name;
    int format;
} encodings[] = {
    { "ASCII",      ENCODING_ASCII },
    { "US-ASCII",   ENCODING_ASCII },
    { "LATIN1",     ENCODING_LATIN1 },
    { "ISO-8859-1", ENCODING_LATIN1 },
    { "UTF8",       ENCODING_UTF8 },
    { "UTF-8",      ENCODING_UTF8 },
    { "UTF16",      ENCODING_UTF16 },
    { "UTF-16",     ENCODING_UTF16 },
    { "UTF16BE",    ENCODING_UTF16BE },
    { "UTF-16BE",   ENCODING_UTF16BE },
    { "UTF16LE",    ENCODING_UTF16LE },
    { "UTF-16LE",   ENCODING_UTF16LE },
    { "UTF32",      ENCODING_UTF32 },
    { "UTF-32",     ENCODING_UTF32 },
    { "UTF32BE",    ENCODING_UTF32BE },
    { "UTF-32BE",   ENCODING_UTF32BE },
    { "UTF32LE",    ENCODING_UTF32LE },
    { "UTF-32LE",   ENCODING_UTF32LE },
    { "UCS2",       ENCODING_UCS2 },
    { "UCS-2",      ENCODING_UCS2 },
    { "UCS4",       ENCODING_UCS4 },
    { "UCS-4",      ENCODING_UCS4 }
};
|
||||
|
||||
SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
|
||||
{
|
||||
int src_fmt = ENCODING_UNKNOWN;
|
||||
int dst_fmt = ENCODING_UNKNOWN;
|
||||
int i;
|
||||
|
||||
for ( i = 0; i < SDL_arraysize(encodings); ++i ) {
|
||||
if ( SDL_strcasecmp(fromcode, encodings[i].name) == 0 ) {
|
||||
src_fmt = encodings[i].format;
|
||||
if ( dst_fmt != ENCODING_UNKNOWN ) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( SDL_strcasecmp(tocode, encodings[i].name) == 0 ) {
|
||||
dst_fmt = encodings[i].format;
|
||||
if ( src_fmt != ENCODING_UNKNOWN ) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) {
|
||||
SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd));
|
||||
if ( cd ) {
|
||||
cd->src_fmt = src_fmt;
|
||||
cd->dst_fmt = dst_fmt;
|
||||
return cd;
|
||||
}
|
||||
}
|
||||
return (SDL_iconv_t)-1;
|
||||
}
|
||||
|
||||
/* Portable replacement for iconv().

   Converts characters from *inbuf (at most *inbytesleft bytes, in the
   descriptor's source encoding) into *outbuf (at most *outbytesleft bytes,
   in the destination encoding).  After every character that has been fully
   decoded AND stored, the four in/out values are advanced, so on an error
   return they describe exactly what has been converted so far.

   Returns the number of characters converted, or:
     SDL_ICONV_E2BIG   the output buffer is missing or has no room for the
                       next character
     SDL_ICONV_EINVAL  the input ends in the middle of a character

   Malformed input is not reported as an error; each bad sequence is
   replaced by UNKNOWN_UNICODE (or UNKNOWN_ASCII for 8-bit destinations).

   Calling with a NULL inbuf (or NULL *inbuf) is the "reset" request and
   simply returns 0.

   The generic "UTF-16"/"UTF-32" formats are resolved to a concrete byte
   order on first use, and that choice is stored in the descriptor.
 */
size_t SDL_iconv(SDL_iconv_t cd,
                 char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft)
{
    /* For simplicity, we'll convert everything to and from UCS-4 */
    char *src, *dst;
    size_t srclen, dstlen;
    Uint32 ch = 0;
    size_t total;

    if ( !inbuf || !*inbuf ) {
        /* Reset the context */
        return 0;
    }
    if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) {
        return SDL_ICONV_E2BIG;
    }
    src = *inbuf;
    srclen = (inbytesleft ? *inbytesleft : 0);
    dst = *outbuf;
    dstlen = *outbytesleft;

    /* Resolve a generic source format to a concrete byte order */
    switch ( cd->src_fmt ) {
        case ENCODING_UTF16:
            /* Scan for a byte order marker.  U+FEFF is stored as FE FF in
               big-endian data and as FF FE in little-endian data. */
            {
                Uint8 *p = (Uint8 *)src;
                size_t n = srclen / 2;
                while ( n ) {
                    if ( p[0] == 0xFE && p[1] == 0xFF ) {
                        cd->src_fmt = ENCODING_UTF16BE;
                        break;
                    } else if ( p[0] == 0xFF && p[1] == 0xFE ) {
                        cd->src_fmt = ENCODING_UTF16LE;
                        break;
                    }
                    p += 2;
                    --n;
                }
                if ( n == 0 ) {
                    /* We can't tell, default to host order */
                    cd->src_fmt = ENCODING_UTF16NATIVE;
                }
            }
            break;
        case ENCODING_UTF32:
            /* Scan for a byte order marker.  U+0000FEFF is stored as
               00 00 FE FF in big-endian data and FF FE 00 00 in
               little-endian data. */
            {
                Uint8 *p = (Uint8 *)src;
                size_t n = srclen / 4;
                while ( n ) {
                    if ( p[0] == 0x00 && p[1] == 0x00 &&
                         p[2] == 0xFE && p[3] == 0xFF ) {
                        cd->src_fmt = ENCODING_UTF32BE;
                        break;
                    } else if ( p[0] == 0xFF && p[1] == 0xFE &&
                                p[2] == 0x00 && p[3] == 0x00 ) {
                        cd->src_fmt = ENCODING_UTF32LE;
                        break;
                    }
                    p += 4;
                    --n;
                }
                if ( n == 0 ) {
                    /* We can't tell, default to host order */
                    cd->src_fmt = ENCODING_UTF32NATIVE;
                }
            }
            break;
        default:
            break;
    }

    /* Resolve a generic destination format, emitting a byte order marker.
       The marker is committed to the caller's state immediately: the
       descriptor no longer remembers that one is due, so it must not be
       lost if we return before the first character is stored. */
    switch ( cd->dst_fmt ) {
        case ENCODING_UTF16:
            /* Default to host order, need to add byte order marker */
            if ( dstlen < 2 ) {
                return SDL_ICONV_E2BIG;
            }
            *(Uint16 *)dst = UNICODE_BOM;
            dst += 2;
            dstlen -= 2;
            cd->dst_fmt = ENCODING_UTF16NATIVE;
            *outbuf = dst;
            *outbytesleft = dstlen;
            break;
        case ENCODING_UTF32:
            /* Default to host order, need to add byte order marker */
            if ( dstlen < 4 ) {
                return SDL_ICONV_E2BIG;
            }
            *(Uint32 *)dst = UNICODE_BOM;
            dst += 4;
            dstlen -= 4;
            cd->dst_fmt = ENCODING_UTF32NATIVE;
            *outbuf = dst;
            *outbytesleft = dstlen;
            break;
        default:
            break;
    }

    total = 0;
    while ( srclen > 0 ) {
        /* Decode a character */
        switch ( cd->src_fmt ) {
            case ENCODING_ASCII:
            case ENCODING_LATIN1:
                {
                    /* ASCII input has its high bit stripped */
                    const Uint8 mask =
                        (cd->src_fmt == ENCODING_ASCII) ? 0x7F : 0xFF;
                    Uint8 *p = (Uint8 *)src;
                    ch = (Uint32)(p[0] & mask);
                    ++src;
                    --srclen;
                }
                break;
            case ENCODING_UTF8: /* RFC 3629 */
                {
                    Uint8 *p = (Uint8 *)src;
                    size_t left = 0;        /* continuation bytes expected */
                    Uint32 minimum = 0;     /* smallest value that really
                                               needs a sequence this long */
                    SDL_bool illegal = SDL_FALSE;

                    if ( p[0] < 0x80 ) {
                        ch = (Uint32)p[0];
                    } else if ( (p[0] & 0xE0) == 0xC0 ) {
                        ch = (Uint32)(p[0] & 0x1F);
                        left = 1;
                        minimum = 0x80;
                    } else if ( (p[0] & 0xF0) == 0xE0 ) {
                        ch = (Uint32)(p[0] & 0x0F);
                        left = 2;
                        minimum = 0x800;
                    } else if ( (p[0] & 0xF8) == 0xF0 ) {
                        ch = (Uint32)(p[0] & 0x07);
                        left = 3;
                        minimum = 0x10000;
                    } else if ( (p[0] & 0xFC) == 0xF8 ) {
                        ch = (Uint32)(p[0] & 0x03);
                        left = 4;
                        minimum = 0x200000;
                    } else if ( (p[0] & 0xFE) == 0xFC ) {
                        ch = (Uint32)(p[0] & 0x01);
                        left = 5;
                        minimum = 0x4000000;
                    } else {
                        /* Stray continuation byte, or 0xFE / 0xFF */
                        illegal = SDL_TRUE;
                    }
                    ++src;
                    --srclen;
                    if ( srclen < left ) {
                        return SDL_ICONV_EINVAL;
                    }
                    while ( left-- ) {
                        ++p;
                        if ( (p[0] & 0xC0) != 0x80 ) {
                            /* Not a continuation byte: leave it in the
                               input so it is decoded on its own */
                            illegal = SDL_TRUE;
                            break;
                        }
                        ch <<= 6;
                        ch |= (p[0] & 0x3F);
                        ++src;
                        --srclen;
                    }
                    if ( illegal ||
                         ch < minimum ||    /* overlong: security risk */
                         (ch >= 0xD800 && ch <= 0xDFFF) ||
                         (ch == 0xFFFE || ch == 0xFFFF) ) {
                        /* Skip illegal sequences
                        return SDL_ICONV_EILSEQ;
                        */
                        ch = UNKNOWN_UNICODE;
                    }
                }
                break;
            case ENCODING_UTF16BE: /* RFC 2781 */
            case ENCODING_UTF16LE:
                {
                    /* Index of the high / low byte within a 16-bit unit */
                    const int hi = (cd->src_fmt == ENCODING_UTF16BE) ? 0 : 1;
                    const int lo = 1 - hi;
                    Uint8 *p = (Uint8 *)src;
                    Uint16 W1, W2;
                    if ( srclen < 2 ) {
                        return SDL_ICONV_EINVAL;
                    }
                    W1 = (Uint16)(((Uint32)p[hi] << 8) | (Uint32)p[lo]);
                    src += 2;
                    srclen -= 2;
                    if ( W1 < 0xD800 || W1 > 0xDFFF ) {
                        ch = (Uint32)W1;
                        break;
                    }
                    if ( W1 > 0xDBFF ) {
                        /* Low surrogate without a leading high surrogate */
                        ch = UNKNOWN_UNICODE;
                        break;
                    }
                    if ( srclen < 2 ) {
                        return SDL_ICONV_EINVAL;
                    }
                    p = (Uint8 *)src;
                    W2 = (Uint16)(((Uint32)p[hi] << 8) | (Uint32)p[lo]);
                    src += 2;
                    srclen -= 2;
                    if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
                        /* High surrogate not followed by a low surrogate */
                        ch = UNKNOWN_UNICODE;
                        break;
                    }
                    ch = (((Uint32)(W1 & 0x3FF) << 10) |
                          (Uint32)(W2 & 0x3FF)) + 0x10000;
                }
                break;
            case ENCODING_UTF32BE:
            case ENCODING_UTF32LE:
                {
                    Uint8 *p = (Uint8 *)src;
                    if ( srclen < 4 ) {
                        return SDL_ICONV_EINVAL;
                    }
                    if ( cd->src_fmt == ENCODING_UTF32BE ) {
                        ch = ((Uint32)p[0] << 24) |
                             ((Uint32)p[1] << 16) |
                             ((Uint32)p[2] << 8) |
                              (Uint32)p[3];
                    } else {
                        ch = ((Uint32)p[3] << 24) |
                             ((Uint32)p[2] << 16) |
                             ((Uint32)p[1] << 8) |
                              (Uint32)p[0];
                    }
                    src += 4;
                    srclen -= 4;
                }
                break;
            case ENCODING_UCS2:
                {
                    Uint16 *p = (Uint16 *)src;
                    if ( srclen < 2 ) {
                        return SDL_ICONV_EINVAL;
                    }
                    ch = *p;
                    src += 2;
                    srclen -= 2;
                }
                break;
            case ENCODING_UCS4:
                {
                    Uint32 *p = (Uint32 *)src;
                    if ( srclen < 4 ) {
                        return SDL_ICONV_EINVAL;
                    }
                    ch = *p;
                    src += 4;
                    srclen -= 4;
                }
                break;
            default:
                break;
        }

        /* Encode a character */
        switch ( cd->dst_fmt ) {
            case ENCODING_ASCII:
            case ENCODING_LATIN1:
                {
                    /* Largest value the 8-bit destination can represent */
                    const Uint32 limit =
                        (cd->dst_fmt == ENCODING_ASCII) ? 0x7F : 0xFF;
                    Uint8 *p = (Uint8 *)dst;
                    if ( dstlen < 1 ) {
                        return SDL_ICONV_E2BIG;
                    }
                    if ( ch > limit ) {
                        *p = UNKNOWN_ASCII;
                    } else {
                        *p = (Uint8)ch;
                    }
                    ++dst;
                    --dstlen;
                }
                break;
            case ENCODING_UTF8: /* RFC 3629 */
                {
                    Uint8 *p = (Uint8 *)dst;
                    if ( ch > 0x7FFFFFFF ) {
                        ch = UNKNOWN_UNICODE;
                    }
                    if ( ch <= 0x7F ) {
                        if ( dstlen < 1 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        *p = (Uint8)ch;
                        ++dst;
                        --dstlen;
                    } else if ( ch <= 0x7FF ) {
                        if ( dstlen < 2 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
                        p[1] = 0x80 | (Uint8)(ch & 0x3F);
                        dst += 2;
                        dstlen -= 2;
                    } else if ( ch <= 0xFFFF ) {
                        if ( dstlen < 3 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
                        p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
                        p[2] = 0x80 | (Uint8)(ch & 0x3F);
                        dst += 3;
                        dstlen -= 3;
                    } else if ( ch <= 0x1FFFFF ) {
                        if ( dstlen < 4 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
                        p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
                        p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
                        p[3] = 0x80 | (Uint8)(ch & 0x3F);
                        dst += 4;
                        dstlen -= 4;
                    } else if ( ch <= 0x3FFFFFF ) {
                        if ( dstlen < 5 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03);
                        p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
                        p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
                        p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
                        p[4] = 0x80 | (Uint8)(ch & 0x3F);
                        dst += 5;
                        dstlen -= 5;
                    } else {
                        if ( dstlen < 6 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01);
                        p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F);
                        p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
                        p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
                        p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
                        p[5] = 0x80 | (Uint8)(ch & 0x3F);
                        dst += 6;
                        dstlen -= 6;
                    }
                }
                break;
            case ENCODING_UTF16BE: /* RFC 2781 */
            case ENCODING_UTF16LE:
                {
                    /* Index of the high / low byte within a 16-bit unit */
                    const int hi = (cd->dst_fmt == ENCODING_UTF16BE) ? 0 : 1;
                    const int lo = 1 - hi;
                    Uint8 *p = (Uint8 *)dst;
                    if ( ch > 0x10FFFF ) {
                        ch = UNKNOWN_UNICODE;
                    }
                    if ( ch < 0x10000 ) {
                        if ( dstlen < 2 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        p[hi] = (Uint8)(ch >> 8);
                        p[lo] = (Uint8)ch;
                        dst += 2;
                        dstlen -= 2;
                    } else {
                        /* Needs a surrogate pair */
                        Uint16 W1, W2;
                        if ( dstlen < 4 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        ch = ch - 0x10000;
                        W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
                        W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
                        p[hi] = (Uint8)(W1 >> 8);
                        p[lo] = (Uint8)W1;
                        p[2 + hi] = (Uint8)(W2 >> 8);
                        p[2 + lo] = (Uint8)W2;
                        dst += 4;
                        dstlen -= 4;
                    }
                }
                break;
            case ENCODING_UTF32BE:
            case ENCODING_UTF32LE:
                {
                    Uint8 *p = (Uint8 *)dst;
                    if ( ch > 0x7FFFFFFF ) {
                        ch = UNKNOWN_UNICODE;
                    }
                    if ( dstlen < 4 ) {
                        return SDL_ICONV_E2BIG;
                    }
                    if ( cd->dst_fmt == ENCODING_UTF32BE ) {
                        p[0] = (Uint8)(ch >> 24);
                        p[1] = (Uint8)(ch >> 16);
                        p[2] = (Uint8)(ch >> 8);
                        p[3] = (Uint8)ch;
                    } else {
                        p[3] = (Uint8)(ch >> 24);
                        p[2] = (Uint8)(ch >> 16);
                        p[1] = (Uint8)(ch >> 8);
                        p[0] = (Uint8)ch;
                    }
                    dst += 4;
                    dstlen -= 4;
                }
                break;
            case ENCODING_UCS2:
                {
                    Uint16 *p = (Uint16 *)dst;
                    if ( ch > 0xFFFF ) {
                        ch = UNKNOWN_UNICODE;
                    }
                    if ( dstlen < 2 ) {
                        return SDL_ICONV_E2BIG;
                    }
                    *p = (Uint16)ch;
                    dst += 2;
                    dstlen -= 2;
                }
                break;
            case ENCODING_UCS4:
                {
                    Uint32 *p = (Uint32 *)dst;
                    if ( ch > 0x7FFFFFFF ) {
                        ch = UNKNOWN_UNICODE;
                    }
                    if ( dstlen < 4 ) {
                        return SDL_ICONV_E2BIG;
                    }
                    *p = ch;
                    dst += 4;
                    dstlen -= 4;
                }
                break;
            default:
                break;
        }

        /* Update state (srclen was non-zero, so inbytesleft is non-NULL) */
        *inbuf = src;
        *inbytesleft = srclen;
        *outbuf = dst;
        *outbytesleft = dstlen;
        ++total;
    }
    return total;
}
|
||||
|
||||
/* Release a descriptor obtained from SDL_iconv_open().  A NULL handle and
   the (SDL_iconv_t)-1 failure value are both ignored.  Always returns 0.
 */
int SDL_iconv_close(SDL_iconv_t cd)
{
    const int is_real_handle = (cd && cd != (SDL_iconv_t)-1);

    if ( is_real_handle ) {
        SDL_free(cd);
    }
    return 0;
}
|
||||
|
||||
#endif /* !HAVE_ICONV */
|
||||
|
||||
char *SDL_iconv_string(const char *tocode, const char *fromcode, char *inbuf, size_t inbytesleft)
|
||||
{
|
||||
SDL_iconv_t cd;
|
||||
char *string;
|
||||
size_t stringsize;
|
||||
char *outbuf;
|
||||
size_t outbytesleft;
|
||||
size_t retCode = 0;
|
||||
|
||||
cd = SDL_iconv_open(tocode, fromcode);
|
||||
if ( cd == (SDL_iconv_t)-1 ) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
stringsize = inbytesleft > 4 ? inbytesleft : 4;
|
||||
string = SDL_malloc(stringsize);
|
||||
if ( !string ) {
|
||||
SDL_iconv_close(cd);
|
||||
return NULL;
|
||||
}
|
||||
outbuf = string;
|
||||
outbytesleft = stringsize;
|
||||
SDL_memset(outbuf, 0, 4);
|
||||
|
||||
while ( inbytesleft > 0 ) {
|
||||
retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
||||
switch (retCode) {
|
||||
case SDL_ICONV_E2BIG:
|
||||
{
|
||||
char *oldstring = string;
|
||||
stringsize *= 2;
|
||||
string = SDL_realloc(string, stringsize);
|
||||
if ( !string ) {
|
||||
SDL_iconv_close(cd);
|
||||
return NULL;
|
||||
}
|
||||
outbuf = string + (outbuf - oldstring);
|
||||
outbytesleft = stringsize - (outbuf - string);
|
||||
SDL_memset(outbuf, 0, 4);
|
||||
}
|
||||
break;
|
||||
case SDL_ICONV_EILSEQ:
|
||||
/* Try skipping some input data - not perfect, but... */
|
||||
++inbuf;
|
||||
--inbytesleft;
|
||||
break;
|
||||
case SDL_ICONV_EINVAL:
|
||||
case SDL_ICONV_ERROR:
|
||||
/* We can't continue... */
|
||||
inbytesleft = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
SDL_iconv_close(cd);
|
||||
|
||||
return string;
|
||||
}
|
|
@ -661,12 +661,12 @@ int SDL_strncmp(const char *str1, const char *str2, size_t maxlen)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_STRCASECMP
|
||||
#if !defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)
|
||||
int SDL_strcasecmp(const char *str1, const char *str2)
|
||||
{
|
||||
char a = 0;
|
||||
char b = 0;
|
||||
while (*str1 && *str2) {
|
||||
while ( *str1 && *str2 ) {
|
||||
a = SDL_tolower(*str1);
|
||||
b = SDL_tolower(*str2);
|
||||
if ( a != b )
|
||||
|
@ -678,6 +678,24 @@ int SDL_strcasecmp(const char *str1, const char *str2)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_STRNCASECMP
|
||||
/* Compare at most `maxlen` characters of two strings, ignoring case.

   Returns 0 when the compared portions are equal, otherwise the difference
   between the first pair of differing characters (lower-cased and taken as
   unsigned char).  If one string ends before the other, its terminating
   NUL takes part in the comparison, so a proper prefix compares as less
   than the longer string instead of being reported as equal.
 */
int SDL_strncasecmp(const char *str1, const char *str2, size_t maxlen)
{
    while ( maxlen ) {
        /* Go through unsigned char so that bytes above 0x7F are never
           handed to SDL_tolower() as negative values */
        const unsigned char a = (unsigned char)SDL_tolower((unsigned char)*str1);
        const unsigned char b = (unsigned char)SDL_tolower((unsigned char)*str2);

        if ( a != b ) {
            return (int)a - (int)b;
        }
        if ( a == 0 ) {
            /* Both strings ended at the same position */
            return 0;
        }
        ++str1;
        ++str2;
        --maxlen;
    }
    return 0;
}
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_SSCANF
|
||||
int SDL_sscanf(const char *text, const char *fmt, ...)
|
||||
{
|
||||
|
|
|
@ -7,7 +7,7 @@ EXE = @EXE@
|
|||
CFLAGS = @CFLAGS@
|
||||
LIBS = @LIBS@
|
||||
|
||||
TARGETS = checkkeys$(EXE) graywin$(EXE) loopwave$(EXE) testalpha$(EXE) testbitmap$(EXE) testblitspeed$(EXE) testcdrom$(EXE) testdyngl$(EXE) testerror$(EXE) testfile$(EXE) testgamma$(EXE) testgl$(EXE) testhread$(EXE) testjoystick$(EXE) testkeys$(EXE) testlock$(EXE) testoverlay2$(EXE) testoverlay$(EXE) testpalette$(EXE) testplatform$(EXE) testsem$(EXE) testsprite$(EXE) testtimer$(EXE) testver$(EXE) testvidinfo$(EXE) testwin$(EXE) testwm$(EXE) threadwin$(EXE) torturethread$(EXE)
|
||||
TARGETS = checkkeys$(EXE) graywin$(EXE) loopwave$(EXE) testalpha$(EXE) testbitmap$(EXE) testblitspeed$(EXE) testcdrom$(EXE) testdyngl$(EXE) testerror$(EXE) testfile$(EXE) testgamma$(EXE) testgl$(EXE) testhread$(EXE) testiconv$(EXE) testjoystick$(EXE) testkeys$(EXE) testlock$(EXE) testoverlay2$(EXE) testoverlay$(EXE) testpalette$(EXE) testplatform$(EXE) testsem$(EXE) testsprite$(EXE) testtimer$(EXE) testver$(EXE) testvidinfo$(EXE) testwin$(EXE) testwm$(EXE) threadwin$(EXE) torturethread$(EXE)
|
||||
|
||||
all: $(TARGETS)
|
||||
|
||||
|
@ -50,6 +50,9 @@ testgl$(EXE): $(srcdir)/testgl.c
|
|||
testhread$(EXE): $(srcdir)/testhread.c
|
||||
$(CC) -o $@ $? $(CFLAGS) $(LIBS)
|
||||
|
||||
testiconv$(EXE): $(srcdir)/testiconv.c
|
||||
$(CC) -o $@ $? $(CFLAGS) $(LIBS)
|
||||
|
||||
testjoystick$(EXE): $(srcdir)/testjoystick.c
|
||||
$(CC) -o $@ $? $(CFLAGS) $(LIBS)
|
||||
|
||||
|
|
85
test/testiconv.c
Normal file
85
test/testiconv.c
Normal file
|
@ -0,0 +1,85 @@
|
|||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "SDL.h"
|
||||
|
||||
static SDL_bool testutf16(char *data)
|
||||
{
|
||||
Uint32 *p = (Uint32 *)data;
|
||||
while(*p) {
|
||||
if ( *p > 0x10FFFF ) {
|
||||
return SDL_FALSE;
|
||||
}
|
||||
++p;
|
||||
}
|
||||
return SDL_TRUE;
|
||||
}
|
||||
|
||||
static size_t widelen(char *data)
|
||||
{
|
||||
size_t len = 0;
|
||||
Uint32 *p = (Uint32 *)data;
|
||||
while(*p++) {
|
||||
++len;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/* Round-trip test for SDL_iconv_string().

   Reads UTF-8 text line by line from the file named on the command line
   (default "utf8.txt"), converts each line to UCS-4, then converts it to
   every format in the list and back, checking that the result matches the
   UCS-4 original.  Each line is finally converted back to UTF-8 and echoed
   to stdout.

   Exit status: 0 on success, 1 if the input file cannot be opened,
   otherwise the number of failures plus one.
 */
int main(int argc, char *argv[])
{
    const char * formats[] = {
        "UTF8",
        "UTF-8",
        "UTF16BE",
        "UTF-16BE",
        "UTF16LE",
        "UTF-16LE",
        "UTF32BE",
        "UTF-32BE",
        "UTF32LE",
        "UTF-32LE",
        "UCS4",
        "UCS-4",
    };
    const char *path = (argc > 1 && argv[1]) ? argv[1] : "utf8.txt";
    char buffer[BUFSIZ];
    char *ucs4;
    char *test[2];
    size_t len;
    int i;
    FILE *file;
    int errors = 0;

    file = fopen(path, "rb");
    if ( !file ) {
        fprintf(stderr, "Unable to open %s\n", path);
        return (1);
    }

    while ( fgets(buffer, sizeof(buffer), file) ) {
        /* Convert to UCS-4 */
        ucs4 = SDL_iconv_string("UCS-4", "UTF-8", buffer, SDL_strlen(buffer)+1);
        if ( !ucs4 ) {
            fprintf(stderr, "FAIL: %s\n", "UCS-4");
            ++errors;
            continue;
        }
        /* Size of the UCS-4 line in bytes, terminator included */
        len = (widelen(ucs4)+1)*4;
        for ( i = 0; i < SDL_arraysize(formats); ++i ) {
            /* UTF-16 cannot represent values above 0x10FFFF; skip lines
               that contain them */
            if ( (SDL_strncasecmp(formats[i], "UTF16", 5) == 0 ||
                  SDL_strncasecmp(formats[i], "UTF-16", 6) == 0) &&
                 !testutf16(ucs4) ) {
                continue;
            }
            test[0] = SDL_iconv_string(formats[i], "UCS-4", ucs4, len);
            /* NOTE(review): `len` is the UCS-4 byte count, not the size of
               test[0] in its own encoding; SDL_iconv_string() does not
               report the converted length, so the same value is reused. */
            test[1] = test[0] ?
                SDL_iconv_string("UCS-4", formats[i], test[0], len) : NULL;
            if ( !test[1] || SDL_memcmp(test[1], ucs4, len) != 0 ) {
                fprintf(stderr, "FAIL: %s\n", formats[i]);
                ++errors;
            }
            if ( test[0] ) {
                SDL_free(test[0]);
            }
            if ( test[1] ) {
                SDL_free(test[1]);
            }
        }
        test[0] = SDL_iconv_string("UTF-8", "UCS-4", ucs4, len);
        SDL_free(ucs4);
        if ( test[0] ) {
            fputs(test[0], stdout);
            SDL_free(test[0]);
        } else {
            fprintf(stderr, "FAIL: %s\n", "UTF-8");
            ++errors;
        }
    }
    fclose(file);

    return (errors ? errors + 1 : 0);
}
|
BIN
test/utf8.txt
Normal file
BIN
test/utf8.txt
Normal file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue