It turns out that UCS2 and UCS4 are defined as big-endian encodings

This commit is contained in:
Sam Lantinga 2012-10-28 13:03:45 -07:00
parent 23bc30c0d9
commit 75cb962de6
2 changed files with 87 additions and 63 deletions

View file

@ -87,15 +87,21 @@ enum
ENCODING_UTF32, /* Needs byte order marker */
ENCODING_UTF32BE,
ENCODING_UTF32LE,
ENCODING_UCS2, /* Native byte order assumed */
ENCODING_UCS4, /* Native byte order assumed */
ENCODING_UCS2BE,
ENCODING_UCS2LE,
ENCODING_UCS4BE,
ENCODING_UCS4LE,
};
/*
 * "Native" aliases: each *NATIVE name expands to the big-endian or
 * little-endian encoding constant that matches the host byte order, as
 * selected by SDL_BYTEORDER at compile time.
 */
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE ENCODING_UTF32BE
#define ENCODING_UCS2NATIVE ENCODING_UCS2BE
#define ENCODING_UCS4NATIVE ENCODING_UCS4BE
#else
#define ENCODING_UTF16NATIVE ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE ENCODING_UTF32LE
#define ENCODING_UCS2NATIVE ENCODING_UCS2LE
#define ENCODING_UCS4NATIVE ENCODING_UCS4LE
#endif
struct _SDL_iconv_t
@ -128,10 +134,16 @@ static struct
{ "UTF-32BE", ENCODING_UTF32BE },
{ "UTF32LE", ENCODING_UTF32LE },
{ "UTF-32LE", ENCODING_UTF32LE },
{ "UCS2", ENCODING_UCS2 },
{ "UCS-2", ENCODING_UCS2 },
{ "UCS4", ENCODING_UCS4 },
{ "UCS-4", ENCODING_UCS4 },
{ "UCS2", ENCODING_UCS2BE },
{ "UCS-2", ENCODING_UCS2BE },
{ "UCS-2LE", ENCODING_UCS2LE },
{ "UCS-2BE", ENCODING_UCS2BE },
{ "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
{ "UCS4", ENCODING_UCS4BE },
{ "UCS-4", ENCODING_UCS4BE },
{ "UCS-4LE", ENCODING_UCS4LE },
{ "UCS-4BE", ENCODING_UCS4BE },
{ "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
/* *INDENT-ON* */
};
@ -518,6 +530,29 @@ SDL_iconv(SDL_iconv_t cd,
(Uint32) (W2 & 0x3FF)) + 0x10000;
}
break;
/* Decode one 2-byte UCS-2 unit stored little-endian (low byte first). */
case ENCODING_UCS2LE:
{
Uint8 *p = (Uint8 *) src;
/* Fewer than 2 input bytes left: the unit is incomplete. */
if (srclen < 2) {
return SDL_ICONV_EINVAL;
}
/* Assembled byte by byte, so the result does not depend on host byte order. */
ch = ((Uint32) p[1] << 8) | (Uint32) p[0];
src += 2;
srclen -= 2;
}
break;
/* Decode one 2-byte UCS-2 unit stored big-endian (high byte first). */
case ENCODING_UCS2BE:
{
Uint8 *p = (Uint8 *) src;
/* Fewer than 2 input bytes left: the unit is incomplete. */
if (srclen < 2) {
return SDL_ICONV_EINVAL;
}
/* Assembled byte by byte, so the result does not depend on host byte order. */
ch = ((Uint32) p[0] << 8) | (Uint32) p[1];
src += 2;
srclen -= 2;
}
break;
case ENCODING_UCS4BE:
case ENCODING_UTF32BE:
{
Uint8 *p = (Uint8 *) src;
@ -531,6 +566,7 @@ SDL_iconv(SDL_iconv_t cd,
srclen -= 4;
}
break;
case ENCODING_UCS4LE:
case ENCODING_UTF32LE:
{
Uint8 *p = (Uint8 *) src;
@ -544,28 +580,6 @@ SDL_iconv(SDL_iconv_t cd,
srclen -= 4;
}
break;
case ENCODING_UCS2:
{
Uint16 *p = (Uint16 *) src;
if (srclen < 2) {
return SDL_ICONV_EINVAL;
}
ch = *p;
src += 2;
srclen -= 2;
}
break;
case ENCODING_UCS4:
{
Uint32 *p = (Uint32 *) src;
if (srclen < 4) {
return SDL_ICONV_EINVAL;
}
ch = *p;
src += 4;
srclen -= 4;
}
break;
}
/* Encode a character */
@ -728,12 +742,46 @@ SDL_iconv(SDL_iconv_t cd,
}
}
break;
case ENCODING_UTF32BE:
case ENCODING_UCS2BE:
{
Uint8 *p = (Uint8 *) dst;
if (ch > 0x10FFFF) {
if (ch > 0xFFFF) {
ch = UNKNOWN_UNICODE;
}
if (dstlen < 2) {
return SDL_ICONV_E2BIG;
}
p[0] = (Uint8) (ch >> 8);
p[1] = (Uint8) ch;
dst += 2;
dstlen -= 2;
}
break;
/* Encode ch as one 2-byte UCS-2 unit, little-endian (low byte first). */
case ENCODING_UCS2LE:
{
Uint8 *p = (Uint8 *) dst;
/* Values that do not fit in 16 bits are replaced with UNKNOWN_UNICODE. */
if (ch > 0xFFFF) {
ch = UNKNOWN_UNICODE;
}
/* Not enough room left in the output buffer for 2 bytes. */
if (dstlen < 2) {
return SDL_ICONV_E2BIG;
}
p[1] = (Uint8) (ch >> 8);
p[0] = (Uint8) ch;
dst += 2;
dstlen -= 2;
}
break;
/* UTF-32 output: values above 0x10FFFF are replaced with UNKNOWN_UNICODE. */
case ENCODING_UTF32BE:
if (ch > 0x10FFFF) {
ch = UNKNOWN_UNICODE;
}
/* No break here: falls through into the UCS-4 case and the code that follows it. */
case ENCODING_UCS4BE:
/* UCS-4 output: values above 0x7FFFFFFF are replaced with UNKNOWN_UNICODE. */
if (ch > 0x7FFFFFFF) {
ch = UNKNOWN_UNICODE;
}
{
Uint8 *p = (Uint8 *) dst;
if (dstlen < 4) {
return SDL_ICONV_E2BIG;
}
@ -746,11 +794,15 @@ SDL_iconv(SDL_iconv_t cd,
}
break;
/* UTF-32 output: values above 0x10FFFF are replaced with UNKNOWN_UNICODE. */
case ENCODING_UTF32LE:
if (ch > 0x10FFFF) {
ch = UNKNOWN_UNICODE;
}
/* No break here: falls through into the UCS-4 case and the code that follows it. */
case ENCODING_UCS4LE:
/* UCS-4 output: values above 0x7FFFFFFF are replaced with UNKNOWN_UNICODE. */
if (ch > 0x7FFFFFFF) {
ch = UNKNOWN_UNICODE;
}
{
Uint8 *p = (Uint8 *) dst;
if (ch > 0x10FFFF) {
ch = UNKNOWN_UNICODE;
}
if (dstlen < 4) {
return SDL_ICONV_E2BIG;
}
@ -762,34 +814,6 @@ SDL_iconv(SDL_iconv_t cd,
dstlen -= 4;
}
break;
case ENCODING_UCS2:
{
Uint16 *p = (Uint16 *) dst;
if (ch > 0xFFFF) {
ch = UNKNOWN_UNICODE;
}
if (dstlen < 2) {
return SDL_ICONV_E2BIG;
}
*p = (Uint16) ch;
dst += 2;
dstlen -= 2;
}
break;
case ENCODING_UCS4:
{
Uint32 *p = (Uint32 *) dst;
if (ch > 0x7FFFFFFF) {
ch = UNKNOWN_UNICODE;
}
if (dstlen < 4) {
return SDL_ICONV_E2BIG;
}
*p = ch;
dst += 4;
dstlen -= 4;
}
break;
}
/* Update state */