ppsspp/Common/Data/Encoding/Utf16.h

95 lines
1.9 KiB
C
Raw Normal View History

#pragma once
#include <cstdint>
2020-09-29 12:53:18 +02:00
#include "Common/BitSet.h"
// Host byte-order probe. The bytes 0x00, 0xFF are read back as a single 16-bit
// value: 0xFF00 when the first byte is the low-order one (little-endian host),
// 0x00FF otherwise. Expected to be folded to a constant by the optimizer.
#define UTF16_IS_LITTLE_ENDIAN (*reinterpret_cast<const uint16_t *>("\0\xff") >= 0x100)
// Converts a single UTF-16 code unit between host byte order and the byte
// order selected by is_little (true: little-endian data, false: big-endian).
// The unit is returned unchanged when the two orders agree; otherwise it is
// passed through swap16(), which is declared outside this file.
template <bool is_little>
uint16_t UTF16_Swap(uint16_t u) {
	const bool hostIsLittle = UTF16_IS_LITTLE_ENDIAN;
	const bool ordersMatch = (is_little == hostIsLittle);
	return ordersMatch ? u : swap16(u);
}
template <bool is_little>
struct UTF16_Type {
public:
2020-03-22 19:29:25 +01:00
static const char32_t INVALID = (char32_t)-1;
2020-03-22 19:29:25 +01:00
UTF16_Type(const char16_t *c) : c_(c), index_(0) {}
2020-03-22 19:29:25 +01:00
char32_t next() {
const char32_t u = UTF16_Swap<is_little>(c_[index_++]);
// Surrogate pair. UTF-16 is so simple. We assume it's valid.
2014-01-21 08:03:57 -08:00
if ((u & 0xF800) == 0xD800) {
return 0x10000 + (((u & 0x3FF) << 10) | (UTF16_Swap<is_little>(c_[index_++]) & 0x3FF));
}
return u;
}
bool end() const {
return c_[index_] == 0;
}
int length() const {
int len = 0;
for (UTF16_Type<is_little> dec(c_); !dec.end(); dec.next())
++len;
return len;
}
int shortIndex() const {
return index_;
}
2020-03-22 19:29:25 +01:00
static int encode(char16_t *dest, char32_t u) {
if (u >= 0x10000) {
u -= 0x10000;
2013-07-21 12:55:28 -07:00
*dest++ = UTF16_Swap<is_little>(0xD800 + ((u >> 10) & 0x3FF));
*dest = UTF16_Swap<is_little>(0xDC00 + ((u >> 0) & 0x3FF));
return 2;
} else {
2020-03-22 19:29:25 +01:00
*dest = UTF16_Swap<is_little>((char16_t)u);
return 1;
}
}
2020-03-22 19:29:25 +01:00
// Rejects non-UCS2 codepoints.
static int encodeUCS2(char16_t *dest, char32_t u) {
if (u >= 0x10000 || (u >= 0xD800 && u <= 0xDFFF)) {
return 0;
} else {
*dest = UTF16_Swap<is_little>((char16_t)u);
return 1;
}
}
static int encodeUnits(char32_t u) {
if (u >= 0x10000) {
return 2;
} else {
return 1;
}
}
2020-03-22 19:29:25 +01:00
static int encodeUnitsUCS2(char32_t u) {
if (u >= 0x10000 || (u >= 0xD800 && u <= 0xDFFF)) {
return 0;
} else {
return 1;
}
}
private:
2020-03-22 19:29:25 +01:00
const char16_t *c_;
int index_;
};
// UTF-16 stored little-endian.
using UTF16LE = UTF16_Type<true>;
// UTF-16 stored big-endian.
using UTF16BE = UTF16_Type<false>;