COMMON: add Korean Johab string encoding

This commit is contained in:
athrxx 2022-06-13 00:13:23 +02:00
parent 9d65377f6b
commit b5079ca5c7
8 changed files with 17361 additions and 29 deletions

View file

@ -98,6 +98,8 @@ static const uint16 *windows949ConversionTable = 0;
static const uint16 *windows949ReverseConversionTable = 0;
static const uint16 *windows950ConversionTable = 0;
static uint16 *windows950ReverseConversionTable = 0;
// Johab (Korean) lookup tables.
// johabConversionTable is assigned in loadCJKTables() and is indexed as
// [lead byte index * 188 + trail byte index] with 80 lead byte rows.
// johabReverseConversionTable is created on first use by String::encodeJohab()
// and is indexed directly by the 16 bit Unicode code point.
static const uint16 *johabConversionTable = 0;
static const uint16 *johabReverseConversionTable = 0;
static const uint16 *loadCJKTable(File &f, int idx, size_t sz) {
f.seek(16 + idx * 4);
@ -142,6 +144,7 @@ static void loadCJKTables() {
windows932ConversionTable = loadCJKTable(f, 0, 47 * 192);
windows949ConversionTable = loadCJKTable(f, 1, 0x7e * 0xb2);
windows950ConversionTable = loadCJKTable(f, 2, 89 * 157);
johabConversionTable = loadCJKTable(f, 3, 80 * 188);
}
void releaseCJKTables() {
@ -329,6 +332,58 @@ void U32String::decodeWindows950(const char *src, uint32 len) {
}
}
/**
 * Look up the Unicode code point for a two-byte Johab sequence.
 *
 * @param high  Lead byte; only 0x84..0xD3 is accepted.
 * @param low   Trail byte; only 0x41..0x7E and 0x81..0xFE are accepted.
 * @return The entry stored in johabConversionTable for that byte pair, or 0
 *         if either byte is out of range or the table is not available.
 */
static uint16 convertJohabToUCSReal(uint8 high, uint8 low) {
	// Reject lead bytes outside of the 80 rows covered by the table.
	if (high < 0x84 || high >= 0xD4)
		return 0;
	const int row = high - 0x84;

	// The trail byte comes from two disjoint ranges which are stored
	// back to back in each row (62 entries followed by 126 entries).
	int column;
	if (low >= 0x41 && low <= 0x7E)
		column = low - 0x41;
	else if (low >= 0x81 && low <= 0xFE)
		column = low - 0x81 + (0x7F - 0x41);
	else
		return 0;

	if (johabConversionTable == nullptr)
		return 0;

	return johabConversionTable[row * 188 + column];
}
void U32String::decodeJohab(const char *src, uint32 len) {
ensureCapacity(len, false);
if (!cjk_tables_loaded)
loadCJKTables();
for (uint i = 0; i < len;) {
uint8 high = src[i++];
if ((high & 0x80) == 0x00) {
operator+=(high);
continue;
}
if (high == 0x80 || high == 0xff) {
operator+=(invalidCode);
continue;
}
if (i >= len) {
operator+=(invalidCode);
continue;
}
uint8 low = src[i++];
uint16 val = convertJohabToUCSReal(high, low);
operator+=(val ? val : invalidCode);
}
}
void String::encodeWindows932(const U32String &src) {
ensureCapacity(src.size() * 2, false);
@ -492,16 +547,18 @@ void String::encodeWindows950(const U32String &src, bool transliterate) {
if (!windows950ReverseConversionTable && windows950ConversionTable) {
uint16 *rt = new uint16[0x10000]();
for (uint lowidx = 0; lowidx < 157; lowidx++) {
for (uint lowidx = 0; lowidx < 0xb2; lowidx++) {
uint8 low = 0;
if (lowidx < 0x3f)
low = 0x40 + lowidx;
if (lowidx < 0x1a)
low = 0x41 + lowidx;
else if (lowidx < 0x1a * 2)
low = 0x61 + lowidx - 0x1a;
else
low = 0xa1 + lowidx - 0x3f;
low = 0x81 + lowidx - 0x1a * 2;
for (uint highidx = 0; highidx < 89; highidx++) {
uint8 high = highidx + 0xa1;
uint16 unicode = windows950ConversionTable[highidx * 157 + lowidx];
for (uint highidx = 0; highidx < 0x7e; highidx++) {
uint8 high = highidx + 0x81;
uint16 unicode = windows949ConversionTable[highidx * 0xb2 + lowidx];
rt[unicode] = (high << 8) | low;
}
@ -579,6 +636,54 @@ void String::encodeWindows950(const U32String &src, bool transliterate) {
}
}
/**
 * Append the Johab encoding of a UTF-32 string to this string.
 *
 * Code points below 0x80 are written as a single byte. Every other code
 * point is looked up in the reverse conversion table and written as two
 * bytes (lead byte first). Code points that cannot be represented, or any
 * non-ASCII code point while no table is available, are written as '?'.
 *
 * @param src  The string to encode.
 */
void String::encodeJohab(const U32String &src) {
	ensureCapacity(src.size() * 2, false);

	if (!cjk_tables_loaded)
		loadCJKTables();

	// Build the Unicode -> Johab table on first use by walking the forward
	// table. The array is zero-initialized, so 0 marks "no mapping".
	if (!johabReverseConversionTable && johabConversionTable) {
		uint16 *rt = new uint16[0x10000]();
		for (uint lowidx = 0; lowidx < 188; lowidx++) {
			// Map the column index back to the trail byte. The two valid
			// trail byte ranges (0x41..0x7E and 0x81..0xFE) are stored
			// back to back in the table.
			uint8 low = 0;
			if (lowidx < (0x7F - 0x41))
				low = 0x41 + lowidx;
			else
				low = 0x81 + lowidx - (0x7F - 0x41);

			for (uint highidx = 0; highidx < 80; highidx++) {
				uint8 high = highidx + 0x84;
				uint16 unicode = johabConversionTable[highidx * 188 + lowidx];

				rt[unicode] = (high << 8) | low;
			}
		}
		johabReverseConversionTable = rt;
	}

	for (uint i = 0; i < src.size();) {
		uint32 point = src[i++];

		if (point < 0x80) {
			operator+=(point);
			continue;
		}

		// The reverse table has exactly 0x10000 entries, so the largest valid
		// index is 0xFFFF. Using '>=' (rather than '>') keeps code point
		// 0x10000 from reading one element past the end of the table.
		if (point >= 0x10000 || !johabReverseConversionTable) {
			operator+=('?');
			continue;
		}

		uint16 rev = johabReverseConversionTable[point];
		if (rev == 0) {
			operator+=('?');
			continue;
		}

		operator+=(rev >> 8);
		operator+=(rev & 0xff);
	}
}
// //TODO: This is a quick and dirty converter. Refactoring needed:
// 1. Original version has an option for performing strict / nonstrict
// conversion for the 0xD800...0xDFFF interval
@ -702,7 +807,6 @@ encodeUTF16Template(Native, WRITE_UINT16)
// Upper bound on the Unicode codepoint in any single-byte encoding. Must be divisible by 0x100 and be strictly above the largest such codepoint
static const int kMaxCharSingleByte = 0x3000;
static const uint16 *
getConversionTable(CodePage page) {
switch (page) {
@ -747,6 +851,7 @@ getConversionTable(CodePage page) {
case kWindows932:
case kWindows949:
case kWindows950:
case kJohab:
return nullptr;
}
return nullptr;
@ -868,6 +973,9 @@ void String::encodeInternal(const U32String &src, CodePage page) {
case kWindows950:
encodeWindows950(src);
break;
case kJohab:
encodeJohab(src);
break;
default:
encodeOneByte(src, page);
break;
@ -909,6 +1017,9 @@ void U32String::decodeInternal(const char *str, uint32 len, CodePage page) {
case kWindows950:
decodeWindows950(str, len);
break;
case kJohab:
decodeJohab(str, len);
break;
default:
decodeOneByte(str, len, page);
break;

View file

@ -49,6 +49,7 @@ enum CodePage {
kDos850,
kDos862,
kDos866,
kJohab,
kASCII,
kLatin1 = kISO8859_1,

View file

@ -250,6 +250,7 @@ protected:
void encodeWindows932(const U32String &src);
void encodeWindows949(const U32String &src);
void encodeWindows950(const U32String &src, bool translit = true);
void encodeJohab(const U32String &src);
void encodeOneByte(const U32String &src, CodePage page, bool translit = true);
void encodeInternal(const U32String &src, CodePage page);
void translitChar(U32String::value_type point);

View file

@ -185,6 +185,7 @@ private:
void decodeWindows932(const char *src, uint32 len);
void decodeWindows949(const char *src, uint32 len);
void decodeWindows950(const char *src, uint32 len);
void decodeJohab(const char *src, uint32 len);
void decodeUTF8(const char *str, uint32 len);
friend class String;

File diff suppressed because it is too large Load diff

View file

@ -56,6 +56,11 @@ tables = [
FILE: "CP950.TXT",
HIGH: range(0xA1, 0xFA),
LOW: merge_ranges(range(0x40, 0x7f), range(0xa1, 0xff))
},
{
FILE: "JOHAB.TXT",
HIGH: range(0x84, 0xD4),
LOW: merge_ranges(range(0x41, 0x7f), range(0x81, 0xff))
}
]

Binary file not shown.

View file

@ -188,7 +188,7 @@ GUI_LoK::GUI_LoK(KyraEngine_LoK *vm, Screen_LoK *screen) : GUI_v1(vm), _vm(vm),
_saveLoadNumSlots = (vm->gameFlags().lang == Common::ZH_TWN) ? 4 : 5;
_confMusicMenuMod = (_vm->gameFlags().platform == Common::kPlatformFMTowns || _vm->gameFlags().platform == Common::kPlatformMacintosh) ? 3 : 2;
_resetHanInput = true;
_inputType = (_vm->gameFlags().lang == Common::KO_KOR) ? Font::kHANGUL : Font::kASCII;
_inputType = (_vm->gameFlags().lang == Common::KO_KOR) ? Font::kJohab : Font::kASCII;
_inputState = 0;
memset(_backupChars, 0, sizeof(_backupChars));
}
@ -586,16 +586,7 @@ void GUI_LoK::setupSavegames(Menu &menu, int num) {
if ((in = _vm->openSaveForReading(_vm->getSavegameFilename(_saveSlots[i + _savegameOffset]), header))) {
Common::strlcpy(_savegameNames[i], header.description.c_str(), ARRAYSIZE(_savegameNames[0]));
// Trim long GMM save descriptions to fit our save slots
_screen->_charSpacing = -2;
int fC = _screen->getTextWidth(_savegameNames[i]);
while (_savegameNames[i][0] && (fC > 240)) {
_savegameNames[i][strlen(_savegameNames[i]) - 1] = 0;
fC = _screen->getTextWidth(_savegameNames[i]);
}
_screen->_charSpacing = 0;
Util::convertString_GUItoKYRA(_savegameNames[i], 35, _vm->gameFlags().lang == Common::KO_KOR ? Common::kWindows949 : Common::kDos850);
Util::convertString_GUItoKYRA(_savegameNames[i], ARRAYSIZE(_savegameName), _vm->gameFlags().lang == Common::KO_KOR ? Common::kJohab : Common::kDos850);
if (_vm->gameFlags().lang == Common::JA_JPN || _vm->gameFlags().lang == Common::ZH_TWN) {
// Strip special characters from GMM save dialog which might get misinterpreted as SJIS
for (uint ii = 0; ii < strlen(_savegameNames[i]); ++ii) {
@ -604,6 +595,15 @@ void GUI_LoK::setupSavegames(Menu &menu, int num) {
}
}
// Trim long GMM save descriptions to fit our save slots
_screen->_charSpacing = -2;
int fC = _screen->getTextWidth(_savegameNames[i]);
while (_savegameNames[i][0] && (fC > (_vm->gameFlags().lang == Common::KO_KOR ? 250 : 240))) {
_savegameNames[i][strlen(_savegameNames[i]) - 1] = 0;
fC = _screen->getTextWidth(_savegameNames[i]);
}
_screen->_charSpacing = 0;
menu.item[i].itemString = _savegameNames[i];
menu.item[i].enabled = true;
menu.item[i].saveSlot = _saveSlots[i + _savegameOffset];
@ -749,22 +749,22 @@ void GUI_LoK::updateSavegameString() {
Util::convertISOToDOS(oneByteInput);
uint16 newTwoByteChar = 0;
if (_inputType == Font::kHANGUL)
newTwoByteChar = Util::convertDOSToHAN(oneByteInput);
if (_inputType == Font::kJohab)
newTwoByteChar = Util::convertDOSToJohab(oneByteInput);
if (newTwoByteChar) {
width += 9;
// Even if there is no space left we may still try to modify the last character.
if ((length < ARRAYSIZE(_savegameName)) && (width <= 272)) {
if ((length < ARRAYSIZE(_savegameName)) && (width <= 266)) {
uint16 prevTwoByteChar = (length > 1 && (_savegameName[length - 2] & 0x80)) ? READ_BE_UINT16(&_savegameName[length - 2]) : 0;
Util::mergeUpdateHANChars(prevTwoByteChar, newTwoByteChar, oneByteInput, _resetHanInput);
Util::mergeUpdateJohabChars(prevTwoByteChar, newTwoByteChar, oneByteInput, _resetHanInput);
if (prevTwoByteChar) {
WRITE_BE_UINT16(&_savegameName[length - 2], prevTwoByteChar);
_savegameName[length] = _savegameName[length + 1] = 0;
_backupChars[_inputState++] = prevTwoByteChar;
}
// A new character will only be added if there is still space left.
if (newTwoByteChar && (length < ARRAYSIZE(_savegameName) - 2) && (width <= 256)) {
if (newTwoByteChar && (length < ARRAYSIZE(_savegameName) - 2) && (width <= 250)) {
WRITE_BE_UINT16(&_savegameName[length], newTwoByteChar);
_savegameName[length + 2] = 0;
_backupChars[0] = newTwoByteChar;
@ -784,7 +784,7 @@ void GUI_LoK::updateSavegameString() {
}
} else if (_keyPressed.keycode == Common::KEYCODE_BACKSPACE || _keyPressed.keycode == Common::KEYCODE_DELETE) {
_resetHanInput = true;
if (_inputType == Font::kHANGUL && length > 1 && _inputState > 0) {
if (_inputType == Font::kJohab && length > 1 && _inputState > 0) {
if (_inputState > 1) {
// We allow step-by-step "deconstruction" of the last glyph, just like the original.
WRITE_BE_UINT16(&_savegameName[length - 2], _backupChars[(--_inputState) - 1]);
@ -795,7 +795,7 @@ void GUI_LoK::updateSavegameString() {
redrawTextfield();
} else if (length > 0) {
_savegameName[length - 1] = 0;
if (_inputType == Font::kHANGUL)
if (_inputType == Font::kJohab)
_inputState = checkHanInputState(_savegameName, length - 1);
redrawTextfield();
}
@ -836,7 +836,7 @@ int GUI_LoK::saveGame(Button *button) {
}
redrawTextfield();
if (_inputType == Font::kHANGUL)
if (_inputType == Font::kJohab)
_inputState = checkHanInputState(_savegameName, strlen(_savegameName));
_screen->setFont(cf);
@ -858,7 +858,7 @@ int GUI_LoK::saveGame(Button *button) {
if (_savegameOffset == 0 && _vm->_gameToLoad == 0)
_vm->_gameToLoad = getNextSavegameSlot();
if (_vm->_gameToLoad > 0) {
Util::convertString_KYRAtoGUI(_savegameName, 35, _vm->gameFlags().lang == Common::KO_KOR ? Common::kWindows949 : Common::kDos850);
Util::convertString_KYRAtoGUI(_savegameName, ARRAYSIZE(_savegameName), _vm->gameFlags().lang == Common::KO_KOR ? Common::kJohab : Common::kDos850);
_vm->updatePlayTimer();
Graphics::Surface thumb;
createScreenThumbnail(thumb);