using System;
namespace PKHeX.Core;
/// <summary>
/// Logic for converting a <see cref="string"/> for Generation 2.
/// </summary>
/// <remarks>Slight differences when compared to <see cref="StringConverter1"/>.</remarks>
public static class StringConverter2
{
// Special byte codes; all but LineBreakCode are re-exported from the Generation 1 converter.
public const byte TerminatorCode = StringConverter1.TerminatorCode; // written after an encoded string by SetString
public const byte TerminatorZero = StringConverter1.TerminatorZero; // second byte value AllCharsInTable accepts as a terminator
public const byte TradeOTCode = StringConverter1.TradeOTCode; // leading byte that marks an in-game trade OT name
public const byte SpaceCode = StringConverter1.SpaceCode; // fill value used for StringConverterOption.Clear7F
public const byte LineBreakCode = 0x4E; // Mail
// Decoded characters corresponding to the special byte codes.
public const char Terminator = StringConverter1.Terminator; // decoding stops when a byte maps to this character
public const char TradeOT = StringConverter1.TradeOT; // decoded form of TradeOTCode
public const char LineBreak = '⏎'; // Mail
/// <summary>
/// Checks if the decoded string is Japanese by deferring to <see cref="StringConverter1.GetIsJapanese(ReadOnlySpan{char})"/>.
/// </summary>
/// <param name="str">Decoded string.</param>
public static bool GetIsJapanese(ReadOnlySpan<char> str) => StringConverter1.GetIsJapanese(str);

/// <summary>
/// Checks if the decoded string is not Japanese.
/// </summary>
/// <param name="str">Decoded string.</param>
public static bool GetIsEnglish(ReadOnlySpan<char> str) => !GetIsJapanese(str);

/// <summary>
/// Checks if every encoded byte maps to a character in <see cref="TableJP"/>.
/// </summary>
/// <param name="raw">Encoded data.</param>
public static bool GetIsJapanese(ReadOnlySpan<byte> raw) => AllCharsInTable(raw, TableJP);

/// <summary>
/// Checks if every encoded byte maps to a character in <see cref="TableEN"/>.
/// </summary>
/// <param name="raw">Encoded data.</param>
public static bool GetIsEnglish(ReadOnlySpan<byte> raw) => AllCharsInTable(raw, TableEN);
/// <summary>
/// Checks that every byte of <paramref name="data"/> decodes to a usable character in <paramref name="table"/>.
/// </summary>
/// <param name="data">Encoded data; each byte is used directly as an index into the table.</param>
/// <param name="table">Decoding table indexed by byte value.</param>
/// <returns><see langword="false"/> if a non-terminator byte decodes to <see cref="Terminator"/>; otherwise <see langword="true"/>.</returns>
private static bool AllCharsInTable(ReadOnlySpan<byte> data, ReadOnlySpan<char> table)
{
    foreach (var c in data)
    {
        // Decoding to the terminator character is only acceptable for the terminator bytes themselves.
        if (table[c] == Terminator && c is not (TerminatorCode or TerminatorZero))
            return false;
    }
    return true;
}
/// <summary>
/// Converts Generation 2 encoded data into a string.
/// </summary>
/// <param name="data">Encoded data.</param>
/// <param name="language">Data source language.</param>
/// <returns>Decoded string.</returns>
public static string GetString(ReadOnlySpan<byte> data, int language)
{
    // One decoded character per encoded byte at most, so the input length is a sufficient buffer size.
    Span<char> result = stackalloc char[data.Length];
    int length = LoadString(data, result, language);
    return new string(result[..length]);
}
/// <summary>
/// Decodes Generation 2 encoded data into the provided character buffer.
/// </summary>
/// <param name="data">Encoded data</param>
/// <param name="result">Decoded character result buffer; must hold at least as many characters as are decoded.</param>
/// <param name="language">Data source language.</param>
/// <returns>Character count loaded.</returns>
public static int LoadString(ReadOnlySpan<byte> data, Span<char> result, int language)
{
    if (data.Length == 0)
        return 0;

    if (data[0] == TradeOTCode) // In-game Trade
    {
        // The marker alone represents the whole name; nothing after it is decoded.
        result[0] = TradeOT;
        return 1;
    }

    var dict = GetDict(language);
    int i = 0;
    for (; i < data.Length; i++)
    {
        var c = dict[data[i]];
        if (c == Terminator) // Stop if Terminator
            break;
        result[i] = c;
    }
    return i;
}
/// <summary>
/// Selects the character table for the requested language.
/// </summary>
/// <param name="language">Language ID; presumably the numeric LanguageID values (1 = Japanese, 3/5 = French/German, 4/7 = Italian/Spanish) — confirm against the enum.</param>
/// <returns>The matching table, or <see cref="TableEN"/> for any other value.</returns>
private static ReadOnlySpan<char> GetDict(int language) => language switch
{
    1 => TableJP,
    3 or 5 => TableFRE, // table is shared by both languages
    4 or 7 => TableITA, // table is shared by both languages
    _ => TableEN,
};
/// <summary>
/// Converts a string to Generation 2 encoded data.
/// </summary>
/// <param name="destBuffer">Span of bytes to write encoded string data</param>
/// <param name="value">Decoded string.</param>
/// <param name="maxLength">Maximum length of the input</param>
/// <param name="language">Data source language.</param>
/// <param name="option">Buffer pre-formatting option</param>
/// <returns>Count of bytes written to <paramref name="destBuffer"/>, including the terminator when one was appended.</returns>
public static int SetString(Span<byte> destBuffer, ReadOnlySpan<char> value, int maxLength, int language,
    StringConverterOption option = StringConverterOption.Clear50)
{
    // Pre-fill first so anything past the written string holds the requested padding.
    ConditionBuffer(destBuffer, option);
    if (value.Length == 0)
        return 0;

    if (value[0] == TradeOT) // Handle "[TRAINER]"
    {
        destBuffer[0] = TradeOTCode;
        destBuffer[1] = TerminatorCode;
        return 2;
    }

    if (value.Length > maxLength)
        value = value[..maxLength]; // Hard cap

    var dict = GetDict(language);
    int count = 0;
    for (; count < value.Length; count++)
    {
        // Stop at the first character that cannot be encoded; everything before it is kept.
        if (!TryGetIndex(dict, value[count], out var encoded))
            break;
        destBuffer[count] = encoded;
    }

    // A completely filled buffer has no room for a terminator.
    if (count == destBuffer.Length)
        return count;
    destBuffer[count] = TerminatorCode;
    return count + 1;
}
/// <summary>
/// Pre-fills the destination buffer according to the requested option before a string is written.
/// </summary>
/// <param name="destBuffer">Buffer to condition.</param>
/// <param name="option">Pre-formatting option; values other than the three handled below leave the buffer untouched.</param>
private static void ConditionBuffer(Span<byte> destBuffer, StringConverterOption option)
{
    switch (option)
    {
        case StringConverterOption.ClearZero:
            destBuffer.Clear();
            break;
        case StringConverterOption.Clear50:
            destBuffer.Fill(TerminatorCode);
            break;
        case StringConverterOption.Clear7F:
            destBuffer.Fill(SpaceCode);
            break;
    }
}
/// <summary>
/// Finds the encoded byte for a character, falling back to a user-friendly remap when it is not in the table.
/// </summary>
/// <param name="dict">Character table indexed by byte value.</param>
/// <param name="c">Character to encode.</param>
/// <param name="result">Encoded byte when found.</param>
/// <returns><see langword="true"/> if the character can be encoded.</returns>
private static bool TryGetIndex(in ReadOnlySpan<char> dict, char c, out byte result)
{
    var index = dict.IndexOf(c);
    if (index == -1)
        return TryGetUserFriendlyRemap(dict, c, out result);

    // \0 at index 0 shouldn't really be user-entered, check just in case
    result = (byte)index;
    return index != 0;
}
/// <summary>
/// Tries to remap the user input to a valid character.
/// </summary>
/// <param name="dict">Character table indexed by byte value.</param>
/// <param name="c">Character that was not found in the table directly.</param>
/// <param name="result">Encoded byte of the remapped character; 0 when no remap applies.</param>
/// <returns><see langword="true"/> only if the remapped character exists in <paramref name="dict"/>.</returns>
private static bool TryGetUserFriendlyRemap(in ReadOnlySpan<char> dict, char c, out byte result)
{
    if (StringConverter1.Hiragana.Contains(c))
    {
        // Adding 0x60 moves a Hiragana code point to the matching Katakana code point.
        int index = dict.IndexOf((char)(c + (char)0x60));
        if (index != -1)
        {
            result = (byte)index;
            return true;
        }
        // Not present in this table (e.g. a non-Japanese table): report failure rather than emitting (byte)-1 == 0xFF.
    }
    result = 0;
    return false;
}
#region Gen 2 Character Tables
// Three-letter aliases used inside the character tables below.
private const char NUL = StringConverter1.NUL; // placed in every table slot that has no character assigned
private const char TOT = StringConverter1.TOT; // placed at 0x5D in every table
private const char LPK = StringConverter1.LPK; // Pk
private const char LMN = StringConverter1.LMN; // Mn
private const char MNY = StringConverter1.MNY; // Yen
private const char LPO = StringConverter1.LPO; // Po
private const char LKE = StringConverter1.LKE; // Ke
private const char LEA = StringConverter1.LEA; // é for Box/Mail
private const char DOT = StringConverter1.DOT; // . for MR.MIME (U+2024, not U+002E)
private const char SPF = StringConverter1.SPF; // Full-width space (U+3000)
private const char SPH = StringConverter1.SPH; // Half-width space
private const char RET = LineBreak; // Line break for Mail
private const char LAP = '’'; // Apostrophe
// 'd 'l 'm 'r 's 't 'v
// All are apostrophe-before.
private const string LigatureENG = "dlmrstv";
// c' d' j' l' m' n' p' s' 's t' u' y'
// All are apostrophe-after besides index 8 ('s)
private const string LigatureFRE = "cdjlmnpsstuy";

// Single-character placeholders for the ligature glyphs, ordered to match the Ligature strings above.
// They must differ from every real character in the tables: the encoder looks characters up with IndexOf
// (first match wins) and InflateLigatures expands every placeholder it sees, so ASCII digits/letters here
// would make plain '0'-'9', 'A' and 'B' encode and display as ligatures. Full-width forms are used instead.
private const char LI0 = '０'; // 'd (ENG) / c' (FRE)
private const char LI1 = '１'; // 'l (ENG) / d' (FRE)
private const char LI2 = '２'; // 'm (ENG) / j' (FRE)
private const char LI3 = '３'; // 'r (ENG) / l' (FRE)
private const char LI4 = '４'; // 's (ENG) / m' (FRE)
private const char LI5 = '５'; // 't (ENG) / n' (FRE)
private const char LI6 = '６'; // 'v (ENG) / p' (FRE)
private const char LI7 = '７'; // s' (FRE)
private const char LI8 = '８'; // 's (FRE)
private const char LI9 = '９'; // t' (FRE)
private const char LIA = 'Ａ'; // u' (FRE)
private const char LIB = 'Ｂ'; // y' (FRE)
/// <summary>
/// English encoding table with unused indexes merged in from other languages that use them.
/// </summary>
/// <remarks>Indexed by encoded byte value; ligature placeholders occupy 0xD0-0xD6.</remarks>
public static ReadOnlySpan<char> TableEN =>
[
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 00-0F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 10-1F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 20-2F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 30-3F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, RET, NUL, // 40-4F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, TOT, NUL, NUL, // 50-5F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 60-6F
    LPO, LKE, '“', '”', NUL, '…', NUL, NUL, NUL, '┌', '─', '┐', '│', '└', '┘', SPH, // 70-7F
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // 80-8F
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '(', ')', ':', ';', '[', ']', // 90-9F
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', // A0-AF
    'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'à', 'è', 'é', 'ù', 'À', 'Á', // B0-BF
    'Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', 'È', 'É', 'Ì', 'Í', 'Ñ', 'Ò', 'Ó', 'Ù', 'Ú', 'á', // C0-CF
    LI0, LI1, LI2, LI3, LI4, LI5, LI6, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, '←', // D0-DF
    LAP, LPK, LMN, '-', '+', NUL, '?', '!', DOT, '&', LEA, '→', '▷', '▶', '▼', '♂', // E0-EF
    MNY, '×', '.', '/', ',', '♀', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', // F0-FF
];
/// <summary>
/// French encoding table, indexed by encoded byte value; ligature placeholders occupy 0xD4-0xDF.
/// </summary>
public static ReadOnlySpan<char> TableFRE => // Also German
[
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 00-0F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 10-1F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 20-2F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 30-3F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, RET, NUL, // 40-4F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, TOT, NUL, NUL, // 50-5F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 60-6F
    LPO, LKE, '“', '”', NUL, '…', NUL, NUL, NUL, '┌', '─', '┐', '│', '└', '┘', SPH, // 70-7F
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // 80-8F
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '(', ')', ':', ';', '[', ']', // 90-9F
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', // A0-AF
    'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'à', 'è', 'é', 'ù', 'ß', 'ç', // B0-BF
    'Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', 'ë', 'ï', 'â', 'ô', 'û', 'ê', 'î', 'Ù', 'Ú', 'á', // C0-CF
    NUL, NUL, NUL, NUL, LI0, LI1, LI2, LI3, LI4, LI5, LI6, LI7, LI8, LI9, LIA, LIB, // D0-DF
    LAP, LPK, LMN, '-', '+', NUL, '?', '!', DOT, '&', LEA, '→', '▷', '▶', '▼', '♂', // E0-EF
    MNY, '×', '.', '/', ',', '♀', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', // F0-FF
];
/// <summary>
/// Italian encoding table, indexed by encoded byte value; ligature placeholders occupy 0xD8-0xDE.
/// </summary>
public static ReadOnlySpan<char> TableITA => // Also Spanish
[
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 00-0F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 10-1F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 20-2F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 30-3F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, RET, NUL, // 40-4F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, TOT, NUL, NUL, // 50-5F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 60-6F
    LPO, LKE, '“', '”', NUL, '…', NUL, NUL, NUL, '┌', '─', '┐', '│', '└', '┘', SPH, // 70-7F
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // 80-8F
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '(', ')', ':', ';', '[', ']', // 90-9F
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', // A0-AF
    'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'à', 'è', 'é', 'ù', 'À', 'Á', // B0-BF
    'Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', 'È', 'É', 'Ì', 'Í', 'Ñ', 'Ò', 'Ó', 'Ù', 'Ú', 'á', // C0-CF
    'ì', 'í', 'ñ', 'ò', 'ó', 'ú', 'º', NUL, LI0, LI1, LI2, LI3, LI4, LI5, LI6, NUL, // D0-DF
    LAP, LPK, LMN, '-', '¿', '¡', '?', '!', DOT, '&', LEA, '→', '▷', '▶', '▼', '♂', // E0-EF
    MNY, '×', '.', '/', ',', '♀', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', // F0-FF
];
/// <summary>
/// Writes the two-character expansion of a ligature into the first two slots of <paramref name="result"/>.
/// </summary>
/// <param name="result">Destination; must have room for two characters.</param>
/// <param name="c">Letter component of the ligature.</param>
/// <param name="isAfter"><see langword="true"/> to write the letter followed by the apostrophe; <see langword="false"/> for apostrophe followed by the letter.</param>
private static void InsertLigature(Span<char> result, char c, bool isAfter)
{
    if (isAfter)
    {
        result[0] = c;
        result[1] = LAP;
    }
    else
    {
        result[0] = LAP;
        result[1] = c;
    }
}
// Control codes used in Randy's mail
private const char NIS = '㋥'; // "に " (particle ni)
private const char NOS = '㋨'; // "の " (particle no)

/// <summary>
/// Japanese encoding table, indexed by encoded byte value.
/// </summary>
public static ReadOnlySpan<char> TableJP =>
[
    NUL, NUL, NUL, NUL, NUL, 'ガ', 'ギ', 'グ', 'ゲ', 'ゴ', 'ザ', 'ジ', 'ズ', 'ゼ', 'ゾ', 'ダ', // 00-0F
    'ヂ', 'ヅ', 'デ', 'ド', NUL, NUL, NUL, NUL, NUL, 'バ', 'ビ', 'ブ', 'ボ', NIS, NUL, NUL, // 10-1F
    NUL, NUL, NUL, NUL, NUL, NOS, 'が', 'ぎ', 'ぐ', 'げ', 'ご', 'ざ', 'じ', 'ず', 'ぜ', 'ぞ', // 20-2F
    'だ', 'ぢ', 'づ', 'で', 'ど', NUL, NUL, NUL, NUL, NUL, 'ば', 'び', 'ぶ', 'ベ', 'ぼ', NUL, // 30-3F
    'パ', 'ピ', 'プ', 'ポ', 'ぱ', 'ぴ', 'ぷ', 'ペ', 'ぽ', NUL, NUL, NUL, NUL, NUL, RET, NUL, // 40-4F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, TOT, NUL, NUL, // 50-5F
    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, 'ぃ', 'ぅ', // 60-6F
    '「', '」', '『', '』', '・', '⋯', 'ぁ', 'ぇ', 'ぉ', NUL, NUL, NUL, NUL, NUL, NUL, SPF, // 70-7F
    'ア', 'イ', 'ウ', 'エ', 'オ', 'カ', 'キ', 'ク', 'ケ', 'コ', 'サ', 'シ', 'ス', 'セ', 'ソ', 'タ', // 80-8F
    'チ', 'ツ', 'テ', 'ト', 'ナ', 'ニ', 'ヌ', 'ネ', 'ノ', 'ハ', 'ヒ', 'フ', 'ホ', 'マ', 'ミ', 'ム', // 90-9F
    'メ', 'モ', 'ヤ', 'ユ', 'ヨ', 'ラ', 'ル', 'レ', 'ロ', 'ワ', 'ヲ', 'ン', 'ッ', 'ャ', 'ュ', 'ョ', // A0-AF
    'ィ', 'あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', 'す', 'せ', 'そ', // B0-BF
    'た', 'ち', 'つ', 'て', 'と', 'な', 'に', 'ぬ', 'ね', 'の', 'は', 'ひ', 'ふ', 'ヘ', 'ほ', 'ま', // C0-CF
    'み', 'む', 'め', 'も', 'や', 'ゆ', 'よ', 'ら', 'リ', 'る', 'れ', 'ろ', 'わ', 'を', 'ん', 'っ', // D0-DF
    'ゃ', 'ゅ', 'ょ', 'ー', '゚', '゙', '?', '!', '。', 'ァ', 'ゥ', 'ェ', NUL, NUL, NUL, '♂', // E0-EF
    MNY, '×', '.', '/', 'ォ', '♀', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', // F0-FF
];
#endregion
/// <summary>
/// Expands every single-character ligature placeholder in a decoded string into its letter + apostrophe pair.
/// </summary>
/// <param name="result">Decoded string that may contain ligature placeholders.</param>
/// <param name="language">Language of the string; selects which ligature set and apostrophe order apply.</param>
/// <returns>The expanded string, or the same <paramref name="result"/> instance when nothing was expanded.</returns>
public static string InflateLigatures(string result, int language)
{
    if (language is (int)LanguageID.Japanese or (int)LanguageID.Korean)
        return result; // No ligatures in JPN/KOR

    bool after = language is (int)LanguageID.French or (int)LanguageID.German;
    var ligatures = after ? LigatureFRE : LigatureENG;

    // Worst case is double length. Keep small buffers on the stack; fall back to the heap for long input
    // so an arbitrarily long string cannot exhaust the stack.
    const int maxStackChars = 256;
    int capacity = result.Length * 2;
    Span<char> inflated = capacity <= maxStackChars ? stackalloc char[capacity] : new char[capacity];

    int index = 0;
    foreach (var c in result)
    {
        // Placeholders beyond this language's ligature set are passed through untouched.
        if (!TryGetLigatureIndex(c, out var i) || i >= ligatures.Length)
        {
            inflated[index++] = c;
            continue;
        }

        // Index 8 ('s) is the only apostrophe-before entry in the apostrophe-after set.
        InsertLigature(inflated[index..], ligatures[i], after && i != 8);
        index += 2;
    }

    if (index == result.Length)
        return result; // Nothing changed.
    return new string(inflated[..index]);
}
/// <summary>Looks up the position of a ligature placeholder character within <see cref="LigatureList"/>.</summary>
/// <param name="c">Character to test.</param>
/// <param name="index">Position in the list, or -1 when <paramref name="c"/> is not a placeholder.</param>
/// <returns><see langword="true"/> if <paramref name="c"/> is a ligature placeholder.</returns>
private static bool TryGetLigatureIndex(char c, out int index) => -1 != (index = LigatureList.IndexOf(c));

/// <summary>Ligature placeholder characters, ordered to line up with the per-language ligature letter strings.</summary>
private static ReadOnlySpan<char> LigatureList => [LI0, LI1, LI2, LI3, LI4, LI5, LI6, LI7, LI8, LI9, LIA, LIB];

/// <summary>Gets the placeholder character for the ligature at <paramref name="ligatureIndex"/>.</summary>
private static char GetLigature(int ligatureIndex) => LigatureList[ligatureIndex];
/// <summary>
/// Collapses letter + apostrophe pairs into single-character ligature placeholders; the inverse of <see cref="InflateLigatures"/>.
/// </summary>
/// <param name="value">User-facing string; both <c>’</c> and <c>'</c> are treated as an apostrophe.</param>
/// <param name="result">Destination buffer for the collapsed string.</param>
/// <param name="language">Language of the string; selects which ligature set and apostrophe order apply.</param>
/// <returns>Count of characters written to <paramref name="result"/>; writing stops early if the buffer fills up.</returns>
public static int DeflateLigatures(ReadOnlySpan<char> value, Span<char> result, int language)
{
    if (language is (int)LanguageID.Japanese or (int)LanguageID.Korean)
    {
        value.CopyTo(result);
        return value.Length; // No ligatures in JPN/KOR
    }

    // Position of 's within LigatureFRE: the only apostrophe-before entry of the apostrophe-after set.
    const int apostropheBeforeFRE = 8;

    bool after = language is (int)LanguageID.French or (int)LanguageID.German;
    var ligatures = after ? LigatureFRE : LigatureENG;
    int index = 0;
    for (var i = 0; i < value.Length; i++)
    {
        char c = value[i];
        if (c is not (LAP or '\''))
        {
            if (index == result.Length)
                return index; // Overflow (shouldn't happen for correctly-written strings)
            result[index++] = c;
            continue;
        }

        // Apostrophe-after languages: fold the apostrophe into the letter already written.
        if (after && index != 0)
        {
            ref var prev = ref result[index - 1];
            var ligatureIndex = ligatures.IndexOf(prev);
            if (ligatureIndex != -1)
            {
                // Replace the letter with its placeholder; writing the letter back would just drop the apostrophe.
                prev = GetLigature(ligatureIndex);
                continue;
            }
        }

        if (index == result.Length)
            return index; // Overflow (shouldn't happen for correctly-written strings)

        // Apostrophe-before: fold the apostrophe into the letter that follows it.
        if (i < value.Length - 1)
        {
            var next = value[i + 1];
            int ligatureIndex;
            if (after)
            {
                // Only 's is apostrophe-before here; IndexOf would return the s' entry (or another apostrophe-after one).
                ligatureIndex = next == LigatureFRE[apostropheBeforeFRE] ? apostropheBeforeFRE : -1;
            }
            else
            {
                ligatureIndex = ligatures.IndexOf(next);
            }

            if (ligatureIndex != -1)
            {
                result[index++] = GetLigature(ligatureIndex);
                i++; // the following letter was consumed by the ligature
                continue;
            }
        }

        // Stand-alone apostrophe: keep it as typed.
        result[index++] = c;
    }
    return index;
}
/// <summary>
/// Converts foreign Mail from the language-unaware encoding used for English Gold/Silver back to its original, language-aware encoding.
/// </summary>
/// <param name="data">Encoded data; modified in place.</param>
/// <param name="language">Mail language; languages other than French, German, Italian and Spanish are left untouched.</param>
public static void DecodeMailEnglishGS(Span<byte> data, int language)
{
    switch (language)
    {
        case (int)LanguageID.French or (int)LanguageID.German:
            DecodeMailFG(data);
            break;
        case (int)LanguageID.Italian or (int)LanguageID.Spanish:
            RemapMailIS(data); // the swap is its own inverse, so decode and encode share it
            break;
    }
}
/// <summary>
/// Converts foreign Mail from its original, language-aware encoding to the language-unaware encoding used for English Gold/Silver.
/// </summary>
/// <param name="data">Decoded data; modified in place.</param>
/// <param name="language">Mail language; languages other than French, German, Italian and Spanish are left untouched.</param>
public static void EncodeMailEnglishGS(Span<byte> data, int language)
{
    switch (language)
    {
        case (int)LanguageID.French or (int)LanguageID.German:
            EncodeMailFG(data);
            break;
        case (int)LanguageID.Italian or (int)LanguageID.Spanish:
            RemapMailIS(data); // the swap is its own inverse, so decode and encode share it
            break;
    }
}
// Remap 's, swap c' d' j' with unused spaces
// - English: 0xCD-CF (unused spaces), 0xD4-D6 ('s 't 'v), 0xDC (unused space)
// - French/German: 0xCD-CF (unused spaces), 0xD4-D6 (c' d' j'), 0xDC ('s)
/// <summary>
/// Rewrites French/German mail bytes in place from the English Gold/Silver layout to the native layout.
/// </summary>
/// <param name="data">Mail bytes; modified in place.</param>
private static void DecodeMailFG(Span<byte> data)
{
    foreach (ref byte b in data)
    {
        if (b == 0xD4)
            b = 0xDC; // 's
        else if (b is >= 0xCD and <= 0xCF)
            b += 0xD4 - 0xCD; // c' d' j' (shift up)
    }
}
/// <summary>
/// Rewrites French/German mail bytes in place from the native layout to the English Gold/Silver layout:
/// 0xDC ('s) moves to 0xD4, and 0xD4-0xD6 (c' d' j') move down to 0xCD-0xCF.
/// </summary>
/// <param name="data">Mail bytes; modified in place.</param>
private static void EncodeMailFG(Span<byte> data)
{
    foreach (ref byte b in data)
    {
        if (b == 0xDC)
            b = 0xD4; // 's
        else if (b is >= 0xD4 and <= 0xD6)
            b -= 0xD4 - 0xCD; // c' d' j' (shift down)
    }
}
// Swap upper/lower halves of 0xD0-DF
// - English: 0xD0-D7 (ligatures), 0xD8-DF (unused spaces)
// - Italian/Spanish: 0xD0-D7 (accented letters), 0xD8-DF (ligatures)
/// <summary>
/// Toggles bit 3 of every byte in the 0xD0-0xDF range in place; applying it twice restores the input.
/// </summary>
/// <param name="data">Mail bytes; modified in place.</param>
private static void RemapMailIS(Span<byte> data)
{
    foreach (ref byte b in data)
    {
        if ((b & 0xF0) == 0xD0)
            b ^= 0x08;
    }
}
}