using System;

namespace PKHeX.Core;

/// <summary>
/// Logic for converting a <see cref="string"/> for Generation 2.
/// </summary>
/// <remarks>Slight differences when compared to <see cref="StringConverter1"/>.</remarks>
public static class StringConverter2
{
    public const byte TerminatorCode = StringConverter1.TerminatorCode;
    public const byte TerminatorZero = StringConverter1.TerminatorZero;
    public const byte TradeOTCode = StringConverter1.TradeOTCode;
    public const byte SpaceCode = StringConverter1.SpaceCode;
    public const byte LineBreakCode = 0x4E; // Mail

    public const char Terminator = StringConverter1.Terminator;
    public const char TradeOT = StringConverter1.TradeOT;
    public const char LineBreak = '⏎'; // Mail

    /// <summary>
    /// Checks a decoded string for Japanese text; defers to <see cref="StringConverter1.GetIsJapanese(ReadOnlySpan{char})"/>.
    /// </summary>
    /// <param name="str">Decoded string.</param>
    public static bool GetIsJapanese(ReadOnlySpan<char> str) => StringConverter1.GetIsJapanese(str);

    /// <summary>
    /// Inverse of <see cref="GetIsJapanese(ReadOnlySpan{char})"/>.
    /// </summary>
    /// <param name="str">Decoded string.</param>
    public static bool GetIsEnglish(ReadOnlySpan<char> str) => !GetIsJapanese(str);

    /// <summary>
    /// Checks if every encoded byte has an entry in <see cref="TableJP"/>.
    /// </summary>
    /// <param name="raw">Encoded data.</param>
    public static bool GetIsJapanese(ReadOnlySpan<byte> raw) => AllCharsInTable(raw, TableJP);

    /// <summary>
    /// Checks if every encoded byte has an entry in <see cref="TableEN"/>.
    /// </summary>
    /// <param name="raw">Encoded data.</param>
    public static bool GetIsEnglish(ReadOnlySpan<byte> raw) => AllCharsInTable(raw, TableEN);

    /// <summary>
    /// Checks that no byte in <paramref name="data"/> maps to <see cref="Terminator"/> in the table,
    /// other than the terminator bytes themselves.
    /// </summary>
    /// <param name="data">Encoded data.</param>
    /// <param name="table">256-entry decoding table indexed by the encoded byte.</param>
    private static bool AllCharsInTable(ReadOnlySpan<byte> data, ReadOnlySpan<char> table)
    {
        foreach (var c in data)
        {
            var b = table[c];
            // An undefined table slot decodes to the terminator char; only real terminator bytes may do so.
            if (b == Terminator && c is not (TerminatorCode or TerminatorZero))
                return false;
        }
        return true;
    }

    /// <summary>
    /// Converts Generation 2 encoded data into a string.
    /// </summary>
    /// <param name="data">Encoded data.</param>
    /// <param name="language">Data source language.</param>
    /// <returns>Decoded string.</returns>
    public static string GetString(ReadOnlySpan<byte> data, int language)
    {
        // One decoded char per encoded byte at most.
        Span<char> result = stackalloc char[data.Length];
        int length = LoadString(data, result, language);
        return new string(result[..length]);
    }

    /// <summary>
    /// Decodes Generation 2 encoded data into the provided character buffer.
    /// </summary>
    /// <param name="data">Encoded data</param>
    /// <param name="result">Decoded character result buffer</param>
    /// <param name="language">Data source language.</param>
    /// <returns>Character count loaded.</returns>
    public static int LoadString(ReadOnlySpan<byte> data, Span<char> result, int language)
    {
        if (data.Length == 0)
            return 0;
        if (data[0] == TradeOTCode) // In-game Trade
        {
            result[0] = TradeOT;
            return 1;
        }

        int i = 0;
        var dict = GetDict(language);
        for (; i < data.Length; i++)
        {
            var value = data[i];
            var c = dict[value];
            if (c == Terminator) // Stop if Terminator
                break;
            result[i] = c;
        }
        return i;
    }

    /// <summary>
    /// Picks the character table for the requested language; anything unlisted uses <see cref="TableEN"/>.
    /// </summary>
    /// <param name="language">Numeric language ID (presumably <see cref="LanguageID"/> values; confirm against that enum).</param>
    private static ReadOnlySpan<char> GetDict(int language) => language switch
    {
        1 => TableJP,
        3 or 5 => TableFRE,
        4 or 7 => TableITA,
        _ => TableEN,
    };

    /// <summary>
    /// Converts a string to Generation 2 encoded data.
    /// </summary>
    /// <param name="destBuffer">Span of bytes to write encoded string data</param>
    /// <param name="value">Decoded string.</param>
    /// <param name="maxLength">Maximum length of the input <paramref name="value"/></param>
    /// <param name="language">Data source language.</param>
    /// <param name="option">Buffer pre-formatting option</param>
    /// <returns>Count of bytes written, including the terminator when there was room to write one.</returns>
    public static int SetString(Span<byte> destBuffer, ReadOnlySpan<char> value, int maxLength, int language,
        StringConverterOption option = StringConverterOption.Clear50)
    {
        ConditionBuffer(destBuffer, option);
        if (value.Length == 0)
            return 0;
        if (value[0] == TradeOT) // Handle "[TRAINER]"
        {
            destBuffer[0] = TradeOTCode;
            destBuffer[1] = TerminatorCode;
            return 2;
        }

        if (value.Length > maxLength)
            value = value[..maxLength]; // Hard cap

        var dict = GetDict(language);
        int i = 0;
        for (; i < value.Length; i++)
        {
            // Stop encoding at the first character that cannot be represented.
            if (!TryGetIndex(dict, value[i], out var index))
                break;
            destBuffer[i] = index;
        }

        int count = i;
        if (count == destBuffer.Length)
            return count; // No room left for a terminator.
        destBuffer[count] = TerminatorCode;
        return count + 1;
    }

    /// <summary>
    /// Pre-fills the destination buffer according to the requested option; other options leave it untouched.
    /// </summary>
    private static void ConditionBuffer(Span<byte> destBuffer, StringConverterOption option)
    {
        if (option is StringConverterOption.ClearZero)
            destBuffer.Clear();
        else if (option is StringConverterOption.Clear50)
            destBuffer.Fill(TerminatorCode);
        else if (option is StringConverterOption.Clear7F)
            destBuffer.Fill(SpaceCode);
    }

    /// <summary>
    /// Finds the encoded byte for a character, falling back to a user-friendly remap when it is not in the table.
    /// </summary>
    /// <param name="dict">Character table for the language.</param>
    /// <param name="c">Character to encode.</param>
    /// <param name="result">Encoded byte.</param>
    /// <returns><see langword="true"/> if the character can be encoded.</returns>
    private static bool TryGetIndex(in ReadOnlySpan<char> dict, char c, out byte result)
    {
        var index = dict.IndexOf(c);
        if (index == -1)
            return TryGetUserFriendlyRemap(dict, c, out result);
        // \0 at index 0 shouldn't really be user-entered, check just in case
        result = (byte)index;
        return index != 0;
    }

    /// <summary>
    /// Tries to remap the user input to a valid character.
    /// </summary>
    /// <param name="dict">Character table for the language.</param>
    /// <param name="c">Character that was not found in <paramref name="dict"/>.</param>
    /// <param name="result">Encoded byte of the remapped character; 0 when no remap exists.</param>
    /// <returns><see langword="true"/> if a remapped character was found in the table.</returns>
    private static bool TryGetUserFriendlyRemap(in ReadOnlySpan<char> dict, char c, out byte result)
    {
        if (StringConverter1.Hiragana.Contains(c))
        {
            // Try the character 0x60 code points higher (the Katakana block offset from Hiragana).
            int index = dict.IndexOf((char)(c + (char)0x60));
            if (index >= 0)
            {
                result = (byte)index;
                return true;
            }
            // Not present in this table (e.g. a non-Japanese table): report failure
            // instead of letting -1 wrap around to 0xFF.
        }
        result = 0;
        return false;
    }

    #region Gen 2 Character Tables

    private const char NUL = StringConverter1.NUL;
    private const char TOT = StringConverter1.TOT;
    private const char LPK = StringConverter1.LPK; // Pk
    private const char LMN = StringConverter1.LMN; // Mn
    private const char MNY = StringConverter1.MNY; // Yen
    private const char LPO = StringConverter1.LPO; // Po
    private const char LKE = StringConverter1.LKE; // Ke
    private const char LEA = StringConverter1.LEA; // é for Box/Mail
    private const char DOT = StringConverter1.DOT; // . for MR.MIME (U+2024, not U+002E)
    private const char SPF = StringConverter1.SPF; // Full-width space (U+3000)
    private const char SPH = StringConverter1.SPH; // Half-width space
    private const char RET = LineBreak; // Line break for Mail
    private const char LAP = '’'; // Apostrophe

    // 'd 'l 'm 'r 's 't 'v
    // All are apostrophe-before.
    private const string LigatureENG = "dlmrstv";

    // c' d' j' l' m' n' p' s' 's t' u' y'
    // All are apostrophe-after besides index 8 ('s)
    private const string LigatureFRE = "cdjlmnpsstuy";

    // Index within LigatureFRE of the only apostrophe-before ligature ('s).
    private const int LigatureIndexBeforeFRE = 8;

    // Placeholder characters standing in for each ligature slot of the tables.
    // NOTE(review): as written, these share code points with the digit and 'A'/'B' entries of the tables,
    // so IndexOf on those characters resolves to whichever table index comes first -- confirm the intended characters.
    private const char LI0 = '0'; // 'd
    private const char LI1 = '1'; // 'l
    private const char LI2 = '2'; // 'm
    private const char LI3 = '3'; // 'r
    private const char LI4 = '4'; // 's
    private const char LI5 = '5'; // 't
    private const char LI6 = '6'; // 'v
    private const char LI7 = '7';
    private const char LI8 = '8';
    private const char LI9 = '9';
    private const char LIA = 'A';
    private const char LIB = 'B';

    /// <summary>
    /// English encoding table with unused indexes merged in from other languages that use them.
    /// </summary>
    public static ReadOnlySpan<char> TableEN =>
    [
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 00-0F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 10-1F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 20-2F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 30-3F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, RET, NUL, // 40-4F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, TOT, NUL, NUL, // 50-5F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 60-6F
        LPO, LKE, '“', '”', NUL, '…', NUL, NUL, NUL, '┌', '─', '┐', '│', '└', '┘', SPH, // 70-7F
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // 80-8F
        'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '(', ')', ':', ';', '[', ']', // 90-9F
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', // A0-AF
        'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'à', 'è', 'é', 'ù', 'À', 'Á', // B0-BF
        'Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', 'È', 'É', 'Ì', 'Í', 'Ñ', 'Ò', 'Ó', 'Ù', 'Ú', 'á', // C0-CF
        LI0, LI1, LI2, LI3, LI4, LI5, LI6, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, '←', // D0-DF
        LAP, LPK, LMN, '-', '+', NUL, '?', '!', DOT, '&', LEA, '→', '▷', '▶', '▼', '♂', // E0-EF
        MNY, '×', '.', '/', ',', '♀', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', // F0-FF
    ];

    /// <summary>
    /// French encoding table; also used for German.
    /// </summary>
    public static ReadOnlySpan<char> TableFRE => // Also German
    [
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 00-0F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 10-1F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 20-2F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 30-3F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, RET, NUL, // 40-4F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, TOT, NUL, NUL, // 50-5F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 60-6F
        LPO, LKE, '“', '”', NUL, '…', NUL, NUL, NUL, '┌', '─', '┐', '│', '└', '┘', SPH, // 70-7F
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // 80-8F
        'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '(', ')', ':', ';', '[', ']', // 90-9F
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', // A0-AF
        'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'à', 'è', 'é', 'ù', 'ß', 'ç', // B0-BF
        'Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', 'ë', 'ï', 'â', 'ô', 'û', 'ê', 'î', 'Ù', 'Ú', 'á', // C0-CF
        NUL, NUL, NUL, NUL, LI0, LI1, LI2, LI3, LI4, LI5, LI6, LI7, LI8, LI9, LIA, LIB, // D0-DF
        LAP, LPK, LMN, '-', '+', NUL, '?', '!', DOT, '&', LEA, '→', '▷', '▶', '▼', '♂', // E0-EF
        MNY, '×', '.', '/', ',', '♀', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', // F0-FF
    ];

    /// <summary>
    /// Italian encoding table; also used for Spanish.
    /// </summary>
    public static ReadOnlySpan<char> TableITA => // Also Spanish
    [
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 00-0F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 10-1F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 20-2F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 30-3F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, RET, NUL, // 40-4F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, TOT, NUL, NUL, // 50-5F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, // 60-6F
        LPO, LKE, '“', '”', NUL, '…', NUL, NUL, NUL, '┌', '─', '┐', '│', '└', '┘', SPH, // 70-7F
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // 80-8F
        'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '(', ')', ':', ';', '[', ']', // 90-9F
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', // A0-AF
        'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'à', 'è', 'é', 'ù', 'À', 'Á', // B0-BF
        'Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', 'È', 'É', 'Ì', 'Í', 'Ñ', 'Ò', 'Ó', 'Ù', 'Ú', 'á', // C0-CF
        'ì', 'í', 'ñ', 'ò', 'ó', 'ú', 'º', NUL, LI0, LI1, LI2, LI3, LI4, LI5, LI6, NUL, // D0-DF
        LAP, LPK, LMN, '-', '¿', '¡', '?', '!', DOT, '&', LEA, '→', '▷', '▶', '▼', '♂', // E0-EF
        MNY, '×', '.', '/', ',', '♀', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', // F0-FF
    ];

    /// <summary>
    /// Writes a two-character ligature (letter and apostrophe) to the start of <paramref name="result"/>.
    /// </summary>
    /// <param name="result">Destination; must have room for two characters.</param>
    /// <param name="c">Ligature letter.</param>
    /// <param name="isAfter"><see langword="true"/> to write the apostrophe after the letter, otherwise before it.</param>
    private static void InsertLigature(Span<char> result, char c, bool isAfter)
    {
        if (isAfter)
        {
            result[0] = c;
            result[1] = LAP;
        }
        else
        {
            result[0] = LAP;
            result[1] = c;
        }
    }

    // Control codes used in Randy's mail
    private const char NIS = '㋥'; // "に " (particle ni)
    private const char NOS = '㋨'; // "の " (particle no)

    /// <summary>
    /// Japanese encoding table.
    /// </summary>
    public static ReadOnlySpan<char> TableJP =>
    [
        NUL, NUL, NUL, NUL, NUL, 'ガ', 'ギ', 'グ', 'ゲ', 'ゴ', 'ザ', 'ジ', 'ズ', 'ゼ', 'ゾ', 'ダ', // 00-0F
        'ヂ', 'ヅ', 'デ', 'ド', NUL, NUL, NUL, NUL, NUL, 'バ', 'ビ', 'ブ', 'ボ', NIS, NUL, NUL, // 10-1F
        NUL, NUL, NUL, NUL, NUL, NOS, 'が', 'ぎ', 'ぐ', 'げ', 'ご', 'ざ', 'じ', 'ず', 'ぜ', 'ぞ', // 20-2F
        'だ', 'ぢ', 'づ', 'で', 'ど', NUL, NUL, NUL, NUL, NUL, 'ば', 'び', 'ぶ', 'ベ', 'ぼ', NUL, // 30-3F
        'パ', 'ピ', 'プ', 'ポ', 'ぱ', 'ぴ', 'ぷ', 'ペ', 'ぽ', NUL, NUL, NUL, NUL, NUL, RET, NUL, // 40-4F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, TOT, NUL, NUL, // 50-5F
        NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, 'ぃ', 'ぅ', // 60-6F
        '「', '」', '『', '』', '・', '⋯', 'ぁ', 'ぇ', 'ぉ', NUL, NUL, NUL, NUL, NUL, NUL, SPF, // 70-7F
        'ア', 'イ', 'ウ', 'エ', 'オ', 'カ', 'キ', 'ク', 'ケ', 'コ', 'サ', 'シ', 'ス', 'セ', 'ソ', 'タ', // 80-8F
        'チ', 'ツ', 'テ', 'ト', 'ナ', 'ニ', 'ヌ', 'ネ', 'ノ', 'ハ', 'ヒ', 'フ', 'ホ', 'マ', 'ミ', 'ム', // 90-9F
        'メ', 'モ', 'ヤ', 'ユ', 'ヨ', 'ラ', 'ル', 'レ', 'ロ', 'ワ', 'ヲ', 'ン', 'ッ', 'ャ', 'ュ', 'ョ', // A0-AF
        'ィ', 'あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', 'す', 'せ', 'そ', // B0-BF
        'た', 'ち', 'つ', 'て', 'と', 'な', 'に', 'ぬ', 'ね', 'の', 'は', 'ひ', 'ふ', 'ヘ', 'ほ', 'ま', // C0-CF
        'み', 'む', 'め', 'も', 'や', 'ゆ', 'よ', 'ら', 'リ', 'る', 'れ', 'ろ', 'わ', 'を', 'ん', 'っ', // D0-DF
        'ゃ', 'ゅ', 'ょ', 'ー', '゚', '゙', '?', '!', '。', 'ァ', 'ゥ', 'ェ', NUL, NUL, NUL, '♂', // E0-EF
        MNY, '×', '.', '/', 'ォ', '♀', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', // F0-FF
    ];

    #endregion

    /// <summary>
    /// Expands single-character ligature placeholders into their two-character (letter and apostrophe) form.
    /// </summary>
    /// <param name="result">Decoded string that may contain ligature placeholders.</param>
    /// <param name="language">String language.</param>
    /// <returns>The expanded string, or the same instance when nothing was expanded.</returns>
    public static string InflateLigatures(string result, int language)
    {
        if (language is (int)LanguageID.Japanese or (int)LanguageID.Korean)
            return result; // No ligatures in JPN/KOR

        bool after = language is (int)LanguageID.French or (int)LanguageID.German;
        var ligatures = after ? LigatureFRE : LigatureENG;
        Span<char> inflated = stackalloc char[result.Length * 2]; // worst case is double length
        int index = 0;
        foreach (var c in result)
        {
            // Placeholders beyond the language's ligature list are kept as regular characters.
            if (!TryGetLigatureIndex(c, out var i) || i >= ligatures.Length)
            {
                inflated[index++] = c;
                continue;
            }

            var ligature = ligatures[i];
            InsertLigature(inflated[index..], ligature, after && i != LigatureIndexBeforeFRE);
            index += 2;
        }

        if (index == result.Length)
            return result; // Nothing changed.
        return new string(inflated[..index]);
    }

    private static bool TryGetLigatureIndex(char c, out int index) => -1 != (index = LigatureList.IndexOf(c));

    private static ReadOnlySpan<char> LigatureList => [LI0, LI1, LI2, LI3, LI4, LI5, LI6, LI7, LI8, LI9, LIA, LIB];

    private static char GetLigature(int ligatureIndex) => LigatureList[ligatureIndex];

    /// <summary>
    /// Finds the ligature slot for an apostrophe that precedes <paramref name="next"/>.
    /// </summary>
    /// <param name="ligatures">Ligature letters for the language.</param>
    /// <param name="next">Character following the apostrophe.</param>
    /// <param name="after">Whether the language's ligatures are apostrophe-after (French/German).</param>
    /// <returns>Ligature index, or -1 if no apostrophe-before ligature exists for the letter.</returns>
    private static int GetLigatureIndexBefore(string ligatures, char next, bool after)
    {
        if (!after)
            return ligatures.IndexOf(next); // Every entry is apostrophe-before.
        // Only 's is apostrophe-before in this list; a plain IndexOf('s') would find s' (index 7) instead.
        return next == ligatures[LigatureIndexBeforeFRE] ? LigatureIndexBeforeFRE : -1;
    }

    /// <summary>
    /// Collapses two-character ligatures (letter and apostrophe) into their single-character placeholders.
    /// </summary>
    /// <param name="value">Decoded string with expanded ligatures.</param>
    /// <param name="result">Destination buffer for the collapsed string.</param>
    /// <param name="language">String language.</param>
    /// <returns>Count of characters written to <paramref name="result"/>.</returns>
    public static int DeflateLigatures(ReadOnlySpan<char> value, Span<char> result, int language)
    {
        if (language is (int)LanguageID.Japanese or (int)LanguageID.Korean)
        {
            value.CopyTo(result);
            return value.Length; // No ligatures in JPN/KOR
        }

        bool after = language is (int)LanguageID.French or (int)LanguageID.German;
        var ligatures = after ? LigatureFRE : LigatureENG;
        int index = 0;
        for (var i = 0; i < value.Length; i++)
        {
            char c = value[i];
            if (c is not (LAP or '\''))
            {
                if (index == result.Length)
                    return index; // Overflow (shouldn't happen for correctly-written strings)
                result[index++] = c;
                continue;
            }

            // Apostrophe-after: merge with the letter already written, replacing it with the placeholder.
            if (after && index != 0)
            {
                ref var prev = ref result[index - 1];
                var ligatureIndex = ligatures.IndexOf(prev);
                if (ligatureIndex != -1)
                {
                    prev = GetLigature(ligatureIndex);
                    continue;
                }
            }

            if (index == result.Length)
                return index; // Overflow (shouldn't happen for correctly-written strings)

            // Apostrophe-before: merge with the following letter and skip over it.
            if (i < value.Length - 1)
            {
                var next = value[i + 1];
                var ligatureIndex = GetLigatureIndexBefore(ligatures, next, after);
                if (ligatureIndex != -1)
                {
                    result[index++] = GetLigature(ligatureIndex);
                    i++;
                    continue;
                }
            }
            result[index++] = c; // Standalone apostrophe.
        }
        return index;
    }

    /// <summary>
    /// Converts foreign Mail from the language-unaware encoding used for English Gold/Silver back to its original, language-aware encoding.
    /// </summary>
    /// <param name="data">Encoded data.</param>
    /// <param name="language">Mail language.</param>
    public static void DecodeMailEnglishGS(Span<byte> data, int language)
    {
        if (language is (int)LanguageID.French or (int)LanguageID.German)
            DecodeMailFG(data);
        else if (language is (int)LanguageID.Italian or (int)LanguageID.Spanish)
            RemapMailIS(data);
    }

    /// <summary>
    /// Converts foreign Mail from its original, language-aware encoding to the language-unaware encoding used for English Gold/Silver.
    /// </summary>
    /// <param name="data">Decoded data.</param>
    /// <param name="language">Mail language.</param>
    public static void EncodeMailEnglishGS(Span<byte> data, int language)
    {
        if (language is (int)LanguageID.French or (int)LanguageID.German)
            EncodeMailFG(data);
        else if (language is (int)LanguageID.Italian or (int)LanguageID.Spanish)
            RemapMailIS(data);
    }

    // Remap 's, swap c' d' j' with unused spaces
    // - English: 0xCD-CF (unused spaces), 0xD4-D6 ('s 't 'v), 0xDC (unused space)
    // - French/German: 0xCD-CF (unused spaces), 0xD4-D6 (c' d' j'), 0xDC ('s)
    private static void DecodeMailFG(Span<byte> data)
    {
        for (int i = 0; i < data.Length; i++)
        {
            var b = data[i];
            if (b == 0xD4)
                data[i] = 0xDC; // 's
            else if (b is >= 0xCD and <= 0xCF)
                data[i] += 0xD4 - 0xCD; // c' d' j' (shift up)
        }
    }

    // Inverse of DecodeMailFG.
    private static void EncodeMailFG(Span<byte> data)
    {
        for (int i = 0; i < data.Length; i++)
        {
            var b = data[i];
            if (b == 0xDC)
                data[i] = 0xD4; // 's
            else if (b is >= 0xD4 and <= 0xD6)
                data[i] -= 0xD4 - 0xCD; // c' d' j' (shift down)
        }
    }

    // Swap upper/lower halves of 0xD0-DF
    // - English: 0xD0-D7 (ligatures), 0xD8-DF (unused spaces)
    // - Italian/Spanish: 0xD0-D7 (accented letters), 0xD8-DF (ligatures)
    // Toggling bit 3 is its own inverse, so the same routine serves both decode and encode.
    private static void RemapMailIS(Span<byte> data)
    {
        for (int i = 0; i < data.Length; i++)
        {
            if ((data[i] & 0xF0) == 0xD0)
                data[i] ^= 0x08;
        }
    }
}