From 63d57f063dcee255ea8983540e300838c1bbbe48 Mon Sep 17 00:00:00 2001 From: The Gears of Progress Date: Thu, 22 Jan 2026 16:42:53 -0500 Subject: [PATCH] Pulling character arrays from sheet --- text_helper/main.py | 110 ++++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 64 deletions(-) diff --git a/text_helper/main.py b/text_helper/main.py index a12d383..d101af2 100755 --- a/text_helper/main.py +++ b/text_helper/main.py @@ -6,6 +6,7 @@ import requests from collections import defaultdict import sys from pathlib import Path +import hashlib class Languages(Enum): Japanese = 0 @@ -30,47 +31,18 @@ json_file_path = BASE_DIR / 'output.json' mainDict = {} textSections = [] charArrays = { - "International": {}, - "Japanese": {}, + "International": [0] * 0x100, + "Japanese": [0] * 0x100, +} +charArrayOfLanguage = { + Languages.Japanese: charArrays["Japanese"], + Languages.English: charArrays["International"], + Languages.French: charArrays["International"], + Languages.German: charArrays["International"], + Languages.Italian: charArrays["International"], + Languages.SpanishEU: charArrays["International"], + Languages.SpanishLA: charArrays["International"], } - -engCharArray = [ -0x20, 0xC0, 0xC1, 0xC2, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0x20, 0xCE, 0xCF, 0xD2, 0xD3, 0xD4, -0x152, 0xD9, 0xDA, 0xDB, 0xD1, 0xDF, 0xE0, 0xE1, 0x20, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0x20, -0xEE, 0xEF, 0xF2, 0xF3, 0xF4, 0x153, 0xF9, 0xFA, 0xFB, 0xF1, 0xBA, 0xAA, 0x1D49, 0x26, 0x2B, 0x20, -0x20, 0x20, 0x20, 0x20, 0x20, 0x3D, 0x3B, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, -0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, -0x25AF, 0xBF, 0xA1, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xCD, 0x25, 0x28, 0x29, 0x20, 0x20, -0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xE2, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xED, -0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2B07, 0x2B05, 0x27A1, 0x20, 0x20, 0x20, -0x20, 0x20, 0x20, 0x20, 0x1D49, 0x3C, 0x3E, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, -0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, -0x2B3, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x21, 0x3F, 0x2E, 0x2D, 0x30FB, -0x2025, 0x201C, 0x201D, 0x2018, 0x2019, 0x2642, 0x2640, 0x20, 0x2C, 0xD7, 0x2F, 0x41, 0x42, 0x43, 0x44, 0x45, -0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, -0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, -0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x25B6, -0x3A, 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC, 0x2A, 0x20, 0x20, 0x15E, 0x23C, 0x206, 0x1B2, 0x147, 0x19E, -] - -jpnCharArray = [ -0x20, 0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x304B, 0x304D, 0x304F, 0x3051, 0x3053, 0x3055, 0x3057, 0x3059, 0x305B, 0x305D, -0x305F, 0x3061, 0x3064, 0x3066, 0x3068, 0x306A, 0x306B, 0x306C, 0x306D, 0x306E, 0x306F, 0x3072, 0x3075, 0x3078, 0x307B, 0x307E, -0x307F, 0x3080, 0x3081, 0x3082, 0x3084, 0x3086, 0x3088, 0x3089, 0x308A, 0x308B, 0x308C, 0x308D, 0x308F, 0x3092, 0x3093, 0x3041, -0x3043, 0x3045, 0x3047, 0x3049, 0x3083, 0x3085, 0x3087, 0x304C, 0x304E, 0x3050, 0x3052, 0x3054, 0x3056, 0x3058, 0x305A, 0x305C, -0x305E, 0x3060, 0x3062, 0x3065, 0x3067, 0x3069, 0x3070, 0x3073, 0x3076, 0x3079, 0x307C, 0x3071, 0x3074, 0x3077, 0x307A, 0x307D, -0x3063, 0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30AB, 0x30AD, 0x30AF, 0x30B1, 0x30B3, 0x30B5, 0x30B7, 0x30B9, 0x30BB, 0x30BD, -0x30BF, 0x30C1, 0x30C4, 0x30C6, 0x30C8, 0x30CA, 0x30CB, 0x30CC, 0x30CD, 0x30CE, 0x30CF, 0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30DE, -0x30DF, 0x30E0, 0x30E1, 0x30E2, 0x30E4, 0x30E6, 0x30E8, 0x30E9, 0x30EA, 0x30EB, 0x30EC, 0x30ED, 0x30EF, 0x30F2, 0x30F3, 0x30A1, -0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30E3, 0x30E5, 0x30E7, 0x30AC, 0x30AE, 0x30B0, 0x30B2, 0x30B4, 0x30B6, 0x30B8, 0x30BA, 0x30BC, -0x30BE, 0x30C0, 0x30C2, 0x30C5, 0x30C7, 0x30C9, 0x30D0, 0x30D3, 0x30D6, 0x30D9, 0x30DC, 0x30D1, 0x30D4, 0x30D7, 0x30DA, 0x30DD, -0x30C3, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFF01, 0xFF1F, 0x3002, 0x30FC, 0x30FB, -0x30FB, 0x300E, 0x300F, 0x300C, 0x300D, 0x2642, 0x2640, 0x5186, 0x2E, 0xD7, 0x2F, 0x41, 0x42, 0x43, 0x44, 0x45, -0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, -0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, -0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x25B6, -0x3A, 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC, 0x2A, 0x20, 0x20, 0x15E, 0x23C, 0x206, 0x1B2, 0x147, 0x19E, -] jpnCharWidthArray = [ 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, @@ -220,11 +192,11 @@ def split_sentence_into_lines(sentence, offset, pixelsPerChar, pixelsInLine, lan for char in word: if (pixelsPerChar == "Variable"): if(lang == Languages.Japanese): - wordLength += jpnCharWidthArray[convert_char_to_byte(ord(char), jpnCharArray, lang)] - spaceLength = jpnCharWidthArray[convert_char_to_byte(ord(' '), jpnCharArray, lang)] + wordLength += jpnCharWidthArray[convert_char_to_byte(ord(char), charArrayOfLanguage[lang], lang)] + spaceLength = jpnCharWidthArray[convert_char_to_byte(ord(' '), charArrayOfLanguage[lang], lang)] else: - wordLength += engCharWidthArray[convert_char_to_byte(ord(char), engCharArray, lang)] - spaceLength = engCharWidthArray[convert_char_to_byte(ord(' '), engCharArray, lang)] + wordLength += engCharWidthArray[convert_char_to_byte(ord(char), charArrayOfLanguage[lang], lang)] + spaceLength = engCharWidthArray[convert_char_to_byte(ord(' '), charArrayOfLanguage[lang], lang)] elif (pixelsPerChar == "Default"): if (lang == Languages.Japanese): @@ -285,7 +257,7 @@ def convert_char_to_byte(incoming, array, lang): index = 0 for val in array: - if val == incoming: + if str(val) == chr(incoming): return index index += 1 log_warning_error(lang, "Error", f"No match found for char [ {chr(incoming)} ]!") @@ -298,6 +270,16 @@ def log_warning_error(lang, type, text): mainDict[lang.name][nType][max(mainDict[lang.name][nType].keys(), default =- 1) + 1] = nText print(nText) +def hash_excel(path): + sheets = pd.read_excel(path, sheet_name=None) + h = hashlib.sha256() + for name in sorted(sheets): + h.update(name.encode()) + h.update(pd.util.hash_pandas_object( + sheets[name], index=True + ).values) + return h.digest() + def convert_item(ogDict, lang): line = ogDict["bytes"] numLines = ogDict["numLines"] @@ -306,16 +288,16 @@ def convert_item(ogDict, lang): include_box_breaks = ogDict["includeBoxBreaks"] if lang == Languages.Japanese: - arr = jpnCharArray + arr = charArrayOfLanguage[lang] list = jpnEscapeCharConversionList else: - arr = engCharArray + arr = charArrayOfLanguage[lang] list = itlEscapeCharConversionList for pair in list: if pair[0] in line: escapeString = "" for char in pair[1]: - escapeString += chr(arr[char]) + escapeString += arr[char] #print(f"Replacing {pair[0]} with {escapeString}!") line = line.replace(pair[0], escapeString) #print(line) @@ -378,10 +360,7 @@ def convert_item(ogDict, lang): outStr = newStr byteStr = "" - if lang == Languages.Japanese: - arr = jpnCharArray - else: - arr = engCharArray + arr = charArrayOfLanguage[lang] for char in outStr[:-1]: byteStr += f"{convert_char_to_byte(ord(char), arr, lang):02x} " if (len(outStr) > 0 and outStr[-1] != ' '): # Check if the last char is a space @@ -463,10 +442,8 @@ def download_xlsx_file(): else: # Online mode if old_file_path.exists(): - new_df = pd.read_excel(new_file_path, sheet_name="Translations") - old_df = pd.read_excel(old_file_path, sheet_name="Translations") - if new_df.equals(old_df): + if hash_excel(new_file_path) == hash_excel(old_file_path): print("Downloaded file is identical.") new_file_path.unlink() if json_file_path.exists(): @@ -483,10 +460,18 @@ def download_xlsx_file(): new_file_path.rename(old_file_path) def transfer_xlsx_to_dict(): - #print("\tGetting character arrays") - #currSheet = pd.read_excel(dir + "/text.xlsx", sheet_name="Translations") - - #for arr in charArrays: + print("\tGetting character arrays") + currSheet = pd.read_excel(dir + "/text.xlsx", sheet_name="Character Arrays", header=None) + offset = 0 + for key, value in charArrays.items(): + for r in range(16): + for c in range(16): + val = currSheet.iloc[r + 1, c + offset] + if pd.isna(val): + val = " " + value[r * 0x10 + c] = val + # print(charArrays[key]) + offset += 16 @@ -588,13 +573,10 @@ def output_json_file(): for item in mainDict[lang.name][section]: string = mainDict[lang.name][section][item]["bytes"].split(" ") outText = "" - if lang == Languages.Japanese: - arr = jpnCharArray - else: - arr = engCharArray + arr = charArrayOfLanguage[lang] for byte in string: byte = arr[int(byte, 16)] - outText += chr(byte) + outText += str(byte) mainDict[lang.name][section][item]["text"] = outText with open(dir + '/output.json', 'w') as jsonFile: