"""text_helper: downloads the translation spreadsheet, word-wraps every entry
to the in-game font metrics, emits binary text tables + C header/source, and
dumps a debug JSON of everything (including per-language warnings/errors)."""

# import pandas lib as pd
import pandas as pd
import os
from enum import Enum
import json
import requests
from collections import defaultdict
import copy
import math
import sys
import filecmp
import re

update = True

print("Running text_helper:")

if update:
    # Fetch the latest spreadsheet; fall back to the locally cached copy when
    # the network is unavailable, and skip the whole parse when nothing changed.
    url = 'https://docs.google.com/spreadsheets/d/14LLs5lLqWasFcssBmJdGXjjYxARAJBa_QUOUhXZt4v8/export?format=xlsx'
    new_file_path = 'text_helper/new_text.xlsx'
    old_file_path = 'text_helper/text.xlsx'
    json_file_path = 'text_helper/output.json'
    no_file = False
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        if response.status_code == 200:
            with open(new_file_path, 'wb') as file:
                file.write(response.content)
            print('File downloaded successfully')
    except requests.exceptions.ReadTimeout:
        if os.path.exists(old_file_path):
            print("Connection timed out. Continuing with locally downloaded file.")
            no_file = True
        else:
            print("xlsx file is missing and connection timed out. Exiting...")
            # BUG FIX: the original printed "Exiting..." but fell through and
            # crashed later on the missing xlsx; exit explicitly instead.
            sys.exit(1)
    except requests.exceptions.ConnectionError:
        if os.path.exists(old_file_path):
            print("Connection error. Continuing with locally downloaded file.")
            no_file = True
        else:
            print("xlsx file is missing and connection timed out. Exiting...")
            sys.exit(1)  # BUG FIX: see above — exit instead of falling through.

    if os.path.exists(old_file_path):
        if not no_file:
            new_file = pd.read_excel(new_file_path, sheet_name="Translations")
            old_file = pd.read_excel(old_file_path, sheet_name="Translations")
        if no_file or new_file.equals(old_file):
            if os.path.exists(json_file_path):
                print("Downloaded file is identical. Skipping parse\n")
                if not no_file:
                    os.remove(new_file_path)
                sys.exit()
            print("json file missing - forcing rebuild.")
        if not no_file:
            # BUG FIX: only swap the new download in when one actually exists;
            # with no_file set there is nothing to rename and this would raise.
            os.remove(old_file_path)
            os.rename(new_file_path, old_file_path)
    else:
        print("xlsx file missing - forcing rebuild.")
        os.rename(new_file_path, old_file_path)

# ---------------------------------------------------------------------------
# Font tables: code point -> in-ROM byte index (position in the list), and a
# parallel table of per-glyph pixel widths. Both tables are 256 entries; byte
# 0xFF is the string terminator. 0x20 entries are unused placeholder slots.
# ---------------------------------------------------------------------------
engCharArray = [
    0x20, 0xC0, 0xC1, 0xC2, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0x20,
    0xCE, 0xCF, 0xD2, 0xD3, 0xD4, 0x152, 0xD9, 0xDA, 0xDB, 0xD1, 0xDF,
    0xE0, 0xE1, 0x20, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0x20,
    0xEE, 0xEF, 0xF2, 0xF3, 0xF4, 0x153, 0xF9, 0xFA, 0xFB, 0xF1, 0xBA, 0xAA,
    0x1D49, 0x26, 0x2B, *[0x20] * 6, 0x3D, 0x3B,
    *[0x20] * 25,
    0x25AF, 0xBF, 0xA1, *[0x20] * 7, 0xCD, 0x25, 0x28, 0x29,
    *[0x20] * 10, 0xE2, *[0x20] * 6, 0xED, *[0x20] * 10,
    0x2B07, 0x2B05, 0x27A1, *[0x20] * 7, 0x1D49, 0x3C, 0x3E,
    *[0x20] * 25,
    0x2B3, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    0x21, 0x3F, 0x2E, 0x2D, 0x30FB, 0x2026, 0x201C, 0x201D, 0x2018, 0x2019,
    0x2642, 0x2640, 0x20, 0x2C, 0xD7, 0x2F,
    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C,
    0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    0x59, 0x5A,
    0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C,
    0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    0x79, 0x7A,
    0x25B6, 0x3A, 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC, 0x2A, 0x20, 0x20,
    0x15E, 0x23C, 0x206, 0x1B2, 0x147, 0x19E,
]
assert len(engCharArray) == 256

engCharWidthArray = [
    0x4, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x8, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x6, 0x6, 0x0, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x8, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x9, 0x6, 0x6, *[0x0] * 5, 0xA, 0x8, 0x3,
    *[0x0] * 25,
    0x6, 0x6, 0x4, 0x8, 0x8, 0x8, 0x7, 0x8, 0x8, 0x4, 0x6, 0x6, 0x4, 0x4,
    *[0x0] * 10, 0x6, *[0x0] * 6, 0x6, *[0x0] * 10,
    0x7, 0x7, 0x7, 0x7,
    0x2, 0x3, 0x4, 0x5, 0x5, 0x6, 0x7, 0x5, 0x6, 0x6,
    *[0x0] * 24,
    0x8, *[0x6] * 10,
    0x4, 0x6, 0x3, 0x6, 0x3, 0x6, 0x6, 0x6, 0x3, 0x3, 0x6, 0x6, 0x6, 0x3,
    0x7, 0x6,
    *[0x6] * 26,
    *[0x6] * 8, 0x4, 0x5, 0x6, 0x4, 0x6, 0x6, 0x6, 0x6, 0x6, 0x5,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x8, 0x3, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
    *[0x0] * 6, 0x38, 0x0, 0x0,
]
assert len(engCharWidthArray) == 256

jpnCharArray = [
    0x20,
    # hiragana
    0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x304B, 0x304D, 0x304F, 0x3051,
    0x3053, 0x3055, 0x3057, 0x3059, 0x305B, 0x305D, 0x305F, 0x3061, 0x3064,
    0x3066, 0x3068, 0x306A, 0x306B, 0x306C, 0x306D, 0x306E, 0x306F, 0x3072,
    0x3075, 0x3078, 0x307B, 0x307E, 0x307F, 0x3080, 0x3081, 0x3082, 0x3084,
    0x3086, 0x3088, 0x3089, 0x308A, 0x308B, 0x308C, 0x308D, 0x308F, 0x3092,
    0x3093,
    0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3083, 0x3085, 0x3087,
    0x304C, 0x304E, 0x3050, 0x3052, 0x3054, 0x3056, 0x3058, 0x305A, 0x305C,
    0x305E, 0x3060, 0x3062, 0x3065, 0x3067, 0x3069, 0x3070, 0x3073, 0x3076,
    0x3079, 0x307C, 0x3071, 0x3074, 0x3077, 0x307A, 0x307D, 0x3063,
    # katakana
    0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30AB, 0x30AD, 0x30AF, 0x30B1,
    0x30B3, 0x30B5, 0x30B7, 0x30B9, 0x30BB, 0x30BD, 0x30BF, 0x30C1, 0x30C4,
    0x30C6, 0x30C8, 0x30CA, 0x30CB, 0x30CC, 0x30CD, 0x30CE, 0x30CF, 0x30D2,
    0x30D5, 0x30D8, 0x30DB, 0x30DE, 0x30DF, 0x30E0, 0x30E1, 0x30E2, 0x30E4,
    0x30E6, 0x30E8, 0x30E9, 0x30EA, 0x20, 0x30EC, 0x30ED, 0x30EF, 0x30F2,
    0x30F3,
    0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30E3, 0x30E5, 0x30E7,
    0x30AC, 0x30AE, 0x30B0, 0x30B2, 0x30B4, 0x30B6, 0x30B8, 0x30BA, 0x30BC,
    0x30BE, 0x30C0, 0x30C2, 0x30C5, 0x30C7, 0x30C9, 0x30D0, 0x30D3, 0x30D6,
    0x30D9, 0x30DC, 0x30D1, 0x30D4, 0x30D7, 0x30DA, 0x30DD, 0x30C3,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    0xFF01, 0xFF1F, 0x3002, 0x30FC, 0x30FB, 0x30FB, 0x300E, 0x300F, 0x300C,
    0x300D, 0x2642, 0x2640, 0x5186, 0x2E, 0xD7, 0x2F,
    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C,
    0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    0x59, 0x5A,
    0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C,
    0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    0x79, 0x7A,
    0x25B6, 0x3A, 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC, 0x2A, 0x20, 0x20,
    0x15E, 0x23C, 0x206, 0x1B2, 0x147, 0x19E,
]
assert len(jpnCharArray) == 256

# The Japanese font is fixed width: 8 px for every real glyph, with the same
# special tail as the English table (six unused slots, 0x28, two unused).
jpnCharWidthArray = [0x8] * 247 + [0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x28, 0x0, 0x0]
assert len(jpnCharWidthArray) == 256

charConversionList = [
    # Replaces the first char in the pair with the latter before encoding.
    ["'", "’"],
]


def convertByte(incoming, array):
    """Map a unicode code point to its byte index in the given font table.

    Applies charConversionList substitutions first. Logs a warning/error into
    mainDict for the current global `lang` when a substitution happens or when
    the code point is not present in the table (in which case 0 is returned).
    """
    for pair in charConversionList:
        if incoming == ord(pair[0]):
            incoming = ord(pair[1])
            next_key = max(mainDict[lang.name]["Warnings"].keys(), default=-1) + 1
            mainDict[lang.name]["Warnings"][next_key] = f"Warning! {pair[0]} found, replacing with {pair[1]} !"
    index = 0
    for val in array:
        if val == incoming:
            return index
        index += 1
    next_key = max(mainDict[lang.name]["Errors"].keys(), default=-1) + 1
    mainDict[lang.name]["Errors"][next_key] = f"Error! No match found for char [ {chr(incoming)} ]!"
    return 0


def SplitSentenceIntoLines(sentence, offset, pixelsPerChar, pixelsInLine):
    """Word-wrap one sentence to the font metrics.

    `offset` is the pixel position already consumed on the current line.
    Control characters: "Ň" = newline, "Ş" = scroll, "ȼ" = new textbox.
    Returns (new line offset in pixels, number of line breaks emitted,
    wrapped string). Uses the global `lang` to pick the font tables.
    """
    # If we can optimize this to remove the spaces, it could save a few bytes.
    outStr = ""
    currLine = ""
    lineCount = 0
    currWordIndex = 0
    lineLength = 0
    spaceLength = 0
    words = sentence.split()
    while currWordIndex < len(words):
        word = words[currWordIndex]
        wordLength = 0
        # Figure out the length of the word in pixels.
        for char in word:
            if pixelsPerChar == "Variable":
                if lang == Languages.Japanese:
                    # BUG FIX: the width lookup previously indexed via
                    # engCharArray, which never matches kana (logging spurious
                    # errors); use the Japanese table for Japanese text.
                    wordLength += jpnCharWidthArray[convertByte(ord(char), jpnCharArray)]
                    spaceLength = jpnCharWidthArray[convertByte(ord(' '), jpnCharArray)]
                else:
                    wordLength += engCharWidthArray[convertByte(ord(char), engCharArray)]
                    spaceLength = engCharWidthArray[convertByte(ord(' '), engCharArray)]
            elif pixelsPerChar == "Default":
                if lang == Languages.Japanese:
                    wordLength += 8
                    spaceLength = 8
                else:
                    wordLength += 6
                    spaceLength = 6
        # See if the whole sentence is a newline.
        if sentence == "Ň":
            outStr += "Ň"
            currLine = ""
            lineCount += 1
            offset = 0
            lineLength = 0
            currWordIndex += 1
        # See if the sentence is a new box.
        elif sentence == "Ş" or sentence == "ȼ":
            outStr += sentence
            currLine = ""
            offset = 0
            lineLength = 0
            currWordIndex += 1
        # Test if the word is too long in general.
        elif wordLength > pixelsInLine:
            next_key = max(mainDict[lang.name]["Errors"].keys(), default=-1) + 1
            mainDict[lang.name]["Errors"][next_key] = f"ERROR: Word {word} exceeds alloted length"
            currWordIndex += 1
        # Test if adding the word will go over our alloted space.
        elif (wordLength + lineLength + offset) <= pixelsInLine:
            # If not, add the word and increase the index.
            currLine += (word + " ")
            lineLength += (wordLength + spaceLength)
            currWordIndex += 1
        # We need to move to the next line.
        else:
            # Every line should already have a space at the end of it.
            # Remove it here.
            outStr += (currLine[:-1] + "Ň")
            currLine = ""
            lineCount += 1
            lineLength = 0
            offset = 0
    outStr += currLine
    return lineLength + offset, lineCount, outStr


# ---------------------------------------------------------------------------
# Sentence splitter (adapted from the well-known public-domain implementation).
# ---------------------------------------------------------------------------
alphabets = r"([A-Za-z])"
prefixes = r"(Mr|St|Mrs|Ms|Dr)[.]"
suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
starters = r"(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = r"[.](com|net|org|io|gov|edu|me)"
digits = r"([0-9])"
multiple_dots = r'\.{2,}'


def split_into_sentences(text: str) -> list[str]:
    """
    Split the text into sentences.

    If the text contains substrings "<prd>" or "<stop>", they would lead
    to incorrect splitting because they are used as markers for splitting.

    :param text: text to be split into sentences
    :type text: str

    :return: list of sentences
    :rtype: list[str]
    """
    # BUG FIX: the "<prd>"/"<stop>" placeholder strings had been stripped out
    # (text.split("") even raises ValueError); restored from the reference
    # implementation this function is based on.
    text = " " + text + " "
    text = text.replace("\n", " ")
    text = re.sub(prefixes, "\\1<prd>", text)
    text = re.sub(websites, "<prd>\\1", text)
    text = re.sub(digits + "[.]" + digits, "\\1<prd>\\2", text)
    text = re.sub(multiple_dots, lambda match: "<prd>" * len(match.group(0)) + "<stop>", text)
    if "Ph.D" in text:
        text = text.replace("Ph.D.", "Ph<prd>D<prd>")
    text = re.sub(r"\s" + alphabets + "[.] ", " \\1<prd> ", text)
    text = re.sub(acronyms + " " + starters, "\\1<stop> \\2", text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]", "\\1<prd>\\2<prd>\\3<prd>", text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]", "\\1<prd>\\2<prd>", text)
    text = re.sub(" " + suffixes + "[.] " + starters, " \\1<stop> \\2", text)
    text = re.sub(" " + suffixes + "[.]", " \\1<prd>", text)
    text = re.sub(" " + alphabets + "[.]", " \\1<prd>", text)
    if "”" in text:
        text = text.replace(".”", "”.")
    if "\"" in text:
        text = text.replace(".\"", "\".")
    if "!" in text:
        text = text.replace("!\"", "\"!")
    if "?" in text:
        text = text.replace("?\"", "\"?")
    if "。" in text:
        text = text.replace("。\"", "\"。")  # Added for Japanese support
    if "？" in text:
        text = text.replace("？\"", "\"？")  # Added for Japanese support
    if "！" in text:
        text = text.replace("！\"", "\"！")  # Added for Japanese support
    text = text.replace(".", ".<stop>")
    text = text.replace("?", "?<stop>")
    text = text.replace("!", "!<stop>")
    text = text.replace("。", "。<stop>")  # Added for Japanese support
    text = text.replace("？", "？<stop>")  # Added for Japanese support
    text = text.replace("！", "！<stop>")  # Added for Japanese support
    text = text.replace("<prd>", ".")
    text = text.replace("Ň", "Ň<stop>")  # Split newlines into their own sentences
    text = text.replace("ȼ", "ȼ<stop>")  # Split new boxes into their own sentences
    text = text.replace("Ş", "Ş<stop>")  # Split new boxes into their own sentences
    sentences = text.split("<stop>")
    sentences = [s.strip() for s in sentences]
    if sentences and not sentences[-1]:
        sentences = sentences[:-1]
    return sentences


class Languages(Enum):
    """Supported languages; value is the column offset in the spreadsheet."""
    Japanese = 0
    English = 1
    French = 2
    German = 3
    Italian = 4
    SpanishEU = 5
    SpanishLA = 6


# read by default 1st sheet of an excel file
text_dir = os.curdir + "/text_helper"  # renamed from `dir` (shadowed builtin)

# The categories that get binary tables / header defines ("GB" and the
# Warnings/Errors buckets are bookkeeping only).
textCategories = ("PTGB", "RSEFRLG", "GENERAL", "CREDITS", "PKMN_NAMES")

mainDict = {}
for lang in Languages:
    mainDict[lang.name] = {
        "PTGB": {},
        "RSEFRLG": {},
        "GB": {},
        "GENERAL": {},
        "CREDITS": {},
        "PKMN_NAMES": {},
        "Warnings": {},
        "Errors": {},
    }


def convert_item(ogDict):
    """Wrap one entry's text and encode it as a hex byte string in-place.

    Reads "bytes" (raw text), "numLines", "pixelsPerChar", "pixelsInLine" and
    "includeBoxBreaks" from ogDict; rewrites "bytes" as space-separated hex
    bytes terminated by "ff". Uses the global `lang` for font selection.
    """
    line = ogDict["bytes"]
    numLines = ogDict["numLines"]
    pixelsPerChar = ogDict["pixelsPerChar"]
    pixelsInLine = ogDict["pixelsInLine"]
    include_box_breaks = ogDict["includeBoxBreaks"]
    split_sents = split_into_sentences(line)
    index = 0
    outStr = ""
    currLine = 0
    offset = 0
    escapeCount = 0
    while index < len(split_sents) and escapeCount < 100:
        offset, receivedLines, out = SplitSentenceIntoLines(split_sents[index], offset, pixelsPerChar, pixelsInLine)
        currLine += receivedLines
        if out == "ȼ":
            offset = 0
            currLine = 0
            outStr = outStr[:-1]
            outStr += "ȼ"
            index += 1
        elif currLine < numLines:
            index += 1
            outStr += out
        else:
            # Out of lines: retry the same sentence in a fresh textbox.
            outStr = outStr[:-1]
            outStr += "ȼ"  # new textbox character
            offset = 0
            currLine = 0
            escapeCount += 1
            if not include_box_breaks:
                next_key = max(mainDict[lang.name]["Errors"].keys(), default=-1) + 1
                mainDict[lang.name]["Errors"][next_key] = f"ERROR! Made a line break when disabled, sentence \"{outStr}\" is too long!"
    if escapeCount == 100:
        next_key = max(mainDict[lang.name]["Errors"].keys(), default=-1) + 1
        mainDict[lang.name]["Errors"][next_key] = f"ERROR! Sentence \"{out}\" is too long!"
    # Some cases that should be fixed; iterate until the string is stable.
    exitLoop = False
    while not exitLoop:
        newStr = outStr
        # A space right before a newline just takes up space.
        newStr = newStr.replace(" Ň", "Ň")
        # Newlines shouldn't happen right after a new textbox.
        newStr = newStr.replace("ȼŇ", "ȼ")
        # Nor should newlines be right before a new textbox.
        newStr = newStr.replace("Ňȼ", "ȼ")
        # Nor should a new textbox be after a new textbox.
        newStr = newStr.replace("ȼȼ", "ȼ")
        # Nor should a new scroll be next to a new textbox.
        newStr = newStr.replace("Şȼ", "Ş")
        newStr = newStr.replace("ȼŞ", "ȼ")
        exitLoop = (newStr == outStr)
        outStr = newStr
    byteStr = ""
    arr = jpnCharArray if lang == Languages.Japanese else engCharArray
    for char in outStr[:-1]:
        byteStr += f"{convertByte(ord(char), arr):02x} "
    if len(outStr) > 0 and outStr[-1] != ' ':  # Check if the last char is a space
        byteStr += f"{convertByte(ord(outStr[-1]), arr):02x} "
    byteStr += "ff"
    ogDict["bytes"] = byteStr
    return ogDict


def write_text_bin_file(filename, dictionary):
    """Encode every entry of `dictionary` and write an indexed binary table.

    Layout: u16 entry count (little endian, 16-bit to keep GBA alignment),
    then a u16 offset per entry (relative to the end of the index), then the
    concatenated entry byte streams. Exits the program if an entry overflows
    the in-game 1024-byte buffer.
    """
    with open(filename, 'wb') as binFile:
        # Let the first two bytes indicate the number of entries. We need to
        # store 2 bytes instead of one, because not aligning the data to
        # 16 bits will cause corruption on the gba.
        dict_size = len(dictionary)
        binFile.write(bytes([dict_size & 0xFF, (dict_size >> 8) & 0xFF]))
        # After this, we read the offset (16 bit) of each line (relative to
        # the last index byte).
        index = bytearray(len(dictionary) * 2)
        # bindata will contain the binary data of each entry.
        bindata = bytearray()
        current_offset = 0
        num = 0
        # Append every line's binary data to bindata, keeping an index of the
        # offset within bindata at which each line starts.
        for key, line in dictionary.items():
            dictionary[key] = convert_item(line)
            # Store the offset as a 16-bit little-endian value.
            index[num * 2] = current_offset & 0xFF
            index[num * 2 + 1] = (current_offset >> 8) & 0xFF
            linedata = bytes.fromhex(dictionary[key]['bytes'])
            bindata.extend(linedata)
            current_offset += len(linedata)
            if len(linedata) > 1024:
                print(f"Error: entry '{key}' numBytes exceeds 1024 (got {len(linedata)})", file=sys.stderr)
                sys.exit(1)
            num += 1
        # Write the index and bindata to the file.
        binFile.write(index)
        binFile.write(bindata)


def write_enum_to_header_file(hFile, prefix, dictionary):
    """Write one `#define <prefix><key> <n>` per entry; return the count."""
    num = 0
    for key in dictionary:
        hFile.write(f"#define {prefix}{key} {num}\n")
        num += 1
    hFile.write("\n")
    return num


print("Starting parse:")
currSheet = pd.read_excel(text_dir + "/text.xlsx", sheet_name="Translations")
for _, currRow in currSheet.iterrows():
    for lang in Languages:
        # Columns 7.. hold the per-language text; fall back to English when a
        # translation cell is empty.
        offset = lang.value
        if pd.isna(currRow.iloc[7 + lang.value]):
            offset = Languages.English.value
        mainDict[lang.name][currRow.iloc[0]][currRow.iloc[1]] = {
            "bytes": currRow.iloc[7 + offset],
            "numLines": currRow.iloc[2],
            "pixelsPerChar": currRow.iloc[3],
            "pixelsInLine": currRow.iloc[4],
            "includeBoxBreaks": currRow.iloc[5],
        }

# generate the header file
with open(os.curdir + '/include/translated_text.h', 'w') as hFile:
    # NOTE(review): the original include target after "#include " appears to
    # have been lost (likely an angle-bracketed header such as <tonc.h>) —
    # confirm against the project before relying on this output.
    hFile.write("#ifndef DIALOGUE_H\n#define DIALOGUE_H\n\n#include \n\n")
    # `lang` is the last language from the parse loop; the category keys are
    # identical across languages, so any language works here.
    # PTGB
    num = write_enum_to_header_file(hFile, "", mainDict[lang.name]["PTGB"])
    hFile.write(f"\n#define DIA_SIZE {num}\n#define DIA_END DIA_SIZE\n\n")
    # RSEFRLG
    write_enum_to_header_file(hFile, "RSEFRLG_", mainDict[lang.name]["RSEFRLG"])
    # GENERAL
    write_enum_to_header_file(hFile, "GENERAL_", mainDict[lang.name]["GENERAL"])
    # CREDITS
    write_enum_to_header_file(hFile, "CREDITS_", mainDict[lang.name]["CREDITS"])
    # PKMN_NAMES
    write_enum_to_header_file(hFile, "PKMN_NAMES_", mainDict[lang.name]["PKMN_NAMES"])
    hFile.write("/** Returns the LZ10 compressed PTGB text table.*/\n")
    hFile.write("const u8* get_compressed_PTGB_table();\n\n")
    hFile.write("/** Returns the LZ10 compressed RSEFRLG text table.*/\n")
    hFile.write("const u8* get_compressed_rsefrlg_table();\n\n")
    hFile.write("/** Returns the LZ10 compressed GENERAL text table.*/\n")
    hFile.write("const u8* get_compressed_general_table();\n\n")
    hFile.write("/** Returns the LZ10 compressed CREDITS text table.*/\n")
    hFile.write("const u8* get_compressed_credits_table();\n\n")
    hFile.write("/** Returns the LZ10 compressed PKMN_NAMES text table.*/\n")
    hFile.write("const u8* get_compressed_pkmn_names_table();\n\n")
    hFile.write("\n#endif")

# now generate the text tables
for lang in Languages:
    for cat in textCategories:
        table_file = os.curdir + '/to_compress/' + cat + '_' + lang.name.lower() + '.bin'
        write_text_bin_file(table_file, mainDict[lang.name][cat])

# now generate the cpp file.
with open(os.curdir + '/source/translated_text.cpp', 'w') as cppFile:
    cppFile.write("#include \"translated_text.h\"\n#include \"debug_mode.h\"\n#include \"extern_pokemon_data.h\"\n")
    # generate includes for each language
    for lang in Languages:
        for cat in mainDict[lang.name]:
            if cat in {"PTGB", "RSEFRLG", "GENERAL", "CREDITS", "PKMN_NAMES"}:
                cppFile.write("#include \"" + cat.upper() + "_" + lang.name.lower() + "_lz10_bin.h\"\n")
    # one accessor set per language, selected at compile time
    for lang in Languages:
        cppFile.write(f"\n#if PTGB_BUILD_LANGUAGE == {lang.value + 1}\n")
        for funcName, prefix in (("PTGB", "PTGB"), ("rsefrlg", "RSEFRLG"),
                                 ("general", "GENERAL"), ("credits", "CREDITS"),
                                 ("pkmn_names", "PKMN_NAMES")):
            cppFile.write(f"const u8* get_compressed_{funcName}_table()\n")
            cppFile.write("{\n")
            cppFile.write("\treturn " + prefix + "_" + lang.name.lower() + "_lz10_bin;\n")
            cppFile.write("}\n\n")
        cppFile.write(f"#endif\n\n\n")

# Decode the byte strings back into readable text for the debug JSON.
for lang in Languages:
    for cat in mainDict[lang.name]:
        if cat in {"PTGB", "RSEFRLG", "GENERAL", "CREDITS", "PKMN_NAMES"}:
            for item in mainDict[lang.name][cat]:
                string = mainDict[lang.name][cat][item]["bytes"].split(" ")
                outText = ""
                arr = jpnCharArray if lang == Languages.Japanese else engCharArray
                for byte in string:
                    # BUG FIX: previously always decoded through engCharArray
                    # even though `arr` had been selected for the language.
                    outText += chr(arr[int(byte, 16)])
                mainDict[lang.name][cat][item]["text"] = outText

with open(text_dir + '/output.json', 'w') as jsonFile:
    jsonFile.write(json.dumps(mainDict))

print("Parse finished!\n")