mirror of
https://github.com/GearsProgress/Poke_Transporter_GB.git
synced 2026-04-26 02:05:09 -05:00
Add a binary table format and convert the text entries into this format in text_helper/main.py. It then gets compressed with zx0. The new text_data_table and streamed_data_table classes exist to read the various entries from this binary table. streamed_data_table specifically exists to use a decompression buffer that is smaller than the actual binary table. However, it requires a decompression buffer that is still larger than ZX0_DEFAULT_WINDOW_SIZE (default: 2048 bytes) and will only be able to decompress in chunks of (<decompression_buffer_size> - <ZX0_DEFAULT_WINDOW_SIZE>) bytes. Try to keep the binary text tables sufficiently small though: since zx0 doesn't actually support random access, getting to the last entry is significantly more expensive than reading the first one. And unless you use streamed_data_table, decompressing a table also requires <uncompressed_size> bytes of stack space, and therefore IWRAM. I also had to rework script_array because it can no longer reference the strings directly. Instead we now reference the DIA_* "enum" values. We also no longer store an array of script_obj instances, because these were getting stored in IWRAM since they were originally non-const global variables. Instead we now have const arrays of script_obj_params structs, which should end up in .rodata and therefore in EWRAM. Right now, script_obj only supports the PTGB text table (originally the dialogue array). If the need arises to support other tables as well, I'd consider adding a separate enum to script_obj_params to indicate the specific table. The compilation process will also output .su files in the build folder from now on. These files indicate the stack frame size for every function in every compilation unit, so be sure to check them from time to time. Note that they only show the stack consumption of that specific function, so to get the worst-case stack consumption you need to manually add up all the functions in a given call chain.
540 lines
25 KiB
Python
Executable File
540 lines
25 KiB
Python
Executable File
# import pandas lib as pd
|
||
import pandas as pd
|
||
import os
|
||
from enum import Enum
|
||
import json
|
||
import requests
|
||
from collections import defaultdict
|
||
import copy
|
||
import math
|
||
|
||
# Set to False to skip the download and reuse the text_helper/text.xlsx that
# is already on disk.
update = True

print ("\nRunning text_helper:\n\n\n\n---------------")

if update:
    # Export of the translation spreadsheet as an .xlsx workbook.
    url = 'https://docs.google.com/spreadsheets/d/14LLs5lLqWasFcssBmJdGXjjYxARAJBa_QUOUhXZt4v8/export?format=xlsx'

    # Without a timeout requests waits forever on an unresponsive server,
    # which would hang the whole build.
    response = requests.get(url, timeout=30)
    file_Path = 'text_helper/text.xlsx'

    if response.status_code == 200:
        with open(file_Path, 'wb') as file:
            file.write(response.content)
        print('File downloaded successfully')
    else:
        # Best effort: the rest of the script falls back to whatever
        # text.xlsx is already present.
        print('Failed to download file')
|
||
|
||
# Character table for every non-Japanese language: the list index is the byte
# written to the text table, the value is the Unicode code point it stands for.
# 0x20 fills the unused slots; convertByte returns the first match, so a real
# space always encodes as index 0.
engCharArray = [
    0x20, 0xC0, 0xC1, 0xC2, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0x20, 0xCE, 0xCF, 0xD2, 0xD3, 0xD4,
    0x152, 0xD9, 0xDA, 0xDB, 0xD1, 0xDF, 0xE0, 0xE1, 0x20, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0x20,
    0xEE, 0xEF, 0xF2, 0xF3, 0xF4, 0x153, 0xF9, 0xFA, 0xFB, 0xF1, 0xBA, 0xAA, 0x1D49, 0x26, 0x2B, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x3D, 0x3B, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x25AF, 0xBF, 0xA1, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xCD, 0x25, 0x28, 0x29, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xE2, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xED,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2B07, 0x2B05, 0x27A1, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x1D49, 0x3C, 0x3E, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x2B3, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x21, 0x3F, 0x2E, 0x2D, 0x30FB,
    0x2026, 0x201C, 0x201D, 0x2018, 0x2019, 0x2642, 0x2640, 0x20, 0x2C, 0xD7, 0x2F, 0x41, 0x42, 0x43, 0x44, 0x45,
    0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55,
    0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
    0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x25B6,
    0x3A, 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC, 0x2A, 0x20, 0x20, 0x15E, 0x23C, 0x206, 0x1B2, 0x147, 0x19E,
]
|
||
|
||
# Glyph widths in pixels for the non-Japanese font, indexed by the same byte
# value as engCharArray (SplitSentenceIntoLines uses them for "Variable"
# width wrapping).
engCharWidthArray = [
    0x4, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x8, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x8, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x9, 0x6, 0x6, 0x0,
    0x0, 0x0, 0x0, 0x0, 0xA, 0x8, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x6, 0x6, 0x4, 0x8, 0x8, 0x8, 0x7, 0x8, 0x8, 0x4, 0x6, 0x6, 0x4, 0x4, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x6, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x6,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0x7, 0x7, 0x7, 0x2, 0x3, 0x4,
    0x5, 0x5, 0x6, 0x7, 0x5, 0x6, 0x6, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x8, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x4, 0x6, 0x3, 0x6, 0x3,
    0x6, 0x6, 0x6, 0x3, 0x3, 0x6, 0x6, 0x6, 0x3, 0x7, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x4, 0x5, 0x6,
    0x4, 0x6, 0x6, 0x6, 0x6, 0x6, 0x5, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x8,
    0x3, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x38, 0x0, 0x0, ]
|
||
|
||
# Character table for Japanese: the list index is the byte written to the
# text table, the value is the Unicode code point it stands for.
# NOTE(review): index 0x79 holds 0x20, but the surrounding katakana run
# (0x30E9, 0x30EA, _, 0x30EC, 0x30ED) suggests 0x30EB was intended there.
# Confirm against the font before changing it.
jpnCharArray = [
    0x20, 0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x304B, 0x304D, 0x304F, 0x3051, 0x3053, 0x3055, 0x3057, 0x3059, 0x305B, 0x305D,
    0x305F, 0x3061, 0x3064, 0x3066, 0x3068, 0x306A, 0x306B, 0x306C, 0x306D, 0x306E, 0x306F, 0x3072, 0x3075, 0x3078, 0x307B, 0x307E,
    0x307F, 0x3080, 0x3081, 0x3082, 0x3084, 0x3086, 0x3088, 0x3089, 0x308A, 0x308B, 0x308C, 0x308D, 0x308F, 0x3092, 0x3093, 0x3041,
    0x3043, 0x3045, 0x3047, 0x3049, 0x3083, 0x3085, 0x3087, 0x304C, 0x304E, 0x3050, 0x3052, 0x3054, 0x3056, 0x3058, 0x305A, 0x305C,
    0x305E, 0x3060, 0x3062, 0x3065, 0x3067, 0x3069, 0x3070, 0x3073, 0x3076, 0x3079, 0x307C, 0x3071, 0x3074, 0x3077, 0x307A, 0x307D,
    0x3063, 0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30AB, 0x30AD, 0x30AF, 0x30B1, 0x30B3, 0x30B5, 0x30B7, 0x30B9, 0x30BB, 0x30BD,
    0x30BF, 0x30C1, 0x30C4, 0x30C6, 0x30C8, 0x30CA, 0x30CB, 0x30CC, 0x30CD, 0x30CE, 0x30CF, 0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30DE,
    0x30DF, 0x30E0, 0x30E1, 0x30E2, 0x30E4, 0x30E6, 0x30E8, 0x30E9, 0x30EA, 0x20, 0x30EC, 0x30ED, 0x30EF, 0x30F2, 0x30F3, 0x30A1,
    0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30E3, 0x30E5, 0x30E7, 0x30AC, 0x30AE, 0x30B0, 0x30B2, 0x30B4, 0x30B6, 0x30B8, 0x30BA, 0x30BC,
    0x30BE, 0x30C0, 0x30C2, 0x30C5, 0x30C7, 0x30C9, 0x30D0, 0x30D3, 0x30D6, 0x30D9, 0x30DC, 0x30D1, 0x30D4, 0x30D7, 0x30DA, 0x30DD,
    0x30C3, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFF01, 0xFF1F, 0x3002, 0x30FC, 0x30FB,
    0x30FB, 0x300E, 0x300F, 0x300C, 0x300D, 0x2642, 0x2640, 0x5186, 0x2E, 0xD7, 0x2F, 0x41, 0x42, 0x43, 0x44, 0x45,
    0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55,
    0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
    0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x25B6,
    0x3A, 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC, 0x2A, 0x20, 0x20, 0x15E, 0x23C, 0x206, 0x1B2, 0x147, 0x19E,
]
|
||
|
||
# Glyph widths in pixels for the Japanese font, indexed by byte value
# (SplitSentenceIntoLines uses them for "Variable" width wrapping when the
# language is Japanese).
jpnCharWidthArray = [
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x28, 0x0, 0x0, ]
|
||
|
||
# Character substitutions applied by convertByte before the table lookup.
charConversionList = [
    # Each pair is [from, to]: an incoming `from` character is replaced by
    # `to`, and a warning is recorded for the current language.
    ["'", "’"],
]
|
||
|
||
def convertByte(incoming, array):
    """Translate a Unicode code point into its index in a character table.

    Substitutions from charConversionList are applied first, and each one
    that fires is logged under the current language's "Warnings". The first
    position in `array` holding the (possibly substituted) code point is
    returned. When there is no such position the failure is logged under
    "Errors" and 0 is returned.

    Relies on the module-level `lang` and `mainDict` for logging.
    """
    for pair in charConversionList:
        original_char, replacement_char = pair[0], pair[1]
        if incoming != ord(original_char):
            continue
        incoming = ord(replacement_char)
        warning_log = mainDict[lang.name]["Warnings"]
        # Keys are consecutive integers; an empty log starts at 0.
        warning_log[max(warning_log.keys(), default=-1) + 1] = (
            f"Warning! {original_char} found, replacing with {replacement_char} !"
        )

    for position, code_point in enumerate(array):
        if code_point == incoming:
            return position

    error_log = mainDict[lang.name]["Errors"]
    error_log[max(error_log.keys(), default=-1) + 1] = (
        f"Error! No match found for char [ {chr(incoming)} ]!"
    )
    return 0
|
||
|
||
def SplitSentenceIntoLines(sentence, offset, pixelsPerChar, pixelsInLine):
    """Word-wrap one sentence so that no line is wider than `pixelsInLine`.

    :param sentence: text to wrap; a sentence consisting solely of "Ň", "Ş"
        or "ȼ" is a control character and is passed through unchanged
    :param offset: pixels already used on the current line by earlier text
    :param pixelsPerChar: "Variable" to measure with the per-glyph width
        tables, "Default" for a fixed width (8 for Japanese, 6 otherwise)
    :param pixelsInLine: maximum line width in pixels

    :return: tuple of (pixels used on the last line, number of line breaks
        emitted, wrapped text with "Ň" inserted at each break)

    Relies on the module-level `lang` for the language being processed.
    Words wider than a whole line are dropped and reported under "Errors".
    """
    # If we can optimize this to remove the spaces, it could save a few bytes.

    # The lookup table and the width table must belong to the same language.
    # Looking Japanese characters up in the English table never matches, so
    # it logged a bogus "No match" error for every kana.
    if lang == Languages.Japanese:
        charTable, widthTable, fixedWidth = jpnCharArray, jpnCharWidthArray, 8
    else:
        charTable, widthTable, fixedWidth = engCharArray, engCharWidthArray, 6

    outStr = ""
    currLine = ""
    lineCount = 0
    currWordIndex = 0
    lineLength = 0
    spaceLength = 0
    words = sentence.split()
    while currWordIndex < len(words):
        word = words[currWordIndex]
        wordLength = 0

        # Figure out the length of the word in pixels
        for char in word:
            if pixelsPerChar == "Variable":
                wordLength += widthTable[convertByte(ord(char), charTable)]
                spaceLength = widthTable[convertByte(ord(' '), charTable)]
            elif pixelsPerChar == "Default":
                wordLength += fixedWidth
                spaceLength = fixedWidth

        # See if the whole sentence is a newline
        if sentence == "Ň":
            outStr += "Ň"
            currLine = ""
            lineCount += 1
            offset = 0
            lineLength = 0
            currWordIndex += 1

        # See if the sentence is a new box
        elif sentence == "Ş" or sentence == "ȼ":
            outStr += sentence
            currLine = ""
            offset = 0
            lineLength = 0
            currWordIndex += 1

        # Test if the word is too long in general
        elif wordLength > pixelsInLine:
            next_key = max(mainDict[lang.name]["Errors"].keys(), default=-1) + 1
            mainDict[lang.name]["Errors"][next_key] = f"ERROR: Word {word} exceeds alloted length"
            currWordIndex += 1

        # Test if adding the word will go over our allotted space
        elif (wordLength + lineLength + offset) <= pixelsInLine:
            # If not, add the word and increase the index
            currLine += (word + " ")
            lineLength += (wordLength + spaceLength)
            currWordIndex += 1

        # We need to move to the next line; the word is retried on the
        # fresh line in the next iteration.
        else:
            # Every line should already have a space at the end of it. Remove it here
            outStr += (currLine[:-1] + "Ň")
            currLine = ""
            lineCount += 1
            lineLength = 0
            offset = 0

    outStr += currLine
    return lineLength + offset, lineCount, outStr
|
||
|
||
# -*- coding: utf-8 -*-
|
||
import re
|
||
# Regex fragments used by split_into_sentences. Raw strings keep "\s" from
# being parsed as an (invalid) string escape.
alphabets = r"([A-Za-z])"
prefixes = r"(Mr|St|Mrs|Ms|Dr)[.]"
suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
starters = r"(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = r"[.](com|net|org|io|gov|edu|me)"
digits = r"([0-9])"
multiple_dots = r'\.{2,}'


def split_into_sentences(text: str) -> list[str]:
    """
    Split the text into sentences.

    Periods that do not end a sentence (titles, abbreviations, decimals,
    web addresses, ellipses) are protected before the text is cut after
    ".", "?", "!" and their Japanese counterparts "。", "？", "！". The
    control characters "Ň", "ȼ" and "Ş" always become sentences of their own.

    If the text contains substrings "<prd>" or "<stop>", they would lead
    to incorrect splitting because they are used as markers for splitting.

    :param text: text to be split into sentences
    :type text: str

    :return: list of sentences, each stripped of surrounding whitespace
    :rtype: list[str]
    """
    text = " " + text + " "
    text = text.replace("\n", " ")

    # Protect periods that are not sentence terminators with <prd>.
    text = re.sub(prefixes, "\\1<prd>", text)
    text = re.sub(websites, "<prd>\\1", text)
    text = re.sub(digits + "[.]" + digits, "\\1<prd>\\2", text)
    text = re.sub(multiple_dots, lambda match: "<prd>" * len(match.group(0)) + "<stop>", text)
    if "Ph.D" in text: text = text.replace("Ph.D.", "Ph<prd>D<prd>")
    text = re.sub(r"\s" + alphabets + "[.] ", " \\1<prd> ", text)
    text = re.sub(acronyms + " " + starters, "\\1<stop> \\2", text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]", "\\1<prd>\\2<prd>\\3<prd>", text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]", "\\1<prd>\\2<prd>", text)
    text = re.sub(" " + suffixes + "[.] " + starters, " \\1<stop> \\2", text)
    text = re.sub(" " + suffixes + "[.]", " \\1<prd>", text)
    text = re.sub(" " + alphabets + "[.]", " \\1<prd>", text)

    # Move a terminator that sits inside a closing quote to the outside so
    # the quote stays attached to its sentence.
    if "”" in text: text = text.replace(".”", "”.")
    if "\"" in text: text = text.replace(".\"", "\".")
    if "!" in text: text = text.replace("!\"", "\"!")
    if "?" in text: text = text.replace("?\"", "\"?")
    if "。" in text: text = text.replace("。\"", "\"。")  # Added for Japanese support
    # The Japanese variants must use the full-width characters. Repeating
    # the ASCII ones left the full-width marks unhandled and inserted a
    # second <stop> (an empty sentence) after every "?" and "!".
    if "？" in text: text = text.replace("？\"", "\"？")  # Added for Japanese support
    if "！" in text: text = text.replace("！\"", "\"！")  # Added for Japanese support

    text = text.replace(".", ".<stop>")
    text = text.replace("?", "?<stop>")
    text = text.replace("!", "!<stop>")
    text = text.replace("。", "。<stop>")  # Added for Japanese support
    text = text.replace("？", "？<stop>")  # Added for Japanese support
    text = text.replace("！", "！<stop>")  # Added for Japanese support
    text = text.replace("<prd>", ".")
    text = text.replace("Ň", "<stop>Ň<stop>")  # Split newlines into their own sentences
    text = text.replace("ȼ", "<stop>ȼ<stop>")  # Split new boxes into their own sentences
    text = text.replace("Ş", "<stop>Ş<stop>")  # Split new boxes into their own sentences

    sentences = text.split("<stop>")
    sentences = [s.strip() for s in sentences]
    if sentences and not sentences[-1]: sentences = sentences[:-1]
    return sentences
|
||
|
||
# Languages the text tables are built for. The values are load-bearing: each
# one is the language's column offset in the translation sheet (7 + value)
# and, plus one, the PTGB_BUILD_LANGUAGE id written to the generated cpp file.
Languages = Enum(
    "Languages",
    [
        ("Japanese", 0),
        ("English", 1),
        ("French", 2),
        ("German", 3),
        ("Italian", 4),
        ("SpanishEU", 5),
        ("SpanishLA", 6),
    ],
)
|
||
|
||
# Folder that holds the downloaded spreadsheet and receives output.json.
dir = os.curdir + "/text_helper"

# mainDict[<language name>][<section>] holds the text entries of that section
# keyed by entry name; "Warnings" and "Errors" collect numbered diagnostics.
mainDict = {}

for lang in Languages:
    mainDict[lang.name] = {
        section: {}
        for section in (
            "PTGB",
            "RSEFRLG",
            "GB",
            "GENERAL",
            "CREDITS",
            "PKMN_NAMES",
            "Warnings",
            "Errors",
        )
    }
|
||
|
||
def convert_item(ogDict):
    """Lay out one text entry and encode it as a hex byte string.

    `ogDict` supplies "bytes" (the raw text), "numLines", "pixelsPerChar",
    "pixelsInLine" and "includeBoxBreaks". The text is split into sentences,
    wrapped to the line width, broken into text boxes of at most "numLines"
    lines ("ȼ" marks a new box), and every character is mapped to its table
    index. The space separated two-digit hex values, terminated by "ff",
    replace ogDict["bytes"]; the same dict is returned.

    Relies on the module-level `lang`; problems are logged to mainDict.
    """
    rawText = ogDict["bytes"]
    maxLines = ogDict["numLines"]
    pixelsPerChar = ogDict["pixelsPerChar"]
    pixelsInLine = ogDict["pixelsInLine"]
    include_box_breaks = ogDict["includeBoxBreaks"]
    sentences = split_into_sentences(rawText)

    sentenceIdx = 0
    outStr = ""
    linesInBox = 0
    offset = 0
    # Number of forced box breaks; capped so an entry that never fits
    # cannot loop forever.
    escapeCount = 0
    while sentenceIdx < len(sentences) and escapeCount < 100:
        offset, addedLines, out = SplitSentenceIntoLines(sentences[sentenceIdx], offset, pixelsPerChar, pixelsInLine)
        linesInBox += addedLines

        if out == "ȼ":
            # Explicit box break: replace the last emitted character with it.
            offset = 0
            linesInBox = 0
            outStr = outStr[:-1] + "ȼ"
            sentenceIdx += 1
        elif linesInBox < maxLines:
            sentenceIdx += 1
            outStr += out
        else:
            # The sentence does not fit in this box: start a new box and
            # retry the same sentence there.
            outStr = outStr[:-1] + "ȼ"  # new textbox character
            offset = 0
            linesInBox = 0
            escapeCount += 1
            if not include_box_breaks:
                errorLog = mainDict[lang.name]["Errors"]
                errorLog[max(errorLog.keys(), default=-1) + 1] = f"ERROR! Made a line break when disabled, sentence \"{outStr}\" is too long!"

    if escapeCount == 100:
        errorLog = mainDict[lang.name]["Errors"]
        errorLog[max(errorLog.keys(), default=-1) + 1] = f"ERROR! Sentence \"{out}\" is too long!"

    # Collapse redundant control character combinations until nothing changes.
    while True:
        cleaned = (
            outStr
            .replace(" Ň", "Ň")   # a space right before a newline just takes up space
            .replace("ȼŇ", "ȼ")   # no newline right after a new textbox
            .replace("Ňȼ", "ȼ")   # nor right before one
            .replace("ȼȼ", "ȼ")   # no two new textboxes in a row
            .replace("Şȼ", "Ş")   # a scroll followed by a new textbox keeps the scroll
            .replace("ȼŞ", "ȼ")   # a new textbox followed by a scroll keeps the textbox
        )
        if cleaned == outStr:
            break
        outStr = cleaned

    arr = jpnCharArray if lang == Languages.Japanese else engCharArray

    # A single trailing space is not encoded.
    encodable = outStr[:-1] if outStr.endswith(' ') else outStr
    byteStr = "".join(f"{convertByte(ord(char), arr):02x} " for char in encodable) + "ff"

    ogDict["bytes"] = byteStr
    return ogDict
|
||
|
||
def write_text_bin_file(filename, dictionary):
    """Convert every entry of `dictionary` and write the binary text table.

    File layout (all integers are 16 bit little endian):
      * number of entries (two bytes, which also keeps the following data
        16 bit aligned; misaligned data causes corruption on the GBA)
      * one offset per entry, relative to the start of the data section
      * the data section: the encoded bytes of every entry, back to back

    Each entry is passed through convert_item and stored back into
    `dictionary`, so its "bytes" value is a hex string afterwards.

    :raises ValueError: if the entry count or an entry offset does not fit
        into 16 bits; these used to be truncated silently, producing a
        table with wrong offsets.
    """
    entry_count = len(dictionary)
    if entry_count > 0xFFFF:
        raise ValueError(f"{filename}: {entry_count} entries do not fit into a 16 bit count")

    # index holds the start offset of each entry, bindata the entries.
    index = bytearray()
    bindata = bytearray()
    for key, line in dictionary.items():
        dictionary[key] = convert_item(line)
        current_offset = len(bindata)
        if current_offset > 0xFFFF:
            raise ValueError(f"{filename}: offset of entry {key!r} ({current_offset}) does not fit into 16 bits")
        index += current_offset.to_bytes(2, "little")
        bindata += bytes.fromhex(dictionary[key]['bytes'])

    # Only touch the file once everything converted cleanly; the with block
    # closes it.
    with open(filename, 'wb') as binFile:
        binFile.write(entry_count.to_bytes(2, "little"))
        binFile.write(index)
        binFile.write(bindata)
|
||
|
||
def write_enum_to_header_file(hFile, prefix, dictionary):
    """Write one "#define <prefix><key> <n>" line per key of `dictionary`.

    Keys are numbered from 0 in iteration order and the list is followed by
    an empty line.

    :param hFile: writable text file object
    :param prefix: text put in front of every key
    :param dictionary: mapping whose keys name the entries
    :return: the number of defines written
    """
    hFile.writelines(
        f"#define {prefix}{key} {position}\n"
        for position, key in enumerate(dictionary)
    )
    hFile.write("\n")
    return len(dictionary)
|
||
|
||
|
||
print("\n\nStarting parse: \n")

# Sheet columns by position: 0 = section, 1 = entry name, 2 = numLines,
# 3 = pixelsPerChar, 4 = pixelsInLine, 5 = includeBoxBreaks, and from 7 on
# one text column per language in Languages order.
currSheet = pd.read_excel(dir + "/text.xlsx", sheet_name="Translations")
for row in currSheet.iterrows():
    currRow = row[1]
    section, entryName = currRow.iloc[0], currRow.iloc[1]
    for lang in Languages:
        # A language without a translation falls back to the English text.
        isTranslated = not pd.isna(currRow.iloc[7 + lang.value])
        offset = lang.value if isTranslated else Languages.English.value
        mainDict[lang.name][section][entryName] = {
            "bytes": currRow.iloc[7 + offset],
            "numLines": currRow.iloc[2],
            "pixelsPerChar": currRow.iloc[3],
            "pixelsInLine": currRow.iloc[4],
            "includeBoxBreaks": currRow.iloc[5],
        }
|
||
|
||
# generate the header file
with open(os.curdir + '/include/translated_text.h', 'w') as hFile:
    hFile.write("#ifndef DIALOGUE_H\n#define DIALOGUE_H\n\n#include <tonc.h>\n\n")

    # Every sheet row is stored under every language, so all languages share
    # the same entry names in the same order. Name the reference language
    # explicitly instead of relying on whatever the loop variable `lang`
    # was left at by earlier code.
    referenceTables = mainDict[Languages.English.name]

    # PTGB entries are written without a prefix.
    num = write_enum_to_header_file(hFile, "", referenceTables["PTGB"])
    hFile.write(f"\n#define DIA_SIZE {num}\n#define DIA_END DIA_SIZE\n\n")

    # The remaining tables get their section name as prefix.
    for section in ("RSEFRLG", "GENERAL", "CREDITS", "PKMN_NAMES"):
        write_enum_to_header_file(hFile, section + "_", referenceTables[section])

    # Accessor declarations; only the PTGB getter keeps its upper case name.
    for section, getterName in (
        ("PTGB", "PTGB"),
        ("RSEFRLG", "rsefrlg"),
        ("GENERAL", "general"),
        ("CREDITS", "credits"),
        ("PKMN_NAMES", "pkmn_names"),
    ):
        hFile.write(f"/** Returns the ZX0 compressed {section} text table.*/\n")
        hFile.write(f"const u8* get_compressed_{getterName}_table();\n\n")

    hFile.write("\n#endif")
    # No explicit close(): the with block closes the file.
|
||
|
||
# now generate the text tables: one .bin per language and section, in the
# order PTGB, RSEFRLG, GENERAL, CREDITS, PKMN_NAMES ("GB" gets no table).
# The loop variable must stay named `lang`: the conversion helpers read it
# as a global.
for lang in Languages:
    for section in ("PTGB", "RSEFRLG", "GENERAL", "CREDITS", "PKMN_NAMES"):
        table_file = f"{os.curdir}/to_compress/{section}_{lang.name.lower()}.bin"
        write_text_bin_file(table_file, mainDict[lang.name][section])
|
||
|
||
# now generate the cpp file.
with open(os.curdir + '/source/translated_text.cpp', 'w') as cppFile:
    cppFile.write('#include "translated_text.h"\n#include "debug_mode.h"\n#include "pokemon_data.h"\n#include "zx0_decompressor.h"\n')

    # (section, name used in the getter); only PTGB keeps its upper case.
    tableGetters = (
        ("PTGB", "PTGB"),
        ("RSEFRLG", "rsefrlg"),
        ("GENERAL", "general"),
        ("CREDITS", "credits"),
        ("PKMN_NAMES", "pkmn_names"),
    )
    compressedSections = {section for section, _ in tableGetters}

    # generate includes for each language
    for lang in Languages:
        langSuffix = lang.name.lower()
        for cat in mainDict[lang.name]:
            if cat in compressedSections:
                cppFile.write(f'#include "{cat.upper()}_{langSuffix}_zx0_bin.h"\n')

    # One preprocessor guarded set of getters per language; the build picks
    # the set whose id equals PTGB_BUILD_LANGUAGE.
    for lang in Languages:
        langSuffix = lang.name.lower()
        cppFile.write(f"\n#if PTGB_BUILD_LANGUAGE == {lang.value + 1}\n")
        for section, getterName in tableGetters:
            cppFile.write(f"const u8* get_compressed_{getterName}_table()\n")
            cppFile.write("{\n")
            cppFile.write(f"\treturn {section}_{langSuffix}_zx0_bin;\n")
            cppFile.write("}\n\n")
        cppFile.write("#endif\n\n\n")
|
||
|
||
|
||
# Decode the encoded bytes of every entry back into text, so output.json
# shows what each byte string stands for.
for lang in Languages:
    # Decode with the table the entry was encoded with. The English table
    # used to be applied to every language, which garbled Japanese text.
    arr = jpnCharArray if lang == Languages.Japanese else engCharArray
    for cat in mainDict[lang.name]:
        if cat in {"PTGB", "RSEFRLG", "GENERAL", "CREDITS", "PKMN_NAMES"}:
            for item in mainDict[lang.name][cat]:
                entry = mainDict[lang.name][cat][item]
                # "bytes" is the space separated hex string from convert_item.
                string = entry["bytes"].split(" ")
                entry["text"] = "".join(chr(arr[int(byte, 16)]) for byte in string)

with open(dir + '/output.json', 'w') as jsonFile:
    jsonFile.write(json.dumps(mainDict))
|
||
|