mirror of
https://github.com/GearsProgress/Poke_Transporter_GB.git
synced 2026-03-21 17:34:42 -05:00
581 lines
26 KiB
Python
Executable File
581 lines
26 KiB
Python
Executable File
# import pandas lib as pd
|
||
import pandas as pd
|
||
import os
|
||
from enum import Enum
|
||
import json
|
||
import requests
|
||
from collections import defaultdict
|
||
import copy
|
||
import math
|
||
import sys
|
||
import filecmp
|
||
|
||
update = True

print ("Running text_helper:")

# Refresh text_helper/text.xlsx from the published spreadsheet.  When the
# sheet is unchanged and output.json already exists the script stops here,
# otherwise the fresh download replaces the local copy and parsing continues.
if update:

    url = 'https://docs.google.com/spreadsheets/d/14LLs5lLqWasFcssBmJdGXjjYxARAJBa_QUOUhXZt4v8/export?format=xlsx'
    new_file_path = 'text_helper/new_text.xlsx'
    old_file_path = 'text_helper/text.xlsx'
    json_file_path = 'text_helper/output.json'
    # True when nothing was downloaded and the local copy has to be used.
    no_file = False

    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        if response.status_code == 200:
            with open(new_file_path, 'wb') as file:
                file.write(response.content)
            print('File downloaded successfully')
    except requests.exceptions.ReadTimeout:
        if os.path.exists(old_file_path):
            print("Connection timed out. Continuing with locally downloaded file.")
            no_file = True
        else:
            # Nothing to parse at all: stop instead of failing later on a
            # rename of a file that was never written.
            print("xlsx file is missing and connection timed out. Exiting...")
            sys.exit(1)
    except requests.exceptions.ConnectionError:
        if os.path.exists(old_file_path):
            print("Connection error. Continuing with locally downloaded file.")
            no_file = True
        else:
            print("xlsx file is missing and connection failed. Exiting...")
            sys.exit(1)

    if os.path.exists(old_file_path):
        # Without a download there is nothing to compare against, so the
        # local file counts as unchanged.
        unchanged = no_file
        if not no_file:
            new_file = pd.read_excel(new_file_path, sheet_name="Translations")
            old_file = pd.read_excel(old_file_path, sheet_name="Translations")
            unchanged = new_file.equals(old_file)
        if unchanged:
            if os.path.exists(json_file_path):
                print("Downloaded file is identical. Skipping parse\n")
                if not no_file:
                    os.remove(new_file_path)
                sys.exit(0)
            print("json file missing - forcing rebuild.")
        if not no_file:
            # Only swap the files when a download exists; otherwise the
            # local copy is the only data available and must be kept.
            os.replace(new_file_path, old_file_path)
    else:
        print("xlsx file missing - forcing rebuild.")
        os.replace(new_file_path, old_file_path)
|
||
|
||
|
||
# Character table used for every language except Japanese.  The position of
# a Unicode code point in this list is the byte value written for that
# character (convertByte returns the index of the first match), so the table
# is laid out as 16 rows of 16 entries: row = high nibble, column = low nibble.
# 0x20 appears in many slots; because the first match wins, a space is always
# encoded as byte 0x00.
engCharArray = [
    0x20, 0xC0, 0xC1, 0xC2, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0x20, 0xCE, 0xCF, 0xD2, 0xD3, 0xD4,
    0x152, 0xD9, 0xDA, 0xDB, 0xD1, 0xDF, 0xE0, 0xE1, 0x20, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0x20,
    0xEE, 0xEF, 0xF2, 0xF3, 0xF4, 0x153, 0xF9, 0xFA, 0xFB, 0xF1, 0xBA, 0xAA, 0x1D49, 0x26, 0x2B, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x3D, 0x3B, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x25AF, 0xBF, 0xA1, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xCD, 0x25, 0x28, 0x29, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xE2, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xED,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2B07, 0x2B05, 0x27A1, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x1D49, 0x3C, 0x3E, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x2B3, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x21, 0x3F, 0x2E, 0x2D, 0x30FB,
    0x2026, 0x201C, 0x201D, 0x2018, 0x2019, 0x2642, 0x2640, 0x20, 0x2C, 0xD7, 0x2F, 0x41, 0x42, 0x43, 0x44, 0x45,
    0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55,
    0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
    0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x25B6,
    0x3A, 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC, 0x2A, 0x20, 0x20, 0x15E, 0x23C, 0x206, 0x1B2, 0x147, 0x19E,
]
|
||
|
||
# Width in pixels of each byte value for the non-Japanese languages.  It is
# indexed with the value returned by convertByte and is only consulted by
# SplitSentenceIntoLines when an entry's pixelsPerChar is "Variable".
# Same 16x16 layout as engCharArray.
engCharWidthArray = [
    0x4, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x8, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x8, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x9, 0x6, 0x6, 0x0,
    0x0, 0x0, 0x0, 0x0, 0xA, 0x8, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x6, 0x6, 0x4, 0x8, 0x8, 0x8, 0x7, 0x8, 0x8, 0x4, 0x6, 0x6, 0x4, 0x4, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x6, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x6,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0x7, 0x7, 0x7, 0x2, 0x3, 0x4,
    0x5, 0x5, 0x6, 0x7, 0x5, 0x6, 0x6, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x8, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x4, 0x6, 0x3, 0x6, 0x3,
    0x6, 0x6, 0x6, 0x3, 0x3, 0x6, 0x6, 0x6, 0x3, 0x7, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x4, 0x5, 0x6,
    0x4, 0x6, 0x6, 0x6, 0x6, 0x6, 0x5, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x8,
    0x3, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x38, 0x0, 0x0, ]
|
||
|
||
# Character table used when the current language is Languages.Japanese.
# As with engCharArray, the index of a Unicode code point is the byte value
# written for that character (first match wins in convertByte).
# 16 rows of 16 entries: row = high nibble, column = low nibble.
jpnCharArray = [
    0x20, 0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x304B, 0x304D, 0x304F, 0x3051, 0x3053, 0x3055, 0x3057, 0x3059, 0x305B, 0x305D,
    0x305F, 0x3061, 0x3064, 0x3066, 0x3068, 0x306A, 0x306B, 0x306C, 0x306D, 0x306E, 0x306F, 0x3072, 0x3075, 0x3078, 0x307B, 0x307E,
    0x307F, 0x3080, 0x3081, 0x3082, 0x3084, 0x3086, 0x3088, 0x3089, 0x308A, 0x308B, 0x308C, 0x308D, 0x308F, 0x3092, 0x3093, 0x3041,
    0x3043, 0x3045, 0x3047, 0x3049, 0x3083, 0x3085, 0x3087, 0x304C, 0x304E, 0x3050, 0x3052, 0x3054, 0x3056, 0x3058, 0x305A, 0x305C,
    0x305E, 0x3060, 0x3062, 0x3065, 0x3067, 0x3069, 0x3070, 0x3073, 0x3076, 0x3079, 0x307C, 0x3071, 0x3074, 0x3077, 0x307A, 0x307D,
    0x3063, 0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30AB, 0x30AD, 0x30AF, 0x30B1, 0x30B3, 0x30B5, 0x30B7, 0x30B9, 0x30BB, 0x30BD,
    0x30BF, 0x30C1, 0x30C4, 0x30C6, 0x30C8, 0x30CA, 0x30CB, 0x30CC, 0x30CD, 0x30CE, 0x30CF, 0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30DE,
    0x30DF, 0x30E0, 0x30E1, 0x30E2, 0x30E4, 0x30E6, 0x30E8, 0x30E9, 0x30EA, 0x20, 0x30EC, 0x30ED, 0x30EF, 0x30F2, 0x30F3, 0x30A1,
    0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30E3, 0x30E5, 0x30E7, 0x30AC, 0x30AE, 0x30B0, 0x30B2, 0x30B4, 0x30B6, 0x30B8, 0x30BA, 0x30BC,
    0x30BE, 0x30C0, 0x30C2, 0x30C5, 0x30C7, 0x30C9, 0x30D0, 0x30D3, 0x30D6, 0x30D9, 0x30DC, 0x30D1, 0x30D4, 0x30D7, 0x30DA, 0x30DD,
    0x30C3, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFF01, 0xFF1F, 0x3002, 0x30FC, 0x30FB,
    0x30FB, 0x300E, 0x300F, 0x300C, 0x300D, 0x2642, 0x2640, 0x5186, 0x2E, 0xD7, 0x2F, 0x41, 0x42, 0x43, 0x44, 0x45,
    0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55,
    0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
    0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x25B6,
    0x3A, 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC, 0x2A, 0x20, 0x20, 0x15E, 0x23C, 0x206, 0x1B2, 0x147, 0x19E,
]
|
||
|
||
# Width in pixels of each byte value when the language is Japanese; consulted
# by SplitSentenceIntoLines when pixelsPerChar is "Variable".  Every glyph is
# 8 pixels wide except a handful of entries in the last row.
jpnCharWidthArray = [
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x28, 0x0, 0x0, ]
|
||
|
||
# Substitutions applied by convertByte before a character is looked up in a
# character table.  Each entry is a [from, to] pair of single characters;
# every substitution that fires is recorded under the language's "Warnings".
charConversionList = [
    # replaces the first char in the list with the latter
    ["'", "’"],
]
|
||
|
||
def convertByte(incoming, array):
    """Translate a Unicode code point into its byte value for one table.

    Substitutions from ``charConversionList`` are applied first and each one
    is recorded under the current language's "Warnings".  The byte value is
    the position of the first entry in ``array`` equal to the code point.

    :param incoming: code point (``ord`` of the character) to translate
    :param array: character table to search (e.g. engCharArray)
    :return: index of the code point in ``array``; 0 when it is absent, in
             which case an entry is added to the language's "Errors"
    """
    for pair in charConversionList:
        source_char, target_char = pair[0], pair[1]
        if incoming != ord(source_char):
            continue
        incoming = ord(target_char)
        #print(f"Warning! {pair[0]} found, replacing with {pair[1]} !")
        warnings = mainDict[lang.name]["Warnings"]
        warnings[max(warnings.keys(), default=-1) + 1] = f"Warning! {source_char} found, replacing with {target_char} !"

    for position, code_point in enumerate(array):
        if code_point == incoming:
            return position

    # Nothing matched: log it against the language currently being processed
    # and fall back to byte 0.
    #print(f"Error! No match found for char [ {chr(incoming)} ]!")
    errors = mainDict[lang.name]["Errors"]
    errors[max(errors.keys(), default=-1) + 1] = f"Error! No match found for char [ {chr(incoming)} ]!"
    return 0
|
||
|
||
def SplitSentenceIntoLines(sentence, offset, pixelsPerChar, pixelsInLine):
    """Word-wrap one sentence so that no line exceeds ``pixelsInLine``.

    Line breaks are written as "Ň".  A sentence that consists only of a
    control character ("Ň" newline, "Ş" / "ȼ" new box) is passed through
    unchanged.

    :param sentence: sentence to wrap (already stripped)
    :param offset: pixels already used on the current line by earlier text
    :param pixelsPerChar: "Variable" to use the per-character width tables,
                          "Default" for a fixed width (8 px Japanese, 6 px
                          otherwise); any other value measures every word as 0
    :param pixelsInLine: maximum line width in pixels
    :return: tuple ``(pixels used on the last line, line breaks added,
             wrapped text)``; wrapped text keeps a trailing space after the
             last word
    """
    # If we can optimize this to remove the spaces, it could save a few bytes.

    # Control sentences carry no words to measure.
    if sentence == "Ň":
        return 0, 1, "Ň"
    if sentence == "Ş" or sentence == "ȼ":
        return 0, 0, sentence

    isJapanese = (lang == Languages.Japanese)
    charTable = None
    widthTable = None
    fixedWidth = 0
    spaceLength = 0
    if pixelsPerChar == "Variable":
        # Look characters up in the table of the language being processed:
        # searching the English table for Japanese text matched nothing and
        # logged a spurious error for every character.
        charTable = jpnCharArray if isJapanese else engCharArray
        widthTable = jpnCharWidthArray if isJapanese else engCharWidthArray
        spaceLength = widthTable[convertByte(ord(' '), charTable)]
    elif pixelsPerChar == "Default":
        fixedWidth = 8 if isJapanese else 6
        spaceLength = fixedWidth

    outStr = ""
    currLine = ""
    lineCount = 0
    lineLength = 0
    for word in sentence.split():
        # Measure each word exactly once so that any warning or error raised
        # by convertByte is recorded once, even if the word has to wrap.
        if widthTable is not None:
            wordLength = sum(widthTable[convertByte(ord(char), charTable)] for char in word)
        else:
            wordLength = fixedWidth * len(word)

        # A word wider than a whole line can never fit: report and drop it.
        if wordLength > pixelsInLine:
            #print(f"ERROR: Word {word} exceeds alloted length")
            next_key = max(mainDict[lang.name]["Errors"].keys(), default=-1) + 1
            mainDict[lang.name]["Errors"][next_key] = f"ERROR: Word {word} exceeds alloted length"
            continue

        # Start a new line when the word does not fit behind what is there.
        if (wordLength + lineLength + offset) > pixelsInLine:
            # Every line ends with a space; strip it before the line break.
            outStr += (currLine[:-1] + "Ň")
            currLine = ""
            lineCount += 1
            lineLength = 0
            offset = 0

        currLine += (word + " ")
        lineLength += (wordLength + spaceLength)

    outStr += currLine
    return lineLength + offset, lineCount, outStr
|
||
|
||
# -*- coding: utf-8 -*-
|
||
import re
|
||
# Regular-expression fragments used by split_into_sentences.
alphabets = r"([A-Za-z])"
prefixes = r"(Mr|St|Mrs|Ms|Dr)[.]"
suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
starters = r"(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = r"[.](com|net|org|io|gov|edu|me)"
digits = r"([0-9])"
multiple_dots = r'\.{2,}'


def split_into_sentences(text: str) -> list[str]:
    """
    Split the text into sentences.

    Periods that do not end a sentence (titles, acronyms, decimals, domain
    names, ellipses) are protected first; the text is then cut after ".",
    "?", "!" and their Japanese counterparts.  The control characters
    "Ň", "ȼ" and "Ş" are always returned as sentences of their own.

    If the text contains substrings "<prd>" or "<stop>", they would lead
    to incorrect splitting because they are used as markers for splitting.

    :param text: text to be split into sentences
    :type text: str

    :return: list of sentences, each stripped of surrounding whitespace
    :rtype: list[str]
    """
    text = " " + text + " "
    text = text.replace("\n", " ")

    # Protect periods that are not sentence ends by turning them into <prd>.
    text = re.sub(prefixes, "\\1<prd>", text)
    text = re.sub(websites, "<prd>\\1", text)
    text = re.sub(digits + "[.]" + digits, "\\1<prd>\\2", text)
    text = re.sub(multiple_dots, lambda match: "<prd>" * len(match.group(0)) + "<stop>", text)
    text = text.replace("Ph.D.", "Ph<prd>D<prd>")
    text = re.sub(r"\s" + alphabets + "[.] ", " \\1<prd> ", text)
    text = re.sub(acronyms + " " + starters, "\\1<stop> \\2", text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]", "\\1<prd>\\2<prd>\\3<prd>", text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]", "\\1<prd>\\2<prd>", text)
    text = re.sub(" " + suffixes + "[.] " + starters, " \\1<stop> \\2", text)
    text = re.sub(" " + suffixes + "[.]", " \\1<prd>", text)
    text = re.sub(" " + alphabets + "[.]", " \\1<prd>", text)

    # Sentence terminators.  The last three are the Japanese ones, written
    # as escapes so they cannot be folded into their ASCII look-alikes:
    # U+3002 ideographic full stop, U+FF1F fullwidth "?", U+FF01 fullwidth "!".
    # Listing the ASCII marks twice here would insert two <stop> markers per
    # "?" / "!" and leave the fullwidth marks unsplit.
    japanese_stop, japanese_question, japanese_exclaim = "\u3002", "\uff1f", "\uff01"

    # Move a terminator that sits inside a closing quote to the outside so
    # that the quote stays attached to its sentence.
    text = text.replace(".\u201d", "\u201d.")
    for mark in (".", "!", "?", japanese_stop, japanese_question, japanese_exclaim):
        text = text.replace(mark + "\"", "\"" + mark)

    for mark in (".", "?", "!", japanese_stop, japanese_question, japanese_exclaim):
        text = text.replace(mark, mark + "<stop>")
    text = text.replace("<prd>", ".")

    text = text.replace("Ň", "<stop>Ň<stop>")  # Split newlines into their own sentences
    text = text.replace("ȼ", "<stop>ȼ<stop>")  # Split new boxes into their own sentences
    text = text.replace("Ş", "<stop>Ş<stop>")  # Split new boxes into their own sentences

    sentences = [s.strip() for s in text.split("<stop>")]
    if sentences and not sentences[-1]:
        sentences = sentences[:-1]
    return sentences
|
||
|
||
class Languages(Enum):
    """Languages the text tables are generated for.

    The numeric value is used as the column offset of the language's text in
    the "Translations" sheet (column 7 + value), and value + 1 is the
    PTGB_BUILD_LANGUAGE number written to the generated C++ file.
    """
    Japanese = 0
    English = 1
    French = 2
    German = 3
    Italian = 4
    SpanishEU = 5
    SpanishLA = 6
|
||
|
||
# directory that holds the spreadsheet (text.xlsx) and the generated output.json
|
||
dir = f"{os.curdir}/text_helper"

# mainDict[language name][section] -> entries of that section.  The first six
# sections receive spreadsheet rows; "Warnings" and "Errors" collect the
# diagnostics produced while converting that language.
mainDict = {}

for lang in Languages:
    mainDict[lang.name] = {
        section: {}
        for section in (
            "PTGB",
            "RSEFRLG",
            "GB",
            "GENERAL",
            "CREDITS",
            "PKMN_NAMES",
            "Warnings",
            "Errors",
        )
    }
|
||
|
||
def convert_item(ogDict):
    """Lay out one text entry and replace its text with encoded bytes.

    The entry's text is split into sentences, word-wrapped to the entry's
    line width and broken into text boxes ("ȼ") whenever a box would exceed
    its number of lines.  The result is encoded with the character table of
    the language currently being processed and stored back in
    ``ogDict["bytes"]`` as space-separated two-digit hex values terminated
    by "ff".

    :param ogDict: entry with the keys "bytes" (the text), "numLines",
                   "pixelsPerChar", "pixelsInLine" and "includeBoxBreaks"
    :return: the same dictionary, with "bytes" replaced by the hex string
    """
    rawText = ogDict["bytes"]
    maxLines = ogDict["numLines"]
    widthMode = ogDict["pixelsPerChar"]
    lineWidth = ogDict["pixelsInLine"]
    allowBoxBreaks = ogDict["includeBoxBreaks"]
    sentences = split_into_sentences(rawText)

    text = ""
    sentenceIdx = 0
    linesInBox = 0
    carry = 0
    forcedBreaks = 0
    # forcedBreaks doubles as an escape hatch: a sentence that never fits in
    # one box would otherwise be retried forever.
    while sentenceIdx < len(sentences) and forcedBreaks < 100:
        carry, addedLines, wrapped = SplitSentenceIntoLines(sentences[sentenceIdx], carry, widthMode, lineWidth)
        linesInBox += addedLines

        if wrapped == "ȼ":
            # Explicit new box requested by the text itself.
            carry = 0
            linesInBox = 0
            text = text[:-1] + "ȼ"
            sentenceIdx += 1
            continue

        if linesInBox < maxLines:
            #print(split_sents[index])
            sentenceIdx += 1
            text += wrapped
            continue

        # The sentence does not fit in the current box: open a new box and
        # retry the same sentence from an empty box.
        text = text[:-1] + "ȼ"  # new textbox character
        carry = 0
        linesInBox = 0
        forcedBreaks += 1
        #print(index)
        if not allowBoxBreaks:
            #print(f"ERROR! Made a line break when disabled, sentence \"{outStr}\" is too long!")
            errors = mainDict[lang.name]["Errors"]
            errors[max(errors.keys(), default=-1) + 1] = f"ERROR! Made a line break when disabled, sentence \"{text}\" is too long!"

    if forcedBreaks == 100:
        #print(f"ERROR! Sentence \"{out}\" is too long!")
        errors = mainDict[lang.name]["Errors"]
        errors[max(errors.keys(), default=-1) + 1] = f"ERROR! Sentence \"{wrapped}\" is too long!"

    # Collapse redundant control sequences until nothing changes any more.
    cleanups = (
        (" Ň", "Ň"),   # a space right before a newline just takes up space
        ("ȼŇ", "ȼ"),   # no newline right after a new textbox
        ("Ňȼ", "ȼ"),   # no newline right before a new textbox
        ("ȼȼ", "ȼ"),   # no new textbox after a new textbox
        ("Şȼ", "Ş"),   # no new textbox right after a scroll
        ("ȼŞ", "ȼ"),   # no scroll right after a new textbox
    )
    while True:
        cleaned = text
        for old, new in cleanups:
            cleaned = cleaned.replace(old, new)
        if cleaned == text:
            break
        text = cleaned

    table = jpnCharArray if lang == Languages.Japanese else engCharArray
    # A single trailing space is not encoded.
    encodable = text[:-1] if text.endswith(" ") else text
    hexBytes = [f"{convertByte(ord(char), table):02x}" for char in encodable]
    hexBytes.append("ff")

    ogDict["bytes"] = " ".join(hexBytes)
    return ogDict
|
||
|
||
def write_text_bin_file(filename, dictionary):
    """Convert every entry of a section and write it as a binary text table.

    File layout (all integers little endian):
      * 2 bytes: number of entries (2 rather than 1 so that the data that
        follows stays 16 bit aligned, which the GBA requires)
      * 2 bytes per entry: offset of the entry's data, relative to the end
        of this index
      * the encoded data of all entries, back to back

    Each entry of ``dictionary`` is replaced by the result of
    ``convert_item``.  The table is assembled in memory and only written
    once it is known to be valid, so an aborted run leaves no partial file.

    :param filename: path of the file to (over)write
    :param dictionary: mapping of entry key to entry dictionary
    :raises SystemExit: with status 1 when an entry is longer than 1024
        bytes or when a count or offset does not fit in 16 bits
    """
    entry_count = len(dictionary)
    if entry_count > 0xFFFF:
        print(f"Error: {entry_count} entries do not fit the 16 bit entry count", file=sys.stderr)
        sys.exit(1)

    # index holds the 16 bit offset of each entry, bindata the entries.
    index = bytearray()
    bindata = bytearray()

    for key, line in dictionary.items():
        dictionary[key] = convert_item(line)
        linedata = bytes.fromhex(dictionary[key]['bytes'])

        if len(linedata) > 1024:
            print(f"Error: entry '{key}' numBytes exceeds 1024 (got {len(linedata)})", file=sys.stderr)
            sys.exit(1)

        current_offset = len(bindata)
        # Masking the offset down to 16 bits would silently point the entry
        # at the wrong data, so refuse to build such a table.
        if current_offset > 0xFFFF:
            print(f"Error: entry '{key}' starts at offset {current_offset}, which does not fit the 16 bit index", file=sys.stderr)
            sys.exit(1)

        index += current_offset.to_bytes(2, "little")
        bindata += linedata

    with open(filename, 'wb') as binFile:
        binFile.write(entry_count.to_bytes(2, "little"))
        binFile.write(index)
        binFile.write(bindata)
|
||
|
||
def write_enum_to_header_file(hFile, prefix, dictionary):
    """Write one ``#define`` per entry key, numbered in dictionary order.

    :param hFile: writable text file object
    :param prefix: text placed in front of every key
    :param dictionary: mapping whose keys become the macro names
    :return: number of macros written
    """
    count = 0
    for count, key in enumerate(dictionary, start=1):
        # Macro values start at 0, so they are one behind the running count.
        hFile.write(f"#define {prefix}{key} {count - 1}\n")
    hFile.write("\n")
    return count
|
||
|
||
|
||
print("Starting parse:")

# Every spreadsheet row becomes one entry per language.  Columns: 0 section,
# 1 entry key, 2 number of lines, 3 width mode, 4 line width in pixels,
# 5 box-break flag, 7 + language value the text in that language.
currSheet = pd.read_excel(dir + "/text.xlsx", sheet_name="Translations")
for _, currRow in currSheet.iterrows():
    #print(currRow)
    section = currRow.iloc[0]
    entryKey = currRow.iloc[1]
    for lang in Languages:
        # An empty translation cell falls back to the English text.
        if pd.isna(currRow.iloc[7 + lang.value]):
            offset = Languages.English.value
        else:
            offset = lang.value
        mainDict[lang.name][section][entryKey] = {
            "bytes": currRow.iloc[7 + offset],
            "numLines": currRow.iloc[2],
            "pixelsPerChar": currRow.iloc[3],
            "pixelsInLine": currRow.iloc[4],
            "includeBoxBreaks": currRow.iloc[5],
        }
|
||
|
||
# generate the header file
with open (os.curdir + '/include/translated_text.h', 'w') as hFile:
    hFile.write("#ifndef DIALOGUE_H\n#define DIALOGUE_H\n\n#include <tonc.h>\n\n")

    # Every spreadsheet row is stored for every language, so all languages
    # share the same entry keys.  Name the language explicitly rather than
    # relying on whatever the previous loop left behind in `lang`.
    headerEntries = mainDict[Languages.English.name]

    # PTGB
    num = write_enum_to_header_file(hFile, "", headerEntries["PTGB"])
    hFile.write(f"\n#define DIA_SIZE {num}\n#define DIA_END DIA_SIZE\n\n")

    # RSEFRLG, GENERAL, CREDITS, PKMN_NAMES
    for section in ("RSEFRLG", "GENERAL", "CREDITS", "PKMN_NAMES"):
        write_enum_to_header_file(hFile, section + "_", headerEntries[section])

    # Prototype of the accessor of each compressed table.
    for section, accessor in (
        ("PTGB", "get_compressed_PTGB_table"),
        ("RSEFRLG", "get_compressed_rsefrlg_table"),
        ("GENERAL", "get_compressed_general_table"),
        ("CREDITS", "get_compressed_credits_table"),
        ("PKMN_NAMES", "get_compressed_pkmn_names_table"),
    ):
        hFile.write(f"/** Returns the LZ10 compressed {section} text table.*/\n")
        hFile.write(f"const u8* {accessor}();\n\n")

    hFile.write("\n#endif")
|
||
|
||
# now generate the text tables
# NOTE: the loop variable has to stay the module-level name `lang`, because
# the conversion functions read it to select the character table and the
# diagnostics bucket.
for lang in Languages:
    for section in ("PTGB", "RSEFRLG", "GENERAL", "CREDITS", "PKMN_NAMES"):
        table_file = f"{os.curdir}/to_compress/{section}_{lang.name.lower()}.bin"
        write_text_bin_file(table_file, mainDict[lang.name][section])
|
||
|
||
# now generate the cpp file.
with open(os.curdir + '/source/translated_text.cpp', 'w') as cppFile:
    cppFile.write("#include \"translated_text.h\"\n#include \"debug_mode.h\"\n#include \"extern_pokemon_data.h\"\n")

    # generate includes for each language
    tableSections = {"PTGB", "RSEFRLG", "GENERAL", "CREDITS", "PKMN_NAMES"}
    for lang in Languages:
        langSuffix = lang.name.lower()
        for cat in mainDict[lang.name]:
            if cat not in tableSections:
                continue
            cppFile.write(f"#include \"{cat.upper()}_{langSuffix}_lz10_bin.h\"\n")

    # One accessor per table; only the block whose number matches
    # PTGB_BUILD_LANGUAGE is compiled.
    accessors = (
        ("get_compressed_PTGB_table", "PTGB"),
        ("get_compressed_rsefrlg_table", "RSEFRLG"),
        ("get_compressed_general_table", "GENERAL"),
        ("get_compressed_credits_table", "CREDITS"),
        ("get_compressed_pkmn_names_table", "PKMN_NAMES"),
    )
    for lang in Languages:
        langSuffix = lang.name.lower()
        cppFile.write(f"\n#if PTGB_BUILD_LANGUAGE == {lang.value + 1}\n")
        for accessor, symbol in accessors:
            cppFile.write(f"const u8* {accessor}()\n")
            cppFile.write("{\n")
            cppFile.write(f"\treturn {symbol}_{langSuffix}_lz10_bin;\n")
            cppFile.write("}\n\n")
        cppFile.write("#endif\n\n\n")
|
||
|
||
|
||
# Decode every encoded entry back into readable text (stored under "text")
# so that output.json shows what was actually encoded, then dump everything,
# including the collected warnings and errors.
for lang in Languages:
    # Decode with the same table the entry was encoded with; using the
    # English table for Japanese would turn the text into unrelated glyphs.
    if lang == Languages.Japanese:
        arr = jpnCharArray
    else:
        arr = engCharArray
    for cat in mainDict[lang.name]:
        if cat in {"PTGB", "RSEFRLG", "GENERAL", "CREDITS", "PKMN_NAMES"}:
            for item in mainDict[lang.name][cat]:
                string = mainDict[lang.name][cat][item]["bytes"].split(" ")
                # Each element is a two-digit hex byte, i.e. a table index.
                outText = "".join(chr(arr[int(byte, 16)]) for byte in string)
                mainDict[lang.name][cat][item]["text"] = outText

with open(dir + '/output.json', 'w') as jsonFile:
    jsonFile.write(json.dumps(mainDict))

print("Parse finished!\n")
|