# mirror of https://github.com/GearsProgress/Poke_Transporter_GB.git
# synced 2026-03-21 17:34:42 -05:00
# 606 lines, 26 KiB, Python, executable file
# import pandas lib as pd
|
||
import pandas as pd
|
||
import os
|
||
from enum import Enum
|
||
import json
|
||
import requests
|
||
from collections import defaultdict
|
||
import copy
|
||
import math
|
||
import sys
|
||
import filecmp
|
||
from pathlib import Path
|
||
|
||
|
||
# International (Gen III) font table: index = in-game byte value, value = the
# Unicode code point that byte renders as. 0x20 marks unused/blank slots.
engCharArray = [
    0x20, 0xC0, 0xC1, 0xC2, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0x20, 0xCE, 0xCF, 0xD2, 0xD3, 0xD4,
    0x152, 0xD9, 0xDA, 0xDB, 0xD1, 0xDF, 0xE0, 0xE1, 0x20, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0x20,
    0xEE, 0xEF, 0xF2, 0xF3, 0xF4, 0x153, 0xF9, 0xFA, 0xFB, 0xF1, 0xBA, 0xAA, 0x1D49, 0x26, 0x2B, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x3D, 0x3B, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x25AF, 0xBF, 0xA1, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xCD, 0x25, 0x28, 0x29, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xE2, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xED,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2B07, 0x2B05, 0x27A1, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x1D49, 0x3C, 0x3E, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x2B3, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x21, 0x3F, 0x2E, 0x2D, 0x30FB,
    0x2025, 0x201C, 0x201D, 0x2018, 0x2019, 0x2642, 0x2640, 0x20, 0x2C, 0xD7, 0x2F, 0x41, 0x42, 0x43, 0x44, 0x45,
    0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55,
    0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
    0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x25B6,
    0x3A, 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC, 0x2A, 0x20, 0x20, 0x15E, 0x23C, 0x206, 0x1B2, 0x147, 0x19E,
]
|
||
|
||
# Pixel width of each glyph in engCharArray (parallel array, same indexing).
# 0x0 marks slots with no printable glyph.
engCharWidthArray = [
    0x4, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x8, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x8, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x9, 0x6, 0x6, 0x0,
    0x0, 0x0, 0x0, 0x0, 0xA, 0x8, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x6, 0x6, 0x4, 0x8, 0x8, 0x8, 0x7, 0x8, 0x8, 0x4, 0x6, 0x6, 0x4, 0x4, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x6, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x6,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0x7, 0x7, 0x7, 0x2, 0x3, 0x4,
    0x5, 0x5, 0x6, 0x7, 0x5, 0x6, 0x6, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x8, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x4, 0x6, 0x3, 0x6, 0x3,
    0x6, 0x6, 0x6, 0x3, 0x3, 0x6, 0x6, 0x6, 0x3, 0x7, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x4, 0x5, 0x6,
    0x4, 0x6, 0x6, 0x6, 0x6, 0x6, 0x5, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x8,
    0x3, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x38, 0x0, 0x0,
]
|
||
|
||
# Japanese (Gen III) font table: index = in-game byte value, value = the
# Unicode code point that byte renders as (hiragana, katakana, then Latin).
jpnCharArray = [
    0x20, 0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x304B, 0x304D, 0x304F, 0x3051, 0x3053, 0x3055, 0x3057, 0x3059, 0x305B, 0x305D,
    0x305F, 0x3061, 0x3064, 0x3066, 0x3068, 0x306A, 0x306B, 0x306C, 0x306D, 0x306E, 0x306F, 0x3072, 0x3075, 0x3078, 0x307B, 0x307E,
    0x307F, 0x3080, 0x3081, 0x3082, 0x3084, 0x3086, 0x3088, 0x3089, 0x308A, 0x308B, 0x308C, 0x308D, 0x308F, 0x3092, 0x3093, 0x3041,
    0x3043, 0x3045, 0x3047, 0x3049, 0x3083, 0x3085, 0x3087, 0x304C, 0x304E, 0x3050, 0x3052, 0x3054, 0x3056, 0x3058, 0x305A, 0x305C,
    0x305E, 0x3060, 0x3062, 0x3065, 0x3067, 0x3069, 0x3070, 0x3073, 0x3076, 0x3079, 0x307C, 0x3071, 0x3074, 0x3077, 0x307A, 0x307D,
    0x3063, 0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30AB, 0x30AD, 0x30AF, 0x30B1, 0x30B3, 0x30B5, 0x30B7, 0x30B9, 0x30BB, 0x30BD,
    0x30BF, 0x30C1, 0x30C4, 0x30C6, 0x30C8, 0x30CA, 0x30CB, 0x30CC, 0x30CD, 0x30CE, 0x30CF, 0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30DE,
    0x30DF, 0x30E0, 0x30E1, 0x30E2, 0x30E4, 0x30E6, 0x30E8, 0x30E9, 0x30EA, 0x30EB, 0x30EC, 0x30ED, 0x30EF, 0x30F2, 0x30F3, 0x30A1,
    0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30E3, 0x30E5, 0x30E7, 0x30AC, 0x30AE, 0x30B0, 0x30B2, 0x30B4, 0x30B6, 0x30B8, 0x30BA, 0x30BC,
    0x30BE, 0x30C0, 0x30C2, 0x30C5, 0x30C7, 0x30C9, 0x30D0, 0x30D3, 0x30D6, 0x30D9, 0x30DC, 0x30D1, 0x30D4, 0x30D7, 0x30DA, 0x30DD,
    0x30C3, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFF01, 0xFF1F, 0x3002, 0x30FC, 0x30FB,
    0x30FB, 0x300E, 0x300F, 0x300C, 0x300D, 0x2642, 0x2640, 0x5186, 0x2E, 0xD7, 0x2F, 0x41, 0x42, 0x43, 0x44, 0x45,
    0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55,
    0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
    0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x25B6,
    0x3A, 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC, 0x2A, 0x20, 0x20, 0x15E, 0x23C, 0x206, 0x1B2, 0x147, 0x19E,
]
|
||
|
||
# Pixel width of each glyph in jpnCharArray (parallel array, same indexing).
# Japanese glyphs are fixed-width (8 px); 0x0 marks non-printable slots.
jpnCharWidthArray = [
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x28, 0x0, 0x0,
]
|
||
|
||
# Character substitutions applied before table lookup in convertByte().
charConversionList = [
    # replaces the first char in the list with the latter
    ["'", "’"],
]
|
||
|
||
# International escape tokens -> raw in-game byte sequences (indices into
# engCharArray). Applied to "{XXX}" placeholders before encoding.
itlEscapeCharConversionList = [
    ["{SCL}", [0xFA]],               # scroll
    ["{CLR}", [0xFB]],               # clear textbox
    ["{DEF}", [0xFC, 0x01, 0x02]],   # default text color
    ["{FEM}", [0xFC, 0x01, 0x04]],   # female text color
    ["{FPC}", [0xFC, 0x01, 0x06]],   # foreign-Pokemon text color
    ["{MLE}", [0xFC, 0x01, 0x08]],   # male text color
    ["{PLR}", [0xFD, 0x01]],         # player name
    ["{NEW}", [0xFE]],               # newline
    ["{END}", [0xFF]],               # terminator
]
|
||
|
||
# Japanese escape tokens -> raw in-game byte sequences (indices into
# jpnCharArray). Control-code payloads differ from the international table.
jpnEscapeCharConversionList = [
    ["{SCL}", [0xFA]],
    ["{CLR}", [0xFB]],
    ["{DEF}", [0xFC, 0x06, 0x02]],
    ["{FEM}", [0xFC, 0x06, 0x03]],  # ??? -- payload unverified in-game
    ["{MLE}", [0xFC, 0x06, 0x04]],
    ["{FPC}", [0xFC, 0x06, 0x05]],
    ["{PLR}", [0xFD, 0x01]],
    ["{NEW}", [0xFE]],
    ["{END}", [0xFF]],
]
|
||
|
||
|
||
def logWarningError(type, text):
    """Record a diagnostic message and echo it to stdout.

    type is "Warning" or "Error"; the message is appended (deduplicated) to
    mainDict[lang.name]["Warnings"/"Errors"] under the next integer key.
    Relies on the module-level globals `mainDict` and `lang`.
    """
    bucket = mainDict[lang.name][type + "s"]
    message = type + ": " + text
    if message not in bucket.values():
        next_key = max(bucket.keys(), default=-1) + 1
        bucket[next_key] = message
    print(message)
|
||
|
||
def convertByte(incoming, array):
    """Translate a Unicode code point into its in-game byte value.

    incoming: the ord() of the character to encode.
    array: the font table to search (engCharArray or jpnCharArray); the
        returned value is the index of the matching code point.
    Returns 0 (the blank slot) and logs an Error when no match exists.
    """
    # Substitute characters that have no table entry (e.g. ' -> ’).
    for original, replacement in charConversionList:
        if incoming == ord(original):
            incoming = ord(replacement)
            logWarningError("Warning", f"Character {original} was used but is not in character table. Replaced with {replacement} .")

    # list.index is the idiomatic (and C-speed) form of the original
    # hand-written linear scan; behavior on a miss is unchanged.
    try:
        return array.index(incoming)
    except ValueError:
        logWarningError("Error", f"No match found for char [ {chr(incoming)} ]!")
        return 0
|
||
|
||
def SplitSentenceIntoLines(sentence, offset, pixelsPerChar, pixelsInLine):
    """Word-wrap one sentence to fit the in-game textbox width.

    sentence: the sentence to wrap; may also be one of the single-character
        control sentences "Ň" (newline), "Ş" (scroll) or "ȼ" (new textbox).
    offset: pixels already consumed on the current line by a prior sentence.
    pixelsPerChar: "Variable" to use the per-glyph width tables, "Default"
        for fixed widths (8 px Japanese, 6 px otherwise).
    pixelsInLine: maximum line width in pixels.

    Returns (pixels used on the final line + offset, number of line breaks
    emitted, the wrapped string with "Ň" inserted at each break).
    Relies on the module-level globals `lang` and `Languages`.
    """
    # If we can optimize this to remove the spaces, it could save a few bytes.
    splitChars = [' ', '、']
    outStr = ""
    currLine = ""
    lineCount = 0
    currWordIndex = 0
    lineLength = 0
    spaceLength = 0
    # BUG FIX: str.replace returns a new string; the original discarded the
    # result, so the Japanese comma '、' was never normalized to a space and
    # never acted as a word separator.
    for char in splitChars:
        sentence = sentence.replace(char, " ")
    words = sentence.split()

    while(currWordIndex < len(words)):
        word = words[currWordIndex]
        wordLength = 0

        # Figure out the length of the word in pixels
        for char in word:
            if (pixelsPerChar == "Variable"):
                if(lang == Languages.Japanese):
                    wordLength += jpnCharWidthArray[convertByte(ord(char), jpnCharArray)]
                    spaceLength = jpnCharWidthArray[convertByte(ord(' '), jpnCharArray)]
                else:
                    wordLength += engCharWidthArray[convertByte(ord(char), engCharArray)]
                    spaceLength = engCharWidthArray[convertByte(ord(' '), engCharArray)]

            elif (pixelsPerChar == "Default"):
                if (lang == Languages.Japanese):
                    wordLength += 8
                    spaceLength = 8
                else:
                    wordLength += 6
                    spaceLength = 6

        # See if the whole sentence is a newline
        if (sentence == "Ň"):
            outStr += "Ň"
            currLine = ""
            lineCount += 1
            offset = 0
            lineLength = 0
            currWordIndex += 1

        # See if the sentence is a new box
        elif(sentence == "Ş" or sentence == "ȼ"):
            outStr += sentence
            currLine = ""
            offset = 0
            lineLength = 0
            currWordIndex += 1

        # Test if the word is too long in general
        elif (wordLength > pixelsInLine):
            logWarningError("Error", f"Word {word} exceeds alloted length ({pixelsInLine} pixels)")
            currWordIndex += 1

        # Test if adding the word will go over our alloted space
        elif ((wordLength + lineLength + offset) <= pixelsInLine):
            # If not, add the word and increase the index
            currLine += (word + " ")
            lineLength += (wordLength + spaceLength)
            currWordIndex += 1

        # We need to move to the next line
        else:
            # Every line should already have a space at the end of it. Remove it here
            outStr += (currLine[:-1] + "Ň")
            currLine = ""
            lineCount += 1
            lineLength = 0
            offset = 0

    currLine = currLine.replace("。 ", "。") # Get rid of the space after the Japanese period
    outStr += currLine
    return lineLength + offset, lineCount, outStr
|
||
|
||
def split_into_sentences(text: str) -> list[str]:
    """Split the text into sentences.

    Works by protecting non-terminating dots (abbreviations, acronyms,
    websites, decimals) with a "<prd>" placeholder, marking real sentence
    ends with "<stop>", then splitting on "<stop>". The custom control
    characters "Ň" (newline), "ȼ" and "Ş" (new textbox / scroll) are split
    into their own one-character sentences.

    If the text contains substrings "<prd>" or "<stop>", they would lead
    to incorrect splitting because they are used as markers for splitting.

    :param text: text to be split into sentences
    :type text: str

    :return: list of sentences
    :rtype: list[str]
    """
    import re
    alphabets= r"([A-Za-z])"
    prefixes = r"(Mr|St|Mrs|Ms|Dr)[.]"
    suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
    starters = r"(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
    acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
    websites = r"[.](com|net|org|io|gov|edu|me)"
    digits = r"([0-9])"
    multiple_dots = r'\.{2,}'

    # NOTE: the order of the substitutions below matters; each protects a
    # class of dots before the final "." -> ".<stop>" pass.
    text = " " + text + " "
    text = text.replace("\n"," ")
    text = re.sub(prefixes,"\\1<prd>",text)
    text = re.sub(websites,"<prd>\\1",text)
    text = re.sub(digits + "[.]" + digits,"\\1<prd>\\2",text)
    text = re.sub(multiple_dots, lambda match: "<prd>" * len(match.group(0)) + "<stop>", text)
    if "Ph.D" in text: text = text.replace("Ph.D.","Ph<prd>D<prd>")
    text = re.sub(r"\s" + alphabets + "[.] "," \\1<prd> ",text)
    text = re.sub(acronyms+" "+starters,"\\1<stop> \\2",text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>\\3<prd>",text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>",text)
    text = re.sub(" "+suffixes+"[.] "+starters," \\1<stop> \\2",text)
    text = re.sub(" "+suffixes+"[.]"," \\1<prd>",text)
    text = re.sub(" " + alphabets + "[.]"," \\1<prd>",text)
    # Move terminators outside closing quotes so the <stop> lands after them.
    if "”" in text: text = text.replace(".”","”.")
    if "\"" in text: text = text.replace(".\"","\".")
    if "!" in text: text = text.replace("!\"","\"!")
    if "?" in text: text = text.replace("?\"","\"?")
    if "。" in text: text = text.replace("。\"","\"。") # Added for Japanese support
    if "?" in text: text = text.replace("?\"","\"?") # Added for Japanese support
    if "!" in text: text = text.replace("!\"","\"!") # Added for Japanese support
    text = text.replace(".",".<stop>")
    text = text.replace("?","?<stop>")
    text = text.replace("!","!<stop>")
    text = text.replace("。","。<stop>") # Added for Japanese support
    text = text.replace("?","?<stop>") # Added for Japanese support
    text = text.replace("!","!<stop>") # Added for Japanese support
    text = text.replace("<prd>",".")
    text = text.replace("Ň", "<stop>Ň<stop>") # Split newlines into their own sentences
    text = text.replace("ȼ", "<stop>ȼ<stop>") # Split new boxes into their own sentences
    text = text.replace("Ş", "<stop>Ş<stop>") # Split new boxes into their own sentences
    sentences = text.split("<stop>")
    sentences = [s.strip() for s in sentences]
    if sentences and not sentences[-1]: sentences = sentences[:-1]
    return sentences
|
||
|
||
class Languages(Enum):
    """Supported build languages.

    The value is the translation-column offset in the spreadsheet (added to
    FIRST_TRANSLATION_COL_INDEX); the generated C++ uses value + 1 as the
    PTGB_BUILD_LANGUAGE constant.
    """
    Japanese = 0
    English = 1
    French = 2
    German = 3
    Italian = 4
    SpanishEU = 5
    SpanishLA = 6
|
||
|
||
# Global store: lang.name -> section -> entry-key -> entry dict, plus the
# "Warnings" and "Errors" buckets filled by logWarningError().
mainDict = {}
|
||
|
||
def convert_item(ogDict):
    """Convert one text entry's readable string into a hex byte string.

    ogDict: entry dict with keys "bytes" (the human-readable text, possibly
        containing {XXX} escape tokens), "numLines", "pixelsPerChar",
        "pixelsInLine" and "includeBoxBreaks".
    Mutates and returns ogDict with "bytes" replaced by a space-separated
    hex string terminated with "ff".
    Relies on the module-level global `lang`.
    """
    line = ogDict["bytes"]
    numLines = ogDict["numLines"]
    pixelsPerChar = ogDict["pixelsPerChar"]
    pixelsInLine = ogDict["pixelsInLine"]
    include_box_breaks = ogDict["includeBoxBreaks"]

    # FIX: the local previously named `list` shadowed the builtin.
    if lang == Languages.Japanese:
        arr = jpnCharArray
        escapeList = jpnEscapeCharConversionList
    else:
        arr = engCharArray
        escapeList = itlEscapeCharConversionList
    # Replace each "{XXX}" token with the characters whose table indices are
    # the raw control bytes, so encoding below round-trips them.
    for pair in escapeList:
        if pair[0] in line:
            escapeString = ""
            for byte_val in pair[1]:
                escapeString += chr(arr[byte_val])
            line = line.replace(pair[0], escapeString)

    split_sents = split_into_sentences(line)
    index = 0
    outStr = ""
    currLine = 0
    offset = 0
    escapeCount = 0
    # Wrap sentence by sentence; when the textbox overflows, back up and
    # insert a new-textbox character, then re-wrap from a fresh box.
    while index < len(split_sents) and escapeCount < 100:
        offset, recievedLine, out = SplitSentenceIntoLines(split_sents[index], offset, pixelsPerChar, pixelsInLine)
        currLine += recievedLine

        if (out == "ȼ"):
            offset = 0
            currLine = 0
            outStr = outStr[:-1]
            outStr += "ȼ"
            index += 1
        elif (currLine < numLines):
            index += 1
            outStr += out
        else:
            outStr = outStr[:-1]
            outStr += "ȼ" # new textbox character
            offset = 0
            currLine = 0
            escapeCount += 1
            if not include_box_breaks:
                logWarningError("Error", f"Made a line break when disabled, sentence \"{outStr}\" is too long!")

    if escapeCount == 100:
        logWarningError("Error", f"Sentence \"{out}\" is too long!")

    # Some cases that should be fixed; iterate until a fixed point is reached.
    exitLoop = False
    while(not exitLoop):
        newStr = outStr
        # A space right before a newline just takes up space
        newStr = newStr.replace(" Ň", "Ň")
        # Newlines shouldn't happen right after a new textbox
        newStr = newStr.replace("ȼŇ", "ȼ")
        # Nor should newlines be right before a new textbox
        newStr = newStr.replace("Ňȼ", "ȼ")
        # Nor should a new textbox be after a new textbox
        newStr = newStr.replace("ȼȼ", "ȼ")
        # Nor should a new textbox follow a scroll (and vice versa)
        newStr = newStr.replace("Şȼ", "Ş")
        newStr = newStr.replace("ȼŞ", "ȼ")

        if len(newStr) > 1023:
            newStr = newStr[:1023]
            logWarningError("Warning", f"String {newStr} exceeds character limit of 1023 and has been truncated.")

        exitLoop = (newStr == outStr)
        outStr = newStr

    # Encode every character to its table index, two hex digits each.
    byteStr = ""
    if lang == Languages.Japanese:
        arr = jpnCharArray
    else:
        arr = engCharArray
    for char in outStr[:-1]:
        byteStr += f"{convertByte(ord(char), arr):02x} "
    if (len(outStr) > 0 and outStr[-1] != ' '): # Check if the last char is a space
        byteStr += f"{convertByte(ord(outStr[-1]), arr):02x} "

    byteStr += "ff"  # 0xFF terminator expected by the GBA reader

    ogDict["bytes"] = byteStr
    return ogDict
|
||
|
||
def write_text_bin_file(filename, dictionary):
    """Serialize one section's entries into a binary text table.

    Layout: a 16-bit little-endian entry count, then a 16-bit LE offset per
    entry (relative to the end of the offset table), then the concatenated
    entry byte data. Each entry in `dictionary` is run through convert_item()
    (mutating the dict in place) before serialization.
    """
    with open(filename, 'wb') as binFile:
        # Let the first value indicate the number of entries.
        dict_size = len(dictionary)
        # We need to store 2 bytes instead of one, because not aligning the
        # data to 16 bits will cause corruption on the gba.
        binFile.write(bytes([dict_size & 0xFF, (dict_size >> 8) & 0xFF]))
        # After this header we write the 16-bit offset of each line
        # (relative to the last index byte).
        index = bytearray(len(dictionary) * 2)
        # bindata will contain the binary data of each entry
        bindata = bytearray()
        current_offset = 0

        # Append every line's binary data to bindata, keeping an index of the
        # offset within bindata at which each line starts.
        # FIX: enumerate replaces the hand-maintained counter, and we iterate
        # keys only (the values were re-fetched via the key anyway).
        for num, key in enumerate(dictionary):
            dictionary[key] = convert_item(dictionary[key])
            # store the offset of the line in the index as 16-bit little endian
            index[num * 2] = current_offset & 0xFF
            index[num * 2 + 1] = (current_offset >> 8) & 0xFF
            linedata = bytes.fromhex(dictionary[key]['bytes'])

            bindata.extend(linedata)
            current_offset += len(linedata)

        # Write the index and bindata to the file
        binFile.write(index)
        binFile.write(bindata)
        # FIX: removed the redundant binFile.close(); the with-statement
        # already closes the file.
|
||
|
||
def write_enum_to_header_file(hFile, prefix, dictionary):
    """Write one '#define <prefix><key> <n>' line per dictionary key.

    Ends with a '#define <prefix>LENGTH <count>' line and a blank line.
    Returns the number of entries written.
    """
    # FIX: iterate keys with enumerate — the .items() values were unused and
    # the counter was maintained by hand.
    num = 0
    for num, key in enumerate(dictionary):
        hFile.write(f"#define {prefix}{key} {num}\n")
        num += 1
    hFile.write(f"\n#define {prefix}LENGTH {num}\n")
    hFile.write("\n")
    return num
|
||
|
||
# Main
update = True  # set False to skip the spreadsheet download/compare step

print ("Running text_helper:")
BASE_DIR = Path(__file__).resolve().parent
# Column index (iloc) of the first translation column in the "Translations"
# sheet; Languages.value is added to it to pick a language's column.
FIRST_TRANSLATION_COL_INDEX = 8

# read by default 1st sheet of an excel file
# NOTE(review): `dir` shadows the builtin dir(); rename on next refactor.
dir = os.curdir + "/text_helper"
|
||
if update:

    # Published Google Sheet containing all translations.
    url = 'https://docs.google.com/spreadsheets/d/14LLs5lLqWasFcssBmJdGXjjYxARAJBa_QUOUhXZt4v8/export?format=xlsx'
    new_file_path = BASE_DIR / 'new_text.xlsx'
    old_file_path = BASE_DIR / 'text.xlsx'
    json_file_path = BASE_DIR / 'output.json'

    offline = False

    # ---- Attempt download ----
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        with open(new_file_path, 'wb') as f:
            f.write(response.content)
        print("File downloaded successfully")

    except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError):
        if old_file_path.exists():
            print("No internet. Using cached xlsx.")
            offline = True
        else:
            print("ERROR: No internet and no cached xlsx. Cannot continue.")
            sys.exit(1)

    # ---- Decision logic ----
    if offline:
        # Cached xlsx exists (guaranteed by the except-branch above).
        # NOTE(review): the printed messages say "XML" but refer to the xlsx.
        if json_file_path.exists():
            print("Offline mode: trusting cached XML + JSON. Skipping parse.\n")
            sys.exit(0)
        else:
            print("Offline mode: XML present but JSON missing. Rebuilding.")

    else:
        # Online mode: compare the fresh download against the cached copy.
        if old_file_path.exists():
            new_df = pd.read_excel(new_file_path, sheet_name="Translations")
            old_df = pd.read_excel(old_file_path, sheet_name="Translations")

            if new_df.equals(old_df):
                print("Downloaded file is identical.")
                new_file_path.unlink()
                if json_file_path.exists():
                    print("Skipping parse.\n")
                    sys.exit(0)
                else:
                    print("JSON missing - forcing rebuild.")
            else:
                # Sheet changed: promote the new download to be the cache.
                old_file_path.unlink()
                new_file_path.rename(old_file_path)

        else:
            print("No cached xlsx - forcing rebuild.")
            new_file_path.rename(old_file_path)
||
print("Starting parse:")
currSheet = pd.read_excel(dir + "/text.xlsx", sheet_name="Translations")

textSections = []

# Collect the unique "Text Section" names, preserving sheet order.
for row in currSheet.iterrows():
    currRow = row[1]["Text Section"]
    if (currRow not in textSections):
        textSections.append(currRow)

# Pre-create one bucket per language/section, plus Warnings/Errors buckets.
for lang in Languages:
    mainDict[lang.name] = {}
    for section in textSections:
        mainDict[lang.name][section] = {}
    mainDict[lang.name]["Warnings"] = {}
    mainDict[lang.name]["Errors"] = {}


# Fill mainDict[lang][section][key] with the raw text plus layout metadata
# (columns: 0 = section, 1 = key, 2 = line count, 3 = width mode,
#  4 = line width in pixels, 5 = allow box breaks).
for row in currSheet.iterrows():
    for lang in Languages:
        currRow = row[1]
        offset = lang.value
        # Fall back to the English column when a translation cell is empty.
        if (pd.isna(currRow.iloc[FIRST_TRANSLATION_COL_INDEX + lang.value])):
            offset = Languages.English.value
        mainDict[lang.name][currRow.iloc[0]][currRow.iloc[1]] = {"bytes": currRow.iloc[FIRST_TRANSLATION_COL_INDEX + offset],
                                                                "numLines": currRow.iloc[2],
                                                                "pixelsPerChar": currRow.iloc[3],
                                                                "pixelsInLine" : currRow.iloc[4],
                                                                "includeBoxBreaks": currRow.iloc[5],
                                                                }
||
|
||
print("\tGenerating header file")
# generate the header file
with open (os.curdir + '/include/translated_text.h', 'w') as hFile:
    hFile.write("// THIS FILE HAS BEEN GENERATED BY text_helper/main.py !\n\n#ifndef TRANSLATED_TEXT_H\n#define TRANSLATED_TEXT_H\n\n#include <tonc.h>\n\n")

    sectionEnds = []
    index = 0
    for section in textSections:
        # NOTE(review): `lang` here is whatever value the previous
        # `for lang in Languages` loop left behind; the entry KEYS are the
        # same for every language, so the defines come out identical — but
        # this is fragile. Confirm before refactoring.
        num = write_enum_to_header_file(hFile, section + "_", mainDict[lang.name][section])
        hFile.write("#define " + section + "_INDEX " + str(index))
        if(section == "PTGB"):
            hFile.write(f"\n#define DIA_END {num}\n")

        hFile.write("/** Returns the LZ10 compressed " + section + " text table.*/\n")
        sectionEnds.append(num)
        index += 1

    hFile.write("#define NUM_TEXT_SECTIONS " + str(index) + "\n")
    hFile.write("const int text_section_lengths[] = {\n")
    for end in sectionEnds:
        hFile.write("\t" + str(end) + ",\n")
    hFile.write("};\n\n")

    hFile.write("const u8* get_compressed_text_table(int table_index);\n")


    hFile.write("\n#endif")
    hFile.close()  # NOTE(review): redundant — the with-statement closes hFile.
||
|
||
print("\tGenerating text tables")
# now generate the text tables: one .bin per (section, language) pair, to be
# LZ10-compressed by the build system afterwards.
for lang in Languages:
    for section in textSections:
        table_file = os.curdir + '/to_compress/' + section + '_' + lang.name.lower() + '.bin'
        # Side effect: convert_item() run inside rewrites each entry's
        # "bytes" field to its hex-encoded form.
        write_text_bin_file(table_file, mainDict[lang.name][section])
||
|
||
print("\tGenerating cpp file")
# now generate the cpp file.
with open(os.curdir + '/source/translated_text.cpp', 'w') as cppFile:
    cppFile.write("// THIS FILE HAS BEEN GENERATED BY text_helper/main.py !\n#include \"translated_text.h\"\n#include \"debug_mode.h\"\n")
    # generate includes for each language
    for lang in Languages:
        for section in textSections:
            cppFile.write("#include \"" + section.upper() + "_" + lang.name.lower() + "_lz10_bin.h\"\n")

    # One #if-guarded get_compressed_text_table() per language; the build
    # selects the active one via PTGB_BUILD_LANGUAGE (== lang.value + 1).
    for lang in Languages:
        cppFile.write(f"\n#if PTGB_BUILD_LANGUAGE == {lang.value + 1}\n")
        cppFile.write("const u8* get_compressed_text_table(int table_index)\n")
        cppFile.write("{\n")
        cppFile.write("\tswitch (table_index)\n\t{\n")
        for section in textSections:
            cppFile.write("\tcase(" + section + "_INDEX):\n")
            # The PTGB case doubles as the switch's default label.
            if(section == "PTGB"):
                cppFile.write("\tdefault:\n")
            cppFile.write("\t\treturn " + section + "_" + lang.name.lower() + "_lz10_bin;\n")
            cppFile.write("\t\tbreak;\n")
        cppFile.write("\t}\n")
        cppFile.write("}\n\n")
        cppFile.write(f"#endif\n\n\n")
||
|
||
|
||
print("\tOutputting json file")
# Decode each entry's hex bytes back to readable text (a round-trip check)
# and store it alongside the bytes before dumping everything to JSON.
for lang in Languages:
    for section in textSections:
        for item in mainDict[lang.name][section]:
            string = mainDict[lang.name][section][item]["bytes"].split(" ")
            outText = ""
            if lang == Languages.Japanese:
                arr = jpnCharArray
            else:
                arr = engCharArray
            for byte in string:
                # Hex byte -> table index -> Unicode character.
                byte = arr[int(byte, 16)]
                outText += chr(byte)
            mainDict[lang.name][section][item]["text"] = outText


with open(dir + '/output.json', 'w') as jsonFile:
    jsonFile.write(json.dumps(mainDict))


print("Parse finished!\n")