"""text_helper main script.

Downloads the translation spreadsheet from Google Sheets, encodes every text
line into the game's custom font-table byte encoding (with automatic
word-wrapping and text-box breaking), and generates:

  * include/translated_text.h          -- index #defines per section
  * to_compress/<SECTION>_<lang>.bin   -- one binary text table per section/language
  * source/translated_text.cpp         -- table lookup, selected by build language
  * text_helper/output.json            -- decoded text + collected warnings/errors
"""
# import pandas lib as pd
import pandas as pd
import os
from enum import Enum
import json
import requests
from collections import defaultdict
import copy
import math
import sys
import filecmp
from pathlib import Path

# ---------------------------------------------------------------------------
# Font tables.
# Each entry is the Unicode code point rendered by that font-tile index; the
# parallel *WidthArray gives the glyph's pixel width (variable-width font).
# A 0x20 entry with width 0x0 marks an unused slot.
# ---------------------------------------------------------------------------
engCharArray = [
    0x20, 0xC0, 0xC1, 0xC2, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0x20, 0xCE, 0xCF, 0xD2, 0xD3, 0xD4,
    0x152, 0xD9, 0xDA, 0xDB, 0xD1, 0xDF, 0xE0, 0xE1, 0x20, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0x20,
    0xEE, 0xEF, 0xF2, 0xF3, 0xF4, 0x153, 0xF9, 0xFA, 0xFB, 0xF1, 0xBA, 0xAA, 0x1D49, 0x26, 0x2B,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x3D, 0x3B,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x25AF, 0xBF, 0xA1,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xCD, 0x25, 0x28, 0x29,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xE2,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xED,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x2B07, 0x2B05, 0x27A1,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x1D49, 0x3C, 0x3E,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x2B3, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x21, 0x3F, 0x2E, 0x2D, 0x30FB,
    0x2025, 0x201C, 0x201D, 0x2018, 0x2019, 0x2642, 0x2640, 0x20, 0x2C, 0xD7, 0x2F,
    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
    0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A,
    0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
    0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A,
    0x25B6, 0x3A, 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC, 0x2A, 0x20, 0x20, 0x15E, 0x23C, 0x206, 0x1B2, 0x147, 0x19E,
]

# Pixel width of each glyph in engCharArray (parallel array).
engCharWidthArray = [
    0x4, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x8, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x0,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x8, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x9, 0x6, 0x6,
    0x0, 0x0, 0x0, 0x0, 0x0,
    0xA, 0x8, 0x3,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x6, 0x6, 0x4,
    0x8, 0x8, 0x8, 0x7, 0x8, 0x8, 0x4,
    0x6, 0x6, 0x4, 0x4,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x6,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x6,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x7, 0x7, 0x7, 0x7, 0x2, 0x3, 0x4, 0x5, 0x5, 0x6, 0x7, 0x5, 0x6, 0x6,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x8, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x4, 0x6, 0x3, 0x6, 0x3,
    0x6, 0x6, 0x6, 0x3, 0x3, 0x6, 0x6, 0x6, 0x3, 0x7,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x4, 0x5, 0x6, 0x4, 0x6, 0x6, 0x6, 0x6, 0x6, 0x5, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x6, 0x6, 0x8, 0x3, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x38, 0x0, 0x0,
]

jpnCharArray = [
    0x20, 0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x304B, 0x304D, 0x304F, 0x3051, 0x3053, 0x3055,
    0x3057, 0x3059, 0x305B, 0x305D, 0x305F, 0x3061, 0x3064, 0x3066, 0x3068, 0x306A, 0x306B, 0x306C,
    0x306D, 0x306E, 0x306F, 0x3072, 0x3075, 0x3078, 0x307B, 0x307E, 0x307F, 0x3080, 0x3081, 0x3082,
    0x3084, 0x3086, 0x3088, 0x3089, 0x308A, 0x308B, 0x308C, 0x308D, 0x308F, 0x3092, 0x3093,
    0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3083, 0x3085, 0x3087,
    0x304C, 0x304E, 0x3050, 0x3052, 0x3054, 0x3056, 0x3058, 0x305A, 0x305C, 0x305E,
    0x3060, 0x3062, 0x3065, 0x3067, 0x3069, 0x3070, 0x3073, 0x3076, 0x3079, 0x307C,
    0x3071, 0x3074, 0x3077, 0x307A, 0x307D, 0x3063,
    0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30AB, 0x30AD, 0x30AF, 0x30B1, 0x30B3, 0x30B5, 0x30B7,
    0x30B9, 0x30BB, 0x30BD, 0x30BF, 0x30C1, 0x30C4, 0x30C6, 0x30C8, 0x30CA, 0x30CB, 0x30CC, 0x30CD,
    0x30CE, 0x30CF, 0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30DE, 0x30DF, 0x30E0, 0x30E1, 0x30E2,
    0x30E4, 0x30E6, 0x30E8, 0x30E9, 0x30EA, 0x30EB, 0x30EC, 0x30ED, 0x30EF, 0x30F2, 0x30F3,
    0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30E3, 0x30E5, 0x30E7,
    0x30AC, 0x30AE, 0x30B0, 0x30B2, 0x30B4, 0x30B6, 0x30B8, 0x30BA, 0x30BC, 0x30BE,
    0x30C0, 0x30C2, 0x30C5, 0x30C7, 0x30C9, 0x30D0, 0x30D3, 0x30D6, 0x30D9, 0x30DC,
    0x30D1, 0x30D4, 0x30D7, 0x30DA, 0x30DD, 0x30C3,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    0xFF01, 0xFF1F, 0x3002, 0x30FC, 0x30FB, 0x30FB, 0x300E, 0x300F, 0x300C, 0x300D,
    0x2642, 0x2640, 0x5186, 0x2E, 0xD7, 0x2F,
    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
    0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A,
    0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
    0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A,
    0x25B6, 0x3A, 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC, 0x2A, 0x20, 0x20, 0x15E, 0x23C, 0x206, 0x1B2, 0x147, 0x19E,
]

# Every Japanese glyph is a fixed 8 px wide; the last nine slots mirror the
# special tail of engCharWidthArray.  jpnCharArray has 256 entries, so the run
# of 0x8 widths covers the first 247 slots.
jpnCharWidthArray = [0x8] * 247 + [0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x28, 0x0, 0x0]

charConversionList = [
    # replaces the first char in the list with the latter
    ["'", "’"],
]

# {TAG} escape sequences.  The byte lists are *indices into the char table*
# whose slots hold the matching control bytes (0xFA..0xFF).
itlEscapeCharConversionList = [
    ["{SCL}", [0xFA]],
    ["{CLR}", [0xFB]],
    ["{DEF}", [0xFC, 0x01, 0x02]],
    ["{FEM}", [0xFC, 0x01, 0x04]],
    ["{FPC}", [0xFC, 0x01, 0x06]],
    ["{MLE}", [0xFC, 0x01, 0x08]],
    ["{PLR}", [0xFD, 0x01]],
    ["{NEW}", [0xFE]],
    ["{END}", [0xFF]],
]

jpnEscapeCharConversionList = [
    ["{SCL}", [0xFA]],
    ["{CLR}", [0xFB]],
    ["{DEF}", [0xFC, 0x06, 0x02]],
    ["{FEM}", [0xFC, 0x06, 0x03]],  # ???
    ["{MLE}", [0xFC, 0x06, 0x04]],
    ["{FPC}", [0xFC, 0x06, 0x05]],
    ["{PLR}", [0xFD, 0x01]],
    ["{NEW}", [0xFE]],
    ["{END}", [0xFF]],
]


def logWarningError(type, text):
    """Record a warning or error (deduplicated) and print it.

    `type` is "Warning" or "Error"; the message is stored under
    mainDict[lang.name]["Warnings"/"Errors"] with the next free integer key.
    Relies on the module-level `mainDict` and the current `lang` loop variable.
    """
    nType = type + "s"
    nText = type + ": " + text
    if nText not in mainDict[lang.name][nType].values():
        # Next free integer key; -1 + 1 == 0 for an empty dict.
        mainDict[lang.name][nType][max(mainDict[lang.name][nType].keys(), default=-1) + 1] = nText
    print(nText)


def convertByte(incoming, array):
    """Map the Unicode code point `incoming` to its index in `array` (the
    game's font-tile index).

    Characters listed in charConversionList are substituted first (with a
    warning).  An unmapped character logs an error and maps to index 0 (space).
    """
    for pair in charConversionList:
        if incoming == ord(pair[0]):
            incoming = ord(pair[1])
            logWarningError("Warning", f"Character {pair[0]} was used but is not in character table. Replaced with {pair[1]} .")
    try:
        return array.index(incoming)
    except ValueError:
        logWarningError("Error", f"No match found for char [ {chr(incoming)} ]!")
        return 0


def SplitSentenceIntoLines(sentence, offset, pixelsPerChar, pixelsInLine):
    """Word-wrap one sentence to the in-game text-box width.

    sentence      -- the sentence to wrap; may also be a lone control marker
                     ("Ň" = newline, "Ş" = scroll, "ȼ" = new textbox).
    offset        -- pixels already used on the current line.
    pixelsPerChar -- "Variable" (use the width tables) or "Default" (fixed width).
    pixelsInLine  -- usable pixel width of a line.

    Returns (pixels used on the last line, number of line breaks inserted,
    wrapped text with "Ň" markers at each break).  Relies on the module-level
    `lang` to select the character table.
    """
    # If we can optimize this to remove the spaces, it could save a few bytes.
    splitChars = [' ', '、']
    outStr = ""
    currLine = ""
    lineCount = 0
    currWordIndex = 0
    lineLength = 0
    spaceLength = 0
    for char in splitChars:
        # BUGFIX: str.replace returns a new string; the original discarded the
        # result, so '、' was never converted into a word separator.
        sentence = sentence.replace(char, " ")
    words = sentence.split()
    while currWordIndex < len(words):
        word = words[currWordIndex]
        wordLength = 0
        # Figure out the length of the word in pixels
        for char in word:
            if pixelsPerChar == "Variable":
                if lang == Languages.Japanese:
                    wordLength += jpnCharWidthArray[convertByte(ord(char), jpnCharArray)]
                    spaceLength = jpnCharWidthArray[convertByte(ord(' '), jpnCharArray)]
                else:
                    wordLength += engCharWidthArray[convertByte(ord(char), engCharArray)]
                    spaceLength = engCharWidthArray[convertByte(ord(' '), engCharArray)]
            elif pixelsPerChar == "Default":
                if lang == Languages.Japanese:
                    wordLength += 8
                    spaceLength = 8
                else:
                    wordLength += 6
                    spaceLength = 6
        # See if the whole sentence is a newline
        if sentence == "Ň":
            outStr += "Ň"
            currLine = ""
            lineCount += 1
            offset = 0
            lineLength = 0
            currWordIndex += 1
        # See if the sentence is a new box
        elif sentence == "Ş" or sentence == "ȼ":
            outStr += sentence
            currLine = ""
            offset = 0
            lineLength = 0
            currWordIndex += 1
        # Test if the word is too long in general
        elif wordLength > pixelsInLine:
            logWarningError("Error", f"Word {word} exceeds alloted length ({pixelsInLine} pixels)")
            currWordIndex += 1
        # Test if adding the word will go over our alloted space
        elif (wordLength + lineLength + offset) <= pixelsInLine:
            # If not, add the word and increase the index
            currLine += (word + " ")
            lineLength += (wordLength + spaceLength)
            currWordIndex += 1
        # We need to move to the next line (same word is retried on the new line)
        else:
            # Every line should already have a space at the end of it. Remove it here
            outStr += (currLine[:-1] + "Ň")
            currLine = ""
            lineCount += 1
            lineLength = 0
            offset = 0
    currLine = currLine.replace("。 ", "。")  # Get rid of the space after the Japanese period
    outStr += currLine
    return lineLength + offset, lineCount, outStr


def split_into_sentences(text: str) -> list[str]:
    """Split the text into sentences.

    Based on the well-known regex sentence splitter.  BUGFIX: the "<prd>" and
    "<stop>" marker strings had been stripped from this file (leaving no-op
    replaces and a crashing ``text.split("")``); they are restored here.  If
    the input itself contains "<prd>" or "<stop>" the split will be incorrect,
    because they are used as internal markers.

    :param text: text to be split into sentences
    :type text: str
    :return: list of sentences
    :rtype: list[str]
    """
    import re

    alphabets = r"([A-Za-z])"
    prefixes = r"(Mr|St|Mrs|Ms|Dr)[.]"
    suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
    starters = r"(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
    acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
    websites = r"[.](com|net|org|io|gov|edu|me)"
    digits = r"([0-9])"
    multiple_dots = r'\.{2,}'

    text = " " + text + " "
    text = text.replace("\n", " ")
    text = re.sub(prefixes, "\\1<prd>", text)
    text = re.sub(websites, "<prd>\\1", text)
    text = re.sub(digits + "[.]" + digits, "\\1<prd>\\2", text)
    text = re.sub(multiple_dots, lambda match: "<prd>" * len(match.group(0)) + "<stop>", text)
    if "Ph.D" in text:
        text = text.replace("Ph.D.", "Ph<prd>D<prd>")
    text = re.sub(r"\s" + alphabets + "[.] ", " \\1<prd> ", text)
    text = re.sub(acronyms + " " + starters, "\\1<stop> \\2", text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]", "\\1<prd>\\2<prd>\\3<prd>", text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]", "\\1<prd>\\2<prd>", text)
    text = re.sub(" " + suffixes + "[.] " + starters, " \\1<stop> \\2", text)
    text = re.sub(" " + suffixes + "[.]", " \\1<prd>", text)
    text = re.sub(" " + alphabets + "[.]", " \\1<prd>", text)
    if "”" in text:
        text = text.replace(".”", "”.")
    if "\"" in text:
        text = text.replace(".\"", "\".")
    if "!" in text:
        text = text.replace("!\"", "\"!")
    if "?" in text:
        text = text.replace("?\"", "\"?")
    if "。" in text:
        text = text.replace("。\"", "\"。")  # Added for Japanese support
    # NOTE(review): the two lines below presumably used the full-width ？/！
    # before the file was mangled (otherwise they duplicate the ASCII lines
    # above) -- confirm against the original script.
    if "？" in text:
        text = text.replace("？\"", "\"？")  # Added for Japanese support
    if "！" in text:
        text = text.replace("！\"", "\"！")  # Added for Japanese support
    text = text.replace(".", ".<stop>")
    text = text.replace("?", "?<stop>")
    text = text.replace("!", "!<stop>")
    text = text.replace("。", "。<stop>")  # Added for Japanese support
    text = text.replace("？", "？<stop>")  # Added for Japanese support
    text = text.replace("！", "！<stop>")  # Added for Japanese support
    text = text.replace("<prd>", ".")
    text = text.replace("Ň", "<stop>Ň<stop>")  # Split newlines into their own sentences
    text = text.replace("ȼ", "<stop>ȼ<stop>")  # Split new boxes into their own sentences
    text = text.replace("Ş", "<stop>Ş<stop>")  # Split new boxes into their own sentences
    sentences = text.split("<stop>")
    sentences = [s.strip() for s in sentences]
    if sentences and not sentences[-1]:
        sentences = sentences[:-1]
    return sentences


class Languages(Enum):
    """Column order of the translations in the spreadsheet."""
    Japanese = 0
    English = 1
    French = 2
    German = 3
    Italian = 4
    SpanishEU = 5
    SpanishLA = 6


# Global state: per-language {section: {key: entry}} plus Warnings/Errors maps.
mainDict = {}


def convert_item(ogDict):
    """Encode one text entry into the game's byte format, in place.

    Expands {XXX} escape tags, sentence-splits and word-wraps the text,
    inserts line/box-break control characters, applies cleanup rules and the
    1023-character cap, then converts every character through the font table
    for the current `lang`.  The entry's "bytes" field is replaced with a
    space-separated hex string terminated by "ff"; the dict is returned.
    """
    line = ogDict["bytes"]
    numLines = ogDict["numLines"]
    pixelsPerChar = ogDict["pixelsPerChar"]
    pixelsInLine = ogDict["pixelsInLine"]
    include_box_breaks = ogDict["includeBoxBreaks"]

    if lang == Languages.Japanese:
        arr = jpnCharArray
        escapeList = jpnEscapeCharConversionList  # renamed from `list` (shadowed builtin)
    else:
        arr = engCharArray
        escapeList = itlEscapeCharConversionList
    # Replace each {TAG} with the characters whose table slots hold the
    # matching control bytes, so they survive the text pipeline below.
    for pair in escapeList:
        if pair[0] in line:
            escapeString = ""
            for char in pair[1]:
                escapeString += chr(arr[char])
            line = line.replace(pair[0], escapeString)

    split_sents = split_into_sentences(line)
    index = 0
    outStr = ""
    currLine = 0
    offset = 0
    escapeCount = 0
    while index < len(split_sents) and escapeCount < 100:
        offset, receivedLines, out = SplitSentenceIntoLines(split_sents[index], offset, pixelsPerChar, pixelsInLine)
        currLine += receivedLines
        if out == "ȼ":
            offset = 0
            currLine = 0
            outStr = outStr[:-1]  # drop the trailing space before the box break
            outStr += "ȼ"
            index += 1
        elif currLine < numLines:
            index += 1
            outStr += out
        else:
            # Out of lines in this box: swap the trailing space for a
            # new-textbox character and retry the same sentence.
            outStr = outStr[:-1]
            outStr += "ȼ"
            offset = 0
            currLine = 0
            escapeCount += 1
            # NOTE(review): original indentation was ambiguous; emitting this
            # error once per forced break matches the message's wording.
            if not include_box_breaks:
                logWarningError("Error", f"Made a line break when disabled, sentence \"{outStr}\" is too long!")
    if escapeCount == 100:
        logWarningError("Error", f"Sentence \"{out}\" is too long!")

    # Some cases that should be fixed
    exitLoop = False
    while not exitLoop:
        newStr = outStr
        # A space right before a newline just takes up space
        newStr = newStr.replace(" Ň", "Ň")
        # Newlines shouldn't happen right after a new textbox
        newStr = newStr.replace("ȼŇ", "ȼ")
        # Nor should newlines be right before a new textbox
        newStr = newStr.replace("Ňȼ", "ȼ")
        # Nor should a new textbox be after a new textbox
        newStr = newStr.replace("ȼȼ", "ȼ")
        # Nor should a new scroll be after a new textbox
        newStr = newStr.replace("Şȼ", "Ş")
        newStr = newStr.replace("ȼŞ", "ȼ")
        if len(newStr) > 1023:
            newStr = newStr[:1023]
            logWarningError("Warning", f"String {newStr} exceeds character limit of 1023 and has been truncated.")
        exitLoop = (newStr == outStr)
        outStr = newStr

    byteStr = ""
    arr = jpnCharArray if lang == Languages.Japanese else engCharArray
    for char in outStr[:-1]:
        byteStr += f"{convertByte(ord(char), arr):02x} "
    if len(outStr) > 0 and outStr[-1] != ' ':  # Check if the last char is a space
        byteStr += f"{convertByte(ord(outStr[-1]), arr):02x} "
    byteStr += "ff"
    ogDict["bytes"] = byteStr
    return ogDict


def write_text_bin_file(filename, dictionary):
    """Write `dictionary`'s entries as one binary text table.

    Layout: u16 entry count (little endian), one u16 offset per entry
    (relative to the end of the offset table), then the concatenated encoded
    lines.  Each entry is run through convert_item() first, which mutates
    `dictionary` in place.
    """
    with open(filename, 'wb') as binFile:
        # Let the first value indicate the number of entries.  We need to
        # store 2 bytes instead of one, because not aligning the data to
        # 16 bits will cause corruption on the gba.
        dict_size = len(dictionary)
        binFile.write(bytes([dict_size & 0xFF, (dict_size >> 8) & 0xFF]))
        # After this initial value we store the 16-bit offset of each line
        # (relative to the last index byte).
        index = bytearray(len(dictionary) * 2)
        # bindata will contain the binary data of each entry
        bindata = bytearray()
        current_offset = 0
        num = 0
        # Append every line's binary data to bindata; keep an index of the
        # binary offset within bindata at which each line starts.
        for key, line in dictionary.items():
            dictionary[key] = convert_item(line)
            # store the offset of the line in the index as 16 bit little endian
            index[num * 2] = current_offset & 0xFF
            index[num * 2 + 1] = (current_offset >> 8) & 0xFF
            linedata = bytes.fromhex(dictionary[key]['bytes'])
            bindata.extend(linedata)
            current_offset += len(linedata)
            num += 1
        # Write the index and bindata to the file (the `with` closes it;
        # the redundant explicit close() was removed).
        binFile.write(index)
        binFile.write(bindata)


def write_enum_to_header_file(hFile, prefix, dictionary):
    """Emit one #define per dictionary key (prefix+key = running index) plus a
    <prefix>LENGTH define.  Returns the number of entries written."""
    num = 0
    for key in dictionary:  # values are unused here
        hFile.write(f"#define {prefix}{key} {num}\n")
        num += 1
    hFile.write(f"\n#define {prefix}LENGTH {num}\n")
    hFile.write("\n")
    return num


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
update = True
print("Running text_helper:")

BASE_DIR = Path(__file__).resolve().parent
# Column of the first translation; the languages follow in Languages order.
FIRST_TRANSLATION_COL_INDEX = 8
# NOTE(review): this path is relative to the CWD while BASE_DIR is the script
# directory -- they only agree when run from the repo root.  Confirm.
helper_dir = os.curdir + "/text_helper"

if update:
    url = 'https://docs.google.com/spreadsheets/d/14LLs5lLqWasFcssBmJdGXjjYxARAJBa_QUOUhXZt4v8/export?format=xlsx'
    new_file_path = BASE_DIR / 'new_text.xlsx'
    old_file_path = BASE_DIR / 'text.xlsx'
    json_file_path = BASE_DIR / 'output.json'
    offline = False

    # ---- Attempt download ----
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        with open(new_file_path, 'wb') as f:
            f.write(response.content)
        print("File downloaded successfully")
    except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError):
        if old_file_path.exists():
            print("No internet. Using cached xlsx.")
            offline = True
        else:
            print("ERROR: No internet and no cached xlsx. Cannot continue.")
            sys.exit(1)

    # ---- Decision logic ----
    if offline:
        # cached xlsx exists (guaranteed here)
        if json_file_path.exists():
            print("Offline mode: trusting cached XML + JSON. Skipping parse.\n")
            sys.exit(0)
        else:
            print("Offline mode: XML present but JSON missing. Rebuilding.")
    else:
        # Online mode
        if old_file_path.exists():
            new_df = pd.read_excel(new_file_path, sheet_name="Translations")
            old_df = pd.read_excel(old_file_path, sheet_name="Translations")
            if new_df.equals(old_df):
                print("Downloaded file is identical.")
                new_file_path.unlink()
                if json_file_path.exists():
                    print("Skipping parse.\n")
                    sys.exit(0)
                else:
                    print("JSON missing - forcing rebuild.")
            else:
                # Sheet changed: promote the fresh download to the cache.
                old_file_path.unlink()
                new_file_path.rename(old_file_path)
        else:
            print("No cached xlsx - forcing rebuild.")
            new_file_path.rename(old_file_path)

print("Starting parse:")
currSheet = pd.read_excel(helper_dir + "/text.xlsx", sheet_name="Translations")

# Collect the distinct text sections in sheet order.
textSections = []
for row in currSheet.iterrows():
    currRow = row[1]["Text Section"]
    if currRow not in textSections:
        textSections.append(currRow)

for lang in Languages:
    mainDict[lang.name] = {}
    for section in textSections:
        mainDict[lang.name][section] = {}
    mainDict[lang.name]["Warnings"] = {}
    mainDict[lang.name]["Errors"] = {}

for row in currSheet.iterrows():
    for lang in Languages:
        currRow = row[1]
        offset = lang.value
        # Fall back to the English column when this language's cell is empty.
        if pd.isna(currRow.iloc[FIRST_TRANSLATION_COL_INDEX + lang.value]):
            offset = Languages.English.value
        mainDict[lang.name][currRow.iloc[0]][currRow.iloc[1]] = {
            "bytes": currRow.iloc[FIRST_TRANSLATION_COL_INDEX + offset],
            "numLines": currRow.iloc[2],
            "pixelsPerChar": currRow.iloc[3],
            "pixelsInLine": currRow.iloc[4],
            "includeBoxBreaks": currRow.iloc[5],
        }

print("\tGenerating header file")
# generate the header file
# NOTE(review): the "#include \n" below lost its angle-bracket target when the
# file was mangled (an "<...>" include was stripped) -- restore the real header.
with open(os.curdir + '/include/translated_text.h', 'w') as hFile:
    hFile.write("// THIS FILE HAS BEEN GENERATED BY text_helper/main.py !\n\n#ifndef TRANSLATED_TEXT_H\n#define TRANSLATED_TEXT_H\n\n#include \n\n")
    sectionEnds = []
    index = 0
    # NOTE(review): `lang` here is whatever member the loop above left behind;
    # every language shares the same section keys, so any one works.
    for section in textSections:
        num = write_enum_to_header_file(hFile, section + "_", mainDict[lang.name][section])
        # BUGFIX: the _INDEX define was missing its trailing newline, which
        # glued the following /** comment onto the same header line.
        hFile.write("#define " + section + "_INDEX " + str(index) + "\n")
        if section == "PTGB":
            hFile.write(f"\n#define DIA_END {num}\n")
        hFile.write("/** Returns the LZ10 compressed " + section + " text table.*/\n")
        sectionEnds.append(num)
        index += 1
    hFile.write("#define NUM_TEXT_SECTIONS " + str(index) + "\n")
    hFile.write("const int text_section_lengths[] = {\n")
    for end in sectionEnds:
        hFile.write("\t" + str(end) + ",\n")
    hFile.write("};\n\n")
    hFile.write("const u8* get_compressed_text_table(int table_index);\n")
    hFile.write("\n#endif")

print("\tGenerating text tables")
# now generate the text tables
for lang in Languages:
    for section in textSections:
        table_file = os.curdir + '/to_compress/' + section + '_' + lang.name.lower() + '.bin'
        write_text_bin_file(table_file, mainDict[lang.name][section])

print("\tGenerating cpp file")
# now generate the cpp file
with open(os.curdir + '/source/translated_text.cpp', 'w') as cppFile:
    cppFile.write("// THIS FILE HAS BEEN GENERATED BY text_helper/main.py !\n#include \"translated_text.h\"\n#include \"debug_mode.h\"\n")
    # generate includes for each language
    for lang in Languages:
        for section in textSections:
            cppFile.write("#include \"" + section.upper() + "_" + lang.name.lower() + "_lz10_bin.h\"\n")
    for lang in Languages:
        cppFile.write(f"\n#if PTGB_BUILD_LANGUAGE == {lang.value + 1}\n")
        cppFile.write("const u8* get_compressed_text_table(int table_index)\n")
        cppFile.write("{\n")
        cppFile.write("\tswitch (table_index)\n\t{\n")
        for section in textSections:
            cppFile.write("\tcase(" + section + "_INDEX):\n")
            if section == "PTGB":
                cppFile.write("\tdefault:\n")
            cppFile.write("\t\treturn " + section + "_" + lang.name.lower() + "_lz10_bin;\n")
            cppFile.write("\t\tbreak;\n")
        cppFile.write("\t}\n")
        cppFile.write("}\n\n")
        cppFile.write(f"#endif\n\n\n")

print("\tOutputting json file")
# Decode the generated byte strings back to readable text for the json dump.
for lang in Languages:
    for section in textSections:
        for item in mainDict[lang.name][section]:
            string = mainDict[lang.name][section][item]["bytes"].split(" ")
            outText = ""
            arr = jpnCharArray if lang == Languages.Japanese else engCharArray
            for byte in string:
                outText += chr(arr[int(byte, 16)])
            mainDict[lang.name][section][item]["text"] = outText

with open(helper_dir + '/output.json', 'w') as jsonFile:
    jsonFile.write(json.dumps(mainDict))
print("Parse finished!\n")