Updating text helper

This commit is contained in:
The Gears of Progress 2026-01-20 14:09:00 -05:00
parent 1acbe99f93
commit 6220dc1a07

View File

@ -11,68 +11,6 @@ import sys
import filecmp
from pathlib import Path
# ---------------------------------------------------------------------------
# Spreadsheet refresh: download the latest translations xlsx, fall back to
# the cached copy when offline, and exit early when nothing changed.
# ---------------------------------------------------------------------------
update = True
print("Running text_helper:")
BASE_DIR = Path(__file__).resolve().parent
FIRST_TRANSLATION_COL_INDEX = 8
if update:
    url = 'https://docs.google.com/spreadsheets/d/14LLs5lLqWasFcssBmJdGXjjYxARAJBa_QUOUhXZt4v8/export?format=xlsx'
    new_file_path = BASE_DIR / 'new_text.xlsx'
    old_file_path = BASE_DIR / 'text.xlsx'
    json_file_path = BASE_DIR / 'output.json'
    offline = False

    # ---- Attempt download ----
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        with open(new_file_path, 'wb') as f:
            f.write(response.content)
        print("File downloaded successfully")
    # FIX: requests.exceptions.Timeout is the parent of both ConnectTimeout
    # and ReadTimeout; catching only ReadTimeout let a connect timeout crash
    # the script instead of falling back to the cached file.
    except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
        if old_file_path.exists():
            print("No internet. Using cached xlsx.")
            offline = True
        else:
            print("ERROR: No internet and no cached xlsx. Cannot continue.")
            sys.exit(1)

    # ---- Decision logic ----
    if offline:
        # Cached xlsx exists (guaranteed by the except branch above).
        # FIX: messages said "XML" but the cached artifact is an xlsx file.
        if json_file_path.exists():
            print("Offline mode: trusting cached xlsx + JSON. Skipping parse.\n")
            sys.exit(0)
        else:
            print("Offline mode: xlsx present but JSON missing. Rebuilding.")
    else:
        # Online mode: keep the cached copy when the download is equivalent
        # on the Translations sheet; otherwise replace it with the new file.
        if old_file_path.exists():
            new_df = pd.read_excel(new_file_path, sheet_name="Translations")
            old_df = pd.read_excel(old_file_path, sheet_name="Translations")
            if new_df.equals(old_df):
                print("Downloaded file is identical.")
                new_file_path.unlink()
                if json_file_path.exists():
                    print("Skipping parse.\n")
                    sys.exit(0)
                else:
                    print("JSON missing - forcing rebuild.")
            else:
                old_file_path.unlink()
                new_file_path.rename(old_file_path)
        else:
            print("No cached xlsx - forcing rebuild.")
            new_file_path.rename(old_file_path)
engCharArray = [
0x20, 0xC0, 0xC1, 0xC2, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0x20, 0xCE, 0xCF, 0xD2, 0xD3, 0xD4,
@ -86,7 +24,7 @@ engCharArray = [
0x20, 0x20, 0x20, 0x20, 0x1D49, 0x3C, 0x3E, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x2B3, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x21, 0x3F, 0x2E, 0x2D, 0x30FB,
0x2026, 0x201C, 0x201D, 0x2018, 0x2019, 0x2642, 0x2640, 0x20, 0x2C, 0xD7, 0x2F, 0x41, 0x42, 0x43, 0x44, 0x45,
0x2025, 0x201C, 0x201D, 0x2018, 0x2019, 0x2642, 0x2640, 0x20, 0x2C, 0xD7, 0x2F, 0x41, 0x42, 0x43, 0x44, 0x45,
0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55,
0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x25B6,
@ -119,7 +57,7 @@ jpnCharArray = [
0x305E, 0x3060, 0x3062, 0x3065, 0x3067, 0x3069, 0x3070, 0x3073, 0x3076, 0x3079, 0x307C, 0x3071, 0x3074, 0x3077, 0x307A, 0x307D,
0x3063, 0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30AB, 0x30AD, 0x30AF, 0x30B1, 0x30B3, 0x30B5, 0x30B7, 0x30B9, 0x30BB, 0x30BD,
0x30BF, 0x30C1, 0x30C4, 0x30C6, 0x30C8, 0x30CA, 0x30CB, 0x30CC, 0x30CD, 0x30CE, 0x30CF, 0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30DE,
0x30DF, 0x30E0, 0x30E1, 0x30E2, 0x30E4, 0x30E6, 0x30E8, 0x30E9, 0x30EA, 0x20, 0x30EC, 0x30ED, 0x30EF, 0x30F2, 0x30F3, 0x30A1,
0x30DF, 0x30E0, 0x30E1, 0x30E2, 0x30E4, 0x30E6, 0x30E8, 0x30E9, 0x30EA, 0x30EB, 0x30EC, 0x30ED, 0x30EF, 0x30F2, 0x30F3, 0x30A1,
0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30E3, 0x30E5, 0x30E7, 0x30AC, 0x30AE, 0x30B0, 0x30B2, 0x30B4, 0x30B6, 0x30B8, 0x30BA, 0x30BC,
0x30BE, 0x30C0, 0x30C2, 0x30C5, 0x30C7, 0x30C9, 0x30D0, 0x30D3, 0x30D6, 0x30D9, 0x30DC, 0x30D1, 0x30D4, 0x30D7, 0x30DA, 0x30DD,
0x30C3, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFF01, 0xFF1F, 0x3002, 0x30FC, 0x30FB,
@ -153,12 +91,20 @@ charConversionList = [
["'", ""],
]
# Escape-tag substitutions: each in-text control tag maps to a single
# placeholder glyph so downstream width/byte logic can treat the tag as one
# character. NOTE(review): the tags presumably mean scroll/clear/newline/end
# ({SCL}/{CLR}/{NEW}/{END}) — confirm against the game's text engine.
escapeCharConversionList = [
["{SCL}", "Ş"],
["{CLR}", "ȼ"],
["{NEW}", "Ň"],
["{END}", "ƞ"],
]
# Record a warning or error message once per language, then echo it.
# type: category string, e.g. "Warning" or "Error" (pluralized into the
#       bucket key). NOTE(review): shadows the `type` builtin — consider
#       renaming at the next interface change.
# text: human-readable message body.
def logWarningError(type, text):
nType = type + "s"
nText = type + ": " + text
# Deduplicate: store only if an identical message is not already recorded.
# `mainDict` and `lang` appear to be module-level globals (lang = current
# language) — neither is defined in this view; verify against the caller.
if nText not in mainDict[lang.name][nType].values():
# Keys are consecutive ints starting at 0; `default=-1` handles the
# empty-bucket case.
mainDict[lang.name][nType][max(mainDict[lang.name][nType].keys(), default =- 1) + 1] = nText
#print(nText)
# NOTE(review): the diff leaves this print's indentation ambiguous — confirm
# whether it runs unconditionally or only when a new message is recorded.
print(nText)
def convertByte(incoming, array):
for pair in charConversionList:
@ -166,7 +112,6 @@ def convertByte(incoming, array):
incoming = ord(pair[1])
logWarningError("Warning", f"Character {pair[0]} was used but is not in character table. Replaced with {pair[1]} .")
index = 0
for val in array:
if val == incoming:
@ -178,13 +123,21 @@ def convertByte(incoming, array):
# Greedy word-wrapper: splits `sentence` into display lines that fit within
# `pixelsInLine`, starting the first line at `offset` pixels, using
# `pixelsPerChar` for glyph widths. Returns (final line length + offset,
# line count, wrapped string) — see the return at the end of the function.
def SplitSentenceIntoLines(sentence, offset, pixelsPerChar, pixelsInLine):
# If we can optimize this to remove the spaces, it could save a few bytes.
splitChars = [' ', '']
outStr = ""
currLine = ""
lineCount = 0
currWordIndex = 0
lineLength = 0
spaceLength = 0
# NOTE(review): bug — str.replace returns a new string and the result is
# discarded, so this loop is a no-op. Likely intended:
# `sentence = sentence.replace(char, " ")`. Also note '' as a pattern:
# replacing the empty string would insert spaces everywhere if assigned.
for char in splitChars:
sentence.replace(char, " ")
words = sentence.split()
# NOTE(review): same bug — `word.replace(...)` is discarded, so escape tags
# are never substituted into the word list here. Likely intended to assign
# back into words[i].
for word in words:
for pair in escapeCharConversionList:
word.replace(pair[0], pair[1])
while(currWordIndex < len(words)):
word = words[currWordIndex]
wordLength = 0
@ -228,7 +181,7 @@ def SplitSentenceIntoLines(sentence, offset, pixelsPerChar, pixelsInLine):
# Test if the word is too long in general
elif (wordLength > pixelsInLine):
logWarningError("Error", f"Word {word} exceeds alloted length")
logWarningError("Error", f"Word {word} exceeds alloted length ({pixelsInLine} pixels)")
currWordIndex += 1
# Test if adding the word will go over our alloted space
@ -250,18 +203,18 @@ def SplitSentenceIntoLines(sentence, offset, pixelsPerChar, pixelsInLine):
outStr += currLine
return lineLength + offset, lineCount, outStr
# -*- coding: utf-8 -*-
import re
# Regex fragments composed by split_into_sentences to find real sentence
# boundaries while skipping periods inside abbreviations, acronyms, numbers
# and URLs.
alphabets= r"([A-Za-z])"          # any single ASCII letter
prefixes = r"(Mr|St|Mrs|Ms|Dr)[.]"    # title abbreviations ending in a period
suffixes = r"(Inc|Ltd|Jr|Sr|Co)"      # company/name suffix abbreviations
starters = r"(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"  # words that commonly begin a new sentence
acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"    # 2-3 letter dotted acronyms, e.g. U.S.
websites = r"[.](com|net|org|io|gov|edu|me)"     # dot followed by a common TLD
digits = r"([0-9])"               # any single digit (e.g. decimals like 3.14)
multiple_dots = r'\.{2,}'         # runs of two or more periods (ellipses)
def split_into_sentences(text: str) -> list[str]:
# -*- coding: utf-8 -*-
import re
alphabets= r"([A-Za-z])"
prefixes = r"(Mr|St|Mrs|Ms|Dr)[.]"
suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
starters = r"(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = r"[.](com|net|org|io|gov|edu|me)"
digits = r"([0-9])"
multiple_dots = r'\.{2,}'
"""
Split the text into sentences.
@ -319,9 +272,6 @@ class Languages(Enum):
SpanishEU = 5
SpanishLA = 6
# read by default 1st sheet of an excel file
# NOTE(review): `dir` shadows the builtin of the same name and is relative to
# the current working directory (os.curdir), unlike BASE_DIR elsewhere in the
# file; renaming would touch every reference below, so it is left as-is.
dir = os.curdir + "/text_helper"
# Accumulator for parsed output: logWarningError stores entries under
# mainDict[<language name>]["Warnings"/"Errors"]; other categories are
# populated elsewhere in the file.
mainDict = {}
def convert_item(ogDict):
@ -443,6 +393,73 @@ def write_enum_to_header_file(hFile, prefix, dictionary):
hFile.write("\n")
return num
# ---------------------------------------------------------------------------
# Main: refresh the translations spreadsheet (download or cached fallback),
# decide whether a re-parse is needed, then start parsing.
# ---------------------------------------------------------------------------
update = True
print("Running text_helper:")
BASE_DIR = Path(__file__).resolve().parent
FIRST_TRANSLATION_COL_INDEX = 8

# read by default 1st sheet of an excel file
# NOTE(review): `dir` shadows the builtin and is relative to the CWD, while
# the download below writes next to the script (BASE_DIR). These point at
# different files unless the script is run from its parent directory —
# TODO confirm intended behavior before unifying.
dir = os.curdir + "/text_helper"

if update:
    url = 'https://docs.google.com/spreadsheets/d/14LLs5lLqWasFcssBmJdGXjjYxARAJBa_QUOUhXZt4v8/export?format=xlsx'
    new_file_path = BASE_DIR / 'new_text.xlsx'
    old_file_path = BASE_DIR / 'text.xlsx'
    json_file_path = BASE_DIR / 'output.json'
    offline = False

    # ---- Attempt download ----
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        with open(new_file_path, 'wb') as f:
            f.write(response.content)
        print("File downloaded successfully")
    # FIX: requests.exceptions.Timeout covers both ConnectTimeout and
    # ReadTimeout; catching only ReadTimeout let connect timeouts crash
    # instead of falling back to the cached file.
    except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
        if old_file_path.exists():
            print("No internet. Using cached xlsx.")
            offline = True
        else:
            print("ERROR: No internet and no cached xlsx. Cannot continue.")
            sys.exit(1)

    # ---- Decision logic ----
    if offline:
        # Cached xlsx exists (guaranteed by the except branch above).
        # FIX: messages said "XML" but the cached artifact is an xlsx file.
        if json_file_path.exists():
            print("Offline mode: trusting cached xlsx + JSON. Skipping parse.\n")
            sys.exit(0)
        else:
            print("Offline mode: xlsx present but JSON missing. Rebuilding.")
    else:
        # Online mode: keep the cached copy when the download is equivalent
        # on the Translations sheet; otherwise replace it with the new file.
        if old_file_path.exists():
            new_df = pd.read_excel(new_file_path, sheet_name="Translations")
            old_df = pd.read_excel(old_file_path, sheet_name="Translations")
            if new_df.equals(old_df):
                print("Downloaded file is identical.")
                new_file_path.unlink()
                if json_file_path.exists():
                    print("Skipping parse.\n")
                    sys.exit(0)
                else:
                    print("JSON missing - forcing rebuild.")
            else:
                old_file_path.unlink()
                new_file_path.rename(old_file_path)
        else:
            print("No cached xlsx - forcing rebuild.")
            new_file_path.rename(old_file_path)

print("Starting parse:")
currSheet = pd.read_excel(dir + "/text.xlsx", sheet_name="Translations")