pokeplatinum/tools/scripts/migration/json_text_banks.py
2025-08-17 15:50:14 -07:00

77 lines
2.1 KiB
Python
Executable File

#!/usr/bin/env python3
"""
This is a standalone script for existing end-users to migrate their text
banks from GMM (XML) to JSON. It requires the GMM files to still be present
on disk; users merging this commit should perform the conversion as part of
their merge from upstream.
"""
import json
import pathlib
import re
import xml.etree.ElementTree as xml
LINE_FEED = re.compile(r"(\r|\n|\f)")
def take_text(node: xml.Element, expected: str) -> str | list[str]:
assert node.tag == "language"
assert node.attrib.get("name", "") == expected
if not node.text:
return ""
text = node.text.replace("\\r", "\r").replace("\\n", "\n").replace("\\f", "\f")
splits = LINE_FEED.split(text)
if len(splits) == 1:
return text
lines: list[str] = []
for i in range(0, len(splits), 2):
pair = splits[i : i + 2]
if len(pair) > 1:
lines.append(f"{pair[0]}{pair[1]}")
else:
lines.append(pair[0])
if not lines[-1]: # Trim a trailing empty line
lines = lines[:-1]
return lines if len(lines) > 1 else lines[0]
def take_entry(entry: xml.Element) -> dict[str, int | str | list[str]]:
id = entry.attrib["id"]
assert entry[0].tag == "attribute", f"{id} in {fname}"
assert entry[0].text in ("used", "garbage"), f"{id} in {fname}"
if entry[0].text == "garbage":
return {
"id": id,
"garbage": 0 if not entry[2].text else len(entry[2].text),
}
else:
return {
"id": id,
"en_US": take_text(entry[1], "English"),
}
root = pathlib.Path("res/text")
for fname in root.rglob("*.gmm"):
bank_data = {}
with open(fname, "r", encoding="utf-8") as f:
bank_tree = xml.parse(f)
bank_root = bank_tree.getroot()
bank_data["key"] = int(bank_root[0].attrib["value"])
bank_data["messages"] = [take_entry(entry) for entry in bank_root[1:]]
out_fname = fname.with_suffix(".json")
with open(out_fname, "w", encoding="utf-8") as fout:
print(
json.dumps(bank_data, ensure_ascii=False, indent=2),
file=fout,
)