pmd-sky/tools/sync_pmdsky_debug/sync_to_pmdsky_debug.py
2025-10-22 22:10:25 -04:00

315 lines
15 KiB
Python

import os
import re
from collections import defaultdict
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

from ruamel.yaml.comments import CommentedMap
from ruamel.yaml.scalarint import HexCapsInt, HexInt

from pmdsky_debug_reader import LANGUAGE_KEYS_XMAP_TO_PMDSKY_DEBUG, SYMBOLS_FOLDER, get_pmdsky_debug_location, read_pmdsky_debug_symbols
from symbol_details import ITCM_RAM_START_ADDRESSES, NONMATCHING_SYMBOLS_ARM7, NONMATCHING_SYMBOLS_ARM9, WRAM_OFFSET, SymbolDetails
from xmap_reader import HEADER_FOLDER, read_xmap_symbols
from yaml_writer import YamlManager, yaml
# Syncs symbols from the decomp to a local clone of pmdsky-debug (https://github.com/UsernameFodder/pmdsky-debug).
# To use this script, you will need:
# - A file named pmdsky_debug_location.txt with the file path to your local clone of pmdsky-debug.
# - Python dependencies in requirements.txt.
# Make sure there are no uncommitted changes in pmdsky-debug when running this, in case you need to revert.
# Symbols read from the local pmdsky-debug clone. Iterated further down as
# language -> section name -> address -> symbol.
pmdsky_debug_symbols = read_pmdsky_debug_symbols()
# Symbols read from the decomp's xMAP data. Iterated further down with the same
# language -> section name -> address -> symbol nesting.
xmap_symbols = read_xmap_symbols()
# Path of the local pmdsky-debug clone; joined with SYMBOLS_FOLDER to locate the symbol YAML files.
pmdsky_debug_location = get_pmdsky_debug_location()
# Matches names that follow the default naming pattern, e.g. `sub_0200ABCD`, `ov29_0200ABCD` or `_0200ABCD_EU`.
default_symbol_name = re.compile(r'^(?:ov\d{2}|sub)?_[\dA-F]{8}(?:_[\w]{2})?$')
# Matches the `__<8 hex digits>` (optionally `_<2 chars>`) suffix at the end of a symbol name.
multiple_symbol_suffix = re.compile(r'__[\dA-F]{8}(?:_[\w]{2})?$')


def get_base_symbol_name(symbol_name: str) -> str:
    """Return ``symbol_name`` without its trailing ``__<address>`` suffix.

    Names that do not end with the suffix matched by ``multiple_symbol_suffix``
    are returned unchanged.

    Args:
        symbol_name: The full symbol name, possibly carrying an address suffix.

    Returns:
        The symbol name with the address suffix (if any) removed.
    """
    suffix_match = multiple_symbol_suffix.search(symbol_name)
    if suffix_match is None:
        return symbol_name
    # Cut exactly where the suffix starts. Searching for the first '__' instead would
    # truncate names that legitimately contain a double underscore earlier on.
    return symbol_name[:suffix_match.start()]
def read_symbol_array(symbol_path: str, symbol_type_key: str, yaml_manager: YamlManager) -> List[Any]:
    """Return one symbol list from a pmdsky-debug symbol YAML file.

    The document is read through ``yaml_manager``. Its first top-level key is
    taken as the block holding the symbol lists, and the list stored under
    ``symbol_type_key`` inside that block is returned.

    Args:
        symbol_path: Path of the YAML file to read.
        symbol_type_key: Key of the wanted list inside the top-level block
            (the callers in this file pass 'functions' or 'data').
        yaml_manager: Object whose ``read_yaml`` loads the document.

    Returns:
        The list stored under ``symbol_type_key``.
    """
    document: Dict[str, Any] = yaml_manager.read_yaml(symbol_path)
    top_level_key = list(document.keys())[0]
    block: Dict[str, Any] = document[top_level_key]
    return block[symbol_type_key]
def find_symbol_in_header(symbol_name: str, is_data: bool, header_contents: List[str]) -> Optional[int]:
    """Find the first header line that declares ``symbol_name``.

    A data symbol is recognized by `` <name>`` followed by ``[`` or ``;``.
    A function is recognized by `` <name>(``.

    Args:
        symbol_name: Name of the symbol to look for.
        is_data: True to look for a data declaration, False for a function declaration.
        header_contents: Lines of the header file.

    Returns:
        The 0-based index of the first matching line, or None if no line matches.
    """
    if is_data:
        # Escape the name so characters that are special in a regex are matched literally,
        # and compile once instead of once per line.
        declaration = re.compile(fr' {re.escape(symbol_name)}[\[;]')
        return next((i for i, line in enumerate(header_contents) if declaration.search(line)), None)

    call_prefix = f' {symbol_name}('
    return next((i for i, line in enumerate(header_contents) if call_prefix in line), None)
@dataclass
class SubsymbolDir:
    """One symbol YAML file found inside a section's sub-directory.

    Instances are built from the 'address' and 'length' entries of the file's
    top-level block and are later indexed by language key to test whether a
    symbol address falls inside the file's range.
    """
    # Path of the YAML file.
    file_path: str
    # The file's 'address' entry, indexed by language key.
    addresses: Dict[str, int]
    # The file's 'length' entry, indexed by language key.
    length: Dict[str, int]
# Cache of the SubsymbolDir entries found under each sub-directory path, filled the first
# time a directory is looked up so that it is only walked once.
subsymbol_dirs: Dict[str, List[SubsymbolDir]] = {}
# Decomp typedef names and the type names used in their place in pmdsky-debug headers.
_PMDSKY_DEBUG_TYPEDEFS = {
    'u32': 'uint32_t',
    'u16': 'uint16_t',
    'u8': 'uint8_t',
    's32': 'int',
    's16': 'int16_t',
    's8': 'int8_t',
    'bool8': 'bool',
}
# Whole-word match, so identifiers that merely contain e.g. "s8" or "u32" are left untouched.
_DECOMP_TYPEDEF_PATTERN = re.compile(r'\b(?:' + '|'.join(_PMDSKY_DEBUG_TYPEDEFS) + r')\b')


def _to_pmdsky_debug_types(declaration: str) -> str:
    """Rewrite a declaration to use pmdsky-debug's type names and `type* name` pointer style."""
    converted = _DECOMP_TYPEDEF_PATTERN.sub(lambda match: _PMDSKY_DEBUG_TYPEDEFS[match.group(0)], declaration)
    return converted.replace(' *', '* ')


def _default_symbol_file(section_name: str) -> str:
    """Return the symbol YAML path, relative to the symbols folder, used by default for a section."""
    if section_name == 'main':
        return 'arm9.yml'
    if section_name == 'arm7':
        return 'arm7.yml'
    if section_name == 'ITCM':
        return os.path.join('arm9', 'itcm.yml')
    if section_name == 'ram':
        return 'ram.yml'
    # Any other section name is an overlay number.
    return f'overlay{int(section_name):02d}.yml'


def _get_subsymbol_files(subsymbol_dir: str) -> List[SubsymbolDir]:
    """Return the symbol files found under ``subsymbol_dir``, walking the directory on first use only."""
    if subsymbol_dir not in subsymbol_dirs:
        found: List[SubsymbolDir] = []
        subsymbol_dirs[subsymbol_dir] = found
        if os.path.exists(subsymbol_dir):
            for root, _, files in os.walk(subsymbol_dir):
                for file in files:
                    # itcm.yml is skipped here; the 'ITCM' section maps to it directly.
                    if file == 'itcm.yml' or not file.endswith('.yml'):
                        continue
                    file_path = os.path.join(root, file)
                    with open(file_path, 'r') as yaml_file:
                        yaml_contents = yaml.load(yaml_file)
                    # The top-level key is expected to equal the file name without its extension.
                    block = yaml_contents[file[:-4]]
                    found.append(SubsymbolDir(file_path, block['address'], block['length']))
    return subsymbol_dirs[subsymbol_dir]


def _read_asm_string_length(asm_path: str, symbol_name: str) -> Optional[int]:
    """Return the character count of the `.string` literal defining ``symbol_name``, or None.

    None is returned when the ASM file does not exist, the symbol's `.global`
    directive is not found, or the line two below it is not a `.string` literal.
    """
    if not os.path.exists(asm_path):
        return None
    with open(asm_path) as asm_file:
        asm_contents = asm_file.readlines()

    # Require the name to end right after the match so that `.global FOO` does not also
    # match `.global FOO_BAR`.
    global_directive = re.compile(fr'\t\.global {re.escape(symbol_name)}(?![\w$])')
    for i, line in enumerate(asm_contents):
        if not global_directive.match(line):
            continue
        # The literal is looked up two lines below the directive; bail out if the file ends first.
        if i + 2 >= len(asm_contents):
            return None
        target_asm_line = asm_contents[i + 2]
        string_index = target_asm_line.find('.string "')
        if string_index < 0:
            return None
        # Drop the closing quote and newline, and count each escaped "\n" as a single character.
        target_string = target_asm_line[string_index + len('.string "'):-2].replace('\\n', 'n')
        return len(target_string)
    return None


def sync_xmap_symbol(address: int, symbol: SymbolDetails, language: str, section_name: str, yaml_manager: YamlManager, pmdsky_debug_section: Dict[int, SymbolDetails]) -> None:
    """Sync one decomp symbol into the local pmdsky-debug clone.

    Depending on what pmdsky-debug already contains, this does one of:
    - nothing, if the symbol still has a default name or no matching header file exists;
    - add the decomp name as an alias of the symbol already defined at ``address``;
    - set or extend the address of the same-named YAML entry for this language;
    - insert a new YAML entry (kept in address order) and declare it in the header file.

    Args:
        address: Address of the symbol in the decomp for ``language``.
        symbol: The decomp symbol. Its ``name`` is replaced in place when it is listed
            in the non-matching symbol table of its section.
        language: Decomp language key, translated through LANGUAGE_KEYS_XMAP_TO_PMDSKY_DEBUG.
        section_name: 'main', 'arm7', 'ITCM', 'ram', or an overlay number as a string.
        yaml_manager: Used to load the YAML documents that are edited in place.
        pmdsky_debug_section: Symbols already in pmdsky-debug for this language and
            section, keyed by address.
    """
    # Skip symbols whose name still matches the default pattern (e.g. `sub_0200ABCD`).
    if default_symbol_name.match(symbol.name):
        return

    language_key = LANGUAGE_KEYS_XMAP_TO_PMDSKY_DEBUG[language]
    nonmatching_symbols = NONMATCHING_SYMBOLS_ARM7 if section_name == 'arm7' else NONMATCHING_SYMBOLS_ARM9
    if symbol.name in nonmatching_symbols:
        symbol.name = nonmatching_symbols[symbol.name]
    base_symbol_name = get_base_symbol_name(symbol.name)

    wram_address = None
    if section_name == 'arm7' and address >= 0x37F8000:
        # Shift ARM 7 WRAM to its ROM location.
        wram_address = address
        address -= WRAM_OFFSET

    if base_symbol_name in symbol_file_paths[section_name]:
        # The symbol is already known to pmdsky-debug: keep editing the file it lives in.
        symbol_path = symbol_file_paths[section_name][base_symbol_name]
    else:
        path_prefix = os.path.join(pmdsky_debug_location, SYMBOLS_FOLDER)
        symbol_path = os.path.join(path_prefix, _default_symbol_file(section_name))

        # Look through subdirectories to see if the symbol address is within range of them.
        # When several files contain the address, the one starting at the highest address wins.
        matching_subsymbol_file = None
        for candidate in _get_subsymbol_files(symbol_path[:-4]):
            file_address = candidate.addresses[language_key]
            # Half-open range: a symbol located exactly at the start of the file belongs to it.
            in_range = file_address <= address < file_address + candidate.length[language_key]
            if in_range and (matching_subsymbol_file is None or file_address > matching_subsymbol_file.addresses[language_key]):
                matching_subsymbol_file = candidate
        if matching_subsymbol_file is not None:
            symbol_path = matching_subsymbol_file.file_path

    symbol_type_key = 'data' if symbol.is_data else 'functions'

    if address in pmdsky_debug_section:
        # If the address is already defined in pmdsky-debug, add an alias with the new symbol name from the decomp.
        old_symbol = pmdsky_debug_section[address]
        base_old_symbol_names = [get_base_symbol_name(symbol_name) for symbol_name in old_symbol.get_all_names()]
        base_old_symbol_name = base_old_symbol_names[0]
        if base_symbol_name not in base_old_symbol_names and base_symbol_name not in symbol_file_paths[section_name]:
            print(f'Adding alias for {base_old_symbol_name}: {base_symbol_name}')
            for yaml_symbol in read_symbol_array(symbol_path, symbol_type_key, yaml_manager):
                if yaml_symbol['name'] != base_old_symbol_name:
                    continue
                if 'aliases' not in yaml_symbol:
                    yaml_symbol['aliases'] = [base_symbol_name]
                elif base_symbol_name not in yaml_symbol['aliases']:
                    yaml_symbol['aliases'].append(base_symbol_name)
                break
        return

    symbol_array = read_symbol_array(symbol_path, symbol_type_key, yaml_manager)

    # Locate the corresponding header file; symbols without one are not synced.
    header_path = symbol_path.replace(SYMBOLS_FOLDER, os.path.join('headers', symbol_type_key)).replace('.yml', '.h')
    if not os.path.exists(header_path):
        return
    with open(header_path, 'r') as header_file:
        header_contents = header_file.readlines()

    # ITCM entries keep the ITCM-space address under '<language>-ITCM' and a RAM address under
    # the plain language key. `address` is in ITCM space, so order by the matching key.
    ordering_key = f'{language_key}-ITCM' if section_name == 'ITCM' else language_key

    # Find the existing symbol and replace its address, or make a new one if it isn't there.
    matching_symbol_entry = None
    insert_index = None
    target_header_line = None
    for i, symbol_entry in enumerate(symbol_array):
        if base_symbol_name == symbol_entry['name']:
            matching_symbol_entry = symbol_entry
            break
        # Track the header line just above the most recently seen declaration until the insert
        # position is known. The new declaration is appended to that line, which places it
        # directly before the first symbol with a higher address.
        symbol_header_line = find_symbol_in_header(symbol_entry['name'], symbol.is_data, header_contents)
        if symbol_header_line is not None and insert_index is None:
            target_header_line = symbol_header_line - 1
        if ordering_key in symbol_entry['address']:
            current_symbol_address = symbol_entry['address'][ordering_key]
            if isinstance(current_symbol_address, list):
                current_symbol_address = current_symbol_address[0]
            if current_symbol_address > address and insert_index is None:
                insert_index = i
    symbol_preexisting = matching_symbol_entry is not None

    if not symbol_preexisting:
        matching_symbol_entry = {
            'name': base_symbol_name,
            'address': CommentedMap()
        }
        if insert_index is None:
            symbol_array.append(matching_symbol_entry)
        else:
            symbol_array.insert(insert_index, matching_symbol_entry)

    symbol_entry_language_addresses: Dict[str, Any] = matching_symbol_entry['address']
    if language_key not in symbol_entry_language_addresses:
        symbol_entry_language_addresses[language_key] = None
    symbol_entry_addresses = symbol_entry_language_addresses[language_key]
    hex_address = HexCapsInt(address)
    # When adding a new EU address, reorder it to the first key in the YAML for consistency with existing pmdsky-debug entries.
    reorder_languages = language_key == 'EU' and len(symbol_entry_language_addresses) > 1 and not symbol_entry_addresses

    if multiple_symbol_suffix.search(symbol.name):
        # Names with an address suffix share one YAML entry that lists every address.
        if symbol_entry_addresses is None:
            symbol_entry_language_addresses[language_key] = [hex_address]
            if reorder_languages:
                symbol_entry_language_addresses.move_to_end(language_key, last=False)
        else:
            if isinstance(symbol_entry_addresses, (HexCapsInt, HexInt)):
                print(f'Converting single address into array for {base_symbol_name}.')
                symbol_entry_addresses = [symbol_entry_addresses]
                symbol_entry_language_addresses[language_key] = symbol_entry_addresses
            if address not in symbol_entry_addresses:
                symbol_entry_addresses.append(hex_address)
                symbol_entry_addresses.sort()
            # The entry already had an address list for this language; nothing else to add.
            return
    else:
        if section_name == 'ITCM':
            # ITCM needs to be handled specially to add both ROM and RAM addresses.
            symbol_entry_language_addresses[language_key] = HexCapsInt(ITCM_RAM_START_ADDRESSES[language_key] + (hex_address - 0x1FF8000))
            symbol_entry_language_addresses[f'{language_key}-ITCM'] = hex_address
        else:
            symbol_entry_language_addresses[language_key] = HexCapsInt(hex_address)
        if reorder_languages:
            symbol_entry_language_addresses.move_to_end(language_key, last=False)
        if wram_address is not None:
            symbol_entry_language_addresses[language_key + '-WRAM'] = HexCapsInt(wram_address)
            # Keep NA-WRAM after the freshly added EU-WRAM. The entry may have no NA-WRAM key,
            # in which case move_to_end would raise KeyError.
            if reorder_languages and 'NA-WRAM' in symbol_entry_language_addresses:
                symbol_entry_language_addresses.move_to_end('NA-WRAM')

    if symbol_preexisting:
        print(f'Updating address of {base_symbol_name} (region {language_key}) in {symbol_path}')
        # The header already declares a preexisting symbol.
        return
    print(f'Adding {base_symbol_name} (region {language_key}) to {symbol_path}')

    # No anchor symbol was found in the header: pick a fallback insertion line.
    if target_header_line is None and 'arm9' in header_path:
        for i, line in enumerate(header_contents):
            if line.startswith('// If declaring'):
                target_header_line = i
                break
    if target_header_line is None:
        # Also covers 'arm9' paths without the marker comment, which would otherwise leave the
        # index unset and fail when indexing the header lines.
        target_header_line = len(header_contents) - 2

    # Build the declaration for the new symbol.
    symbol_header = None
    if symbol.is_data:
        # If the symbol is a data symbol, look through the ASM to find how long its string is.
        asm_path = os.path.join('asm', symbol.file_path.replace('.o', '.s'))
        string_length = _read_asm_string_length(asm_path, base_symbol_name)
        if string_length is not None:
            # NOTE(review): the declared size is the literal's character count, without a NUL
            # terminator or alignment padding. Confirm this is the intended array size.
            symbol_header = f'extern char {base_symbol_name}[{string_length}];\n'
        else:
            symbol_header = f'extern undefined {base_symbol_name};\n'
    else:
        # Reuse the function's declaration from the decomp header when one can be found.
        symbol_header_path = os.path.join(HEADER_FOLDER, symbol.file_path.replace('.o', '.h'))
        if not os.path.exists(symbol_header_path):
            symbol_header_path = os.path.join('lib', 'DSE', symbol_header_path)
        if os.path.exists(symbol_header_path):
            with open(symbol_header_path, 'r') as symbol_header_file:
                for line in symbol_header_file:
                    if f' {base_symbol_name}(' in line:
                        symbol_header = line
                        break

    if symbol_header is not None:
        # Match the typedefs used in pmdsky-debug.
        symbol_header = _to_pmdsky_debug_types(symbol_header)
    else:
        symbol_header = f'void {base_symbol_name}(void);\n'

    # Appending to an existing line inserts the declaration right after it.
    header_contents[target_header_line] += symbol_header
    print(f'Adding {base_symbol_name} to {header_path}')
    with open(header_path, 'w') as header_file:
        header_file.writelines(header_contents)
# Extract all pmdsky-debug file paths for symbols between all languages.
# Result: section name -> base symbol name -> path of the file the symbol was read from.
symbol_file_paths: Dict[str, Dict[str, str]] = defaultdict(dict)
for pmdsky_debug_language_symbols in pmdsky_debug_symbols.values():
    for section_name, pmdsky_debug_section in pmdsky_debug_language_symbols.items():
        for symbol in pmdsky_debug_section.values():
            symbol_file_paths[section_name][get_base_symbol_name(symbol.name)] = symbol.file_path

# Sync every decomp symbol, language by language and section by section.
with YamlManager() as yaml_manager:
    for language, xmap_language_symbols in xmap_symbols.items():
        pmdsky_debug_language_symbols = pmdsky_debug_symbols[language]
        for section_name, xmap_section in xmap_language_symbols.items():
            # Sections that pmdsky-debug does not have yet are treated as empty.
            pmdsky_debug_section = pmdsky_debug_language_symbols.get(section_name, {})
            for address, symbol in xmap_section.items():
                sync_xmap_symbol(address, symbol, language, section_name, yaml_manager, pmdsky_debug_section)