From 5f2ae6e4f74eefe95e238830b06f14f662077d2f Mon Sep 17 00:00:00 2001 From: Matthew Stanley <1379tech@gmail.com> Date: Tue, 28 Apr 2026 19:44:58 -0700 Subject: [PATCH] recomp: emit content_hash on pattern-synthesized sections (Shape A, build side) Adds Section::content_hash, populates it on pattern-synthesized sections with FNV-1a-64 of the first 0x100 bytes of the decompressed body, and emits it into recomp_overlays.inl's SectionTableEntry. The runtime side hashes the same window over the bytes Stadium loads at fragment_ptr and looks up the matching section by hash. Build-time and runtime use: - SAME hash algorithm: FNV-1a-64 - SAME window: 0x100 bytes (95% uniqueness across Stadium's 282 distinct fragment bodies; falls back to first-candidate on the residual ~5%) - SAME byte source: pre-relocation decompressed bytes (link-time form, before Stadium's R_MIPS_32 patches run) Section table emit gains the .content_hash field; non-pattern sections get hash=0, runtime-side condition `sec.content_hash != 0` filters them out of the candidate set. Pairs with the runtime-side change in lib/N64ModernRuntime/librecomp/src/overlays.cpp. Activation in PokemonStadiumRecomp's game.toml is gated on a follow-up: pattern-synthesized impl bodies currently get a basic forward-CFG-walked size which produces invalid C for fragments with internal jump tables (data interpreted as code). Future fix: emit pattern-section impl bodies as runtime-dispatched stubs instead of trying to statically recompile each body. Until then, fragment78 stays declared as a single static [[input.decompressed_section]]; the engine's pattern infrastructure is in place, ready to be flipped on once the impl-body emit is reshaped. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- include/recompiler/context.h | 6 ++++++ src/decompressed.cpp | 21 +++++++++++++++------ src/main.cpp | 5 +++-- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/include/recompiler/context.h b/include/recompiler/context.h index e7b4431..afc7515 100644 --- a/include/recompiler/context.h +++ b/include/recompiler/context.h @@ -107,6 +107,12 @@ namespace N64Recomp { bool fixed_address = false; // Only used in mods, indicates that the section shouldn't be relocated or placed into mod memory. bool globally_loaded = false; // Only used in mods, indicates that the section's functions should be globally loaded. Does not actually load the section's contents into ram. std::optional got_ram_addr = std::nullopt; + // Content hash for runtime identification when multiple sections + // share a link vram (e.g. pattern-synthesized decompressed + // sections). Nonzero only for those; emitted into + // recomp_overlays.inl's SectionTableEntry.content_hash. Computed + // as FNV-1a-64 of the first 0x100 bytes of the section's body. + uint64_t content_hash = 0; }; struct ReferenceSection { diff --git a/src/decompressed.cpp b/src/decompressed.cpp index 1b0d129..4488ac7 100644 --- a/src/decompressed.cpp +++ b/src/decompressed.cpp @@ -476,7 +476,8 @@ size_t add_decompressed_section(Context& context, uint32_t rom_wrapper, uint32_t vram, const std::string& section_name, - bool relocatable) + bool relocatable, + uint64_t content_hash) { if (blob.size() < 0x20) { std::fprintf(stderr, @@ -520,6 +521,7 @@ size_t add_decompressed_section(Context& context, section.name = section_name; section.executable = true; section.relocatable = relocatable; + section.content_hash = content_hash; if (!parse_fragment_relocs(blob, vram, section_index, section)) { return size_t(-1); @@ -768,24 +770,31 @@ bool synthesize_decompressed_patterns( if (hits.empty()) continue; - // Deduplicate by content hash. + // Deduplicate by content hash. 
Hash window is the first 0x100 + // bytes — measured at 95% uniqueness for Stadium's 0x8FF00000 + // slot. The runtime side uses the SAME window over the bytes + // Stadium decompressed into RDRAM, so build-time and runtime + // hashes match. (Smaller fragments hash their full body.) + constexpr size_t HASH_WINDOW = 0x100; std::unordered_map seen_hashes; size_t added = 0; size_t deduped = 0; for (auto& [wrap_off, body] : hits) { - uint64_t h = fnv1a_64(body.data(), body.size()); - auto it = seen_hashes.find(h); + const size_t window = std::min(HASH_WINDOW, body.size()); + const uint64_t content_hash = + fnv1a_64(body.data(), window); + auto it = seen_hashes.find(content_hash); if (it != seen_hashes.end()) { deduped++; continue; } - seen_hashes.emplace(h, wrap_off); + seen_hashes.emplace(content_hash, wrap_off); const std::string section_name = fmt::format( "{}__rom_{:X}", base_name, wrap_off); size_t si = add_decompressed_section( context, body, wrap_off, p.vram, - section_name, p.relocatable); + section_name, p.relocatable, content_hash); if (si == size_t(-1)) { std::fprintf(stderr, "decompressed: pattern %s — failed to add section " diff --git a/src/main.cpp b/src/main.cpp index fb37cd8..0ff68da 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1058,9 +1058,10 @@ int main(int argc, char** argv) { std::string section_relocs_array_size = section_relocs.empty() ? "0" : fmt::format("ARRLEN({})", section_relocs_array_name); // Write the section's table entry. 
- section_load_table += fmt::format(" {{ .rom_addr = 0x{0:08X}, .ram_addr = 0x{1:08X}, .size = 0x{2:08X}, .funcs = {3}, .num_funcs = ARRLEN({3}), .relocs = {4}, .num_relocs = {5}, .index = {6} }},\n", + section_load_table += fmt::format(" {{ .rom_addr = 0x{0:08X}, .ram_addr = 0x{1:08X}, .size = 0x{2:08X}, .funcs = {3}, .num_funcs = ARRLEN({3}), .relocs = {4}, .num_relocs = {5}, .index = {6}, .content_hash = 0x{7:016X}ull }},\n", section.rom_addr, section.ram_addr, section.size, section_funcs_array_name, - section_relocs_array_name, section_relocs_array_size, section_index); + section_relocs_array_name, section_relocs_array_size, section_index, + section.content_hash); // Write the section's functions. fmt::print(overlay_file, "static FuncEntry {}[] = {{\n", section_funcs_array_name);