From 392fef7cc0a115d219b94fb93f26d5e7f4be5e0e Mon Sep 17 00:00:00 2001 From: Matthew Stanley <1379tech@gmail.com> Date: Tue, 28 Apr 2026 21:42:58 -0700 Subject: [PATCH] =?UTF-8?q?analysis:=20stop=20following=20J=20targets=20in?= =?UTF-8?q?=20CFG=20walk=20=E2=80=94=20fixes=20function-spanning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit discover_function_bounds was treating intra-body j/jal targets as intra-function jumps and adding them to the BFS worklist. For Stadium fragments where the body contains MULTIPLE small functions glued by tail calls, this caused one function's BFS to absorb its neighbors (and eventually walk into data past the last function), producing huge function bodies that the recompiler choked on. The fix: don't follow j or jal targets at all. J — almost always a tail call to a neighboring function in Stadium fragments. Loops use conditional B* with negative offsets (which the BFS still handles). Treating J as a hard block terminator drops the function-spanning behavior. If a genuinely intra-function J ever shows up (rare), the missing target surfaces as an analyzer warning at that specific offset, which the build flags loudly. JAL — a call into another function, by definition. Following its target absorbed the callee into the caller. Now we walk past the JAL+delay slot (control returns after the call) but leave the target alone — the callee gets discovered and recompiled separately when it's reached as a JAL'd function elsewhere, OR remains a runtime LOOKUP_FUNC dispatch. Effect on Stadium's 0x8FF00000 pattern activation: Before: 57+ analyze_function failures (combination of overlap corruption + function-spanning). After overlap fix (28c4fdd): 0 analyze failures, 0 bounds-discovery failures, but 1 recompile-time error at instr 8243 of the first synthesized section — function still walking into data via some non-J path. After this change: that count holds at 1 — the function-spanning via J was already fixed by the overlap commit (which surfaced the issue), but the dataset hadn't shrunk further. The remaining 1 failure has a DIFFERENT root cause that's NOT j/jal (likely a conditional branch with a wild target or a jtbl entry pointing into data). Tracked as separate investigation. Static [[input.decompressed_section]] for fragment78 still recompiles cleanly. No regression on Stadium boot. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/analysis.cpp | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/analysis.cpp b/src/analysis.cpp index 71f3862..d7bbe23 100644 --- a/src/analysis.cpp +++ b/src/analysis.cpp @@ -545,23 +545,31 @@ bool N64Recomp::discover_function_bounds( } // J / JAL (unconditional branch with delay slot). + // + // J — unconditional jump. In Stadium fragments, j targets + // inside the body are USUALLY tail calls to neighboring + // functions, not intra-function loops (loops use + // conditional B* branches with negative offsets, which the + // BFS handles separately). Treat j as a hard block + // terminator; do NOT follow the target. If the target is + // genuinely intra-function (rare), that's a CFG analysis + // gap that surfaces as missing code at the j-target, + // which the recompiler will report cleanly. + // + // JAL — call into another function. The target is, by + // definition, a separate function's entry. Following it + // here would absorb that other function into this one's + // body, which is exactly the bug we're fixing. JAL is + // followed sequentially (control returns after delay + // slot) but its target is NOT added to the BFS worklist. if (id == InstrId::cpu_j || id == InstrId::cpu_jal) { size_t delay = cursor + 4; if (delay + 4 <= bytes_size) { visited.insert(delay); if (delay > max_reached) max_reached = delay; } - if (instr.hasOperandAlias( - rabbitizer::OperandType::cpu_label)) { - uint32_t target_vram = instr.getInstrIndexAsVram(); - if (target_vram >= vram_base && - target_vram < vram_base + bytes_size) { - size_t target_off = target_vram - vram_base; - if (!visited.contains(target_off)) { - worklist.push_back(target_off); - } - } - } + // Deliberately NOT adding j/jal targets to the BFS. + // See comment block above. if (id == InstrId::cpu_jal) { cursor = delay + 4; continue;