#include "function_patcher.h" #include "FunctionAddressProvider.h" #include "PatchedFunctionData.h" #include "utils/CThread.h" #include "utils/logger.h" #include "utils/utils.h" #include #include #include #include #include #include #include #include #include #include enum class PatchState { PREPARE, MAIN_CORE_PATCHING_DONE, ALL_CORES_PATCHING_DONE }; static volatile PatchState gPatchState = PatchState::PREPARE; static volatile int32_t gCoresReady = 0; static volatile int32_t gCoresFlushed = 0; static std::recursive_mutex sPatch_RestoreMutex; static OSSpinLock sGlobalSpinLock; static bool sSpinLockInitialized = false; struct WorkerTask { uint32_t targetPhys; uint32_t sourcePhys; uint32_t effectiveAddr; void *jumpData; uint32_t jumpDataSize; void *jumpToOriginal; void *realCallFunctionAddressPtr; }; static void applyKernelPatchOnCore(CThread *thread, void *arg) { (void) thread; auto *tasks = (std::vector *) arg; OSSpinLock localLock; OSInitSpinLock(&localLock); OSUninterruptibleSpinLock_Acquire(&localLock); OSAddAtomic(&gCoresReady, 1); // Wait for the main core to finish preparing physical addresses while (gPatchState == PatchState::PREPARE) { asm volatile("nop"); } for (const auto &task : *tasks) { KernelCopyData(task.targetPhys, task.sourcePhys, 4); } for (const auto &task : *tasks) { if (task.jumpData) { ICInvalidateRange(task.jumpData, task.jumpDataSize * sizeof(uint32_t)); } if (task.jumpToOriginal) { ICInvalidateRange(task.jumpToOriginal, 5 * sizeof(uint32_t)); } if (task.realCallFunctionAddressPtr) { ICInvalidateRange(task.realCallFunctionAddressPtr, sizeof(uint32_t)); } if (task.effectiveAddr) { ICInvalidateRange((void *) task.effectiveAddr, 4); } } // Force pipeline flush asm volatile("sync; isync"); // Atomically signal main core that our caches are clean OSAddAtomic(&gCoresFlushed, 1); // Wait for the release signal while (gPatchState == PatchState::ALL_CORES_PATCHING_DONE) {} // RESTORE INTERRUPTS OSUninterruptibleSpinLock_Release(&localLock); } struct PatchDispatchCtx { std::shared_ptr func; bool result; }; struct BatchPatchDispatchCtx { std::vector> *list; bool result; }; static void PatchFunctionsBatchDispatcher(CThread *thread, void *arg) { (void) thread; auto *ctx = (BatchPatchDispatchCtx *) arg; ctx->result = PatchFunctions(*(ctx->list)); } static void RestoreFunctionDispatcher(CThread *thread, void *arg) { (void) thread; auto *ctx = (PatchDispatchCtx *) arg; ctx->result = RestoreFunction(ctx->func); } bool PatchFunctions(std::vector> &patchedFunctions) { if (OSGetCoreId() != OSGetMainCoreId()) { DEBUG_FUNCTION_LINE_INFO("PatchFunctions called from Core %d. Dispatching to Main Core %d...", OSGetCoreId(), OSGetMainCoreId()); BatchPatchDispatchCtx ctx = {&patchedFunctions, false}; { CThread thread(CThread::eAttributeAffCore1, OSGetCurrentThread()->priority, 0x1000, PatchFunctionsBatchDispatcher, &ctx); thread.resumeThread(); } return ctx.result; } std::lock_guard lock(sPatch_RestoreMutex); gPatchState = PatchState::PREPARE; gCoresReady = 0; gCoresFlushed = 0; std::vector> validToPatch; validToPatch.reserve(patchedFunctions.size()); for (auto &patch : patchedFunctions) { if (patch->isPatched) { continue; } if (!patch->shouldBePatched()) { continue; } if (!patch->updateFunctionAddresses()) { continue; } if (!ReadFromPhysicalAddress(patch->realPhysicalFunctionAddress, &patch->replacedInstruction)) { DEBUG_FUNCTION_LINE_ERR("Failed to read instruction for %s", patch->functionName.value_or("").c_str()); continue; } patch->generateJumpToOriginal(); patch->generateReplacementJump(); validToPatch.push_back(patch); } if (validToPatch.empty()) { return true; } // This is very important. Otherwise the heap meta data might not up to date on all cores DCFlushRange(gJumpHeapData, JUMP_HEAP_DATA_SIZE); ICInvalidateRange(gJumpHeapData, JUMP_HEAP_DATA_SIZE); std::vector tasks; tasks.reserve(validToPatch.size()); for (auto &pf : validToPatch) { WorkerTask task = {}; task.targetPhys = pf->realPhysicalFunctionAddress; task.effectiveAddr = pf->realEffectiveFunctionAddress; task.jumpData = pf->jumpData; task.jumpDataSize = pf->jumpDataSize; task.jumpToOriginal = pf->jumpToOriginal; task.realCallFunctionAddressPtr = pf->realCallFunctionAddressPtr; uint32_t replace_ptr = (uint32_t) &pf->replaceWithInstruction; if (replace_ptr < 0x00800000 || replace_ptr >= 0x01000000) { task.sourcePhys = OSEffectiveToPhysical(replace_ptr); } else { task.sourcePhys = replace_ptr + 0x30800000 - 0x00800000; } tasks.push_back(task); } if (!sSpinLockInitialized) { OSInitSpinLock(&sGlobalSpinLock); sSpinLockInitialized = true; } CThread *threadA = CThread::create(applyKernelPatchOnCore, &tasks, CThread::eAttributeAffCore2, 0); CThread *threadB = CThread::create(applyKernelPatchOnCore, &tasks, CThread::eAttributeAffCore0, 0); threadA->resumeThread(); threadB->resumeThread(); while (gCoresReady < 2) { OSSleepTicks(1); } DEBUG_FUNCTION_LINE_VERBOSE("Applying %d patches on Core %d", validToPatch.size(), OSGetCoreId()); OSUninterruptibleSpinLock_Acquire(&sGlobalSpinLock); // D-Cache flush and Kernel Copy for all tasks for (size_t i = 0; i < validToPatch.size(); ++i) { auto &pf = validToPatch[i]; auto &task = tasks[i]; uint32_t replace_ptr = (uint32_t) &pf->replaceWithInstruction; DCFlushRange((void *) replace_ptr, 4); if (pf->jumpData) DCFlushRange(pf->jumpData, pf->jumpDataSize * sizeof(uint32_t)); if (pf->jumpToOriginal) DCFlushRange(pf->jumpToOriginal, 5 * sizeof(uint32_t)); if (pf->realCallFunctionAddressPtr) DCFlushRange(pf->realCallFunctionAddressPtr, sizeof(uint32_t)); KernelCopyData(task.targetPhys, task.sourcePhys, 4); } DCFlushRange(tasks.data(), sizeof(WorkerTask) * tasks.size()); DCFlushRange(&tasks, sizeof(tasks)); OSMemoryBarrier(); // Release remote cores gPatchState = PatchState::MAIN_CORE_PATCHING_DONE; // Invalidate Main Core I-Caches for (auto &task : tasks) { if (task.jumpData) { ICInvalidateRange(task.jumpData, task.jumpDataSize * sizeof(uint32_t)); } if (task.jumpToOriginal) { ICInvalidateRange(task.jumpToOriginal, 5 * sizeof(uint32_t)); } if (task.realCallFunctionAddressPtr) { ICInvalidateRange(task.realCallFunctionAddressPtr, sizeof(uint32_t)); } ICInvalidateRange((void *) task.effectiveAddr, 4); } asm volatile("sync; isync"); while (gCoresFlushed < 2) { asm volatile("nop"); } OSUninterruptibleSpinLock_Release(&sGlobalSpinLock); gPatchState = PatchState::ALL_CORES_PATCHING_DONE; delete threadA; delete threadB; for (auto &pf : validToPatch) { pf->isPatched = true; } return true; } // Single Patch function simply wraps the Batch function bool PatchFunction(std::shared_ptr &patchedFunction) { std::vector list = {patchedFunction}; return PatchFunctions(list); } bool RestoreFunction(std::shared_ptr &patchedFunction) { if (OSGetCoreId() != OSGetMainCoreId()) { DEBUG_FUNCTION_LINE_INFO("RestoreFunction called from Core %d. Dispatching to Main Core %d...", OSGetCoreId(), OSGetMainCoreId()); PatchDispatchCtx ctx = {patchedFunction, false}; CThread *dispatchThread = CThread::create(RestoreFunctionDispatcher, &ctx, CThread::eAttributeAffCore1, 0); delete dispatchThread; return ctx.result; } if (!patchedFunction->isPatched) { DEBUG_FUNCTION_LINE_VERBOSE("Skip restoring function because it's not patched"); return true; } if (patchedFunction->replacedInstruction == 0 || patchedFunction->realEffectiveFunctionAddress == 0) { DEBUG_FUNCTION_LINE_ERR("Failed to restore function, information is missing."); return false; } std::lock_guard lock(sPatch_RestoreMutex); auto targetAddrPhys = (uint32_t) patchedFunction->realPhysicalFunctionAddress; if (patchedFunction->library != LIBRARY_OTHER) { targetAddrPhys = (uint32_t) OSEffectiveToPhysical(patchedFunction->realEffectiveFunctionAddress); } // Check if patched instruction is still loaded. uint32_t currentInstruction; if (!ReadFromPhysicalAddress(patchedFunction->realPhysicalFunctionAddress, ¤tInstruction)) { DEBUG_FUNCTION_LINE_ERR("Failed to read instruction."); return false; } if (currentInstruction != patchedFunction->replaceWithInstruction) { DEBUG_FUNCTION_LINE_WARN("Instruction is different than expected. Skip restoring. Expected: %08X Real: %08X", currentInstruction, patchedFunction->replaceWithInstruction); return false; } DEBUG_FUNCTION_LINE_VERBOSE("Restoring %08X to %08X [%08X]", (uint32_t) patchedFunction->replacedInstruction, patchedFunction->realEffectiveFunctionAddress, targetAddrPhys); auto sourceAddr = (uint32_t) &patchedFunction->replacedInstruction; auto sourceAddrPhys = (uint32_t) OSEffectiveToPhysical(sourceAddr); // These hardcoded values should be replaced with something more dynamic. if (sourceAddrPhys == 0 && (sourceAddr >= 0x00800000 && sourceAddr < 0x01000000)) { sourceAddrPhys = sourceAddr + (0x30800000 - 0x00800000); } if (sourceAddrPhys == 0) { OSFatal("FunctionPatcherModule: Failed to get physical address"); } // Map Restore into the exact same Task structure for the generic worker std::vector tasks; WorkerTask task = {}; task.targetPhys = targetAddrPhys; task.sourcePhys = sourceAddrPhys; task.effectiveAddr = patchedFunction->realEffectiveFunctionAddress; tasks.push_back(task); DCFlushRange(tasks.data(), sizeof(task) * tasks.size()); DCFlushRange(&tasks, sizeof(tasks)); OSMemoryBarrier(); if (!sSpinLockInitialized) { OSInitSpinLock(&sGlobalSpinLock); sSpinLockInitialized = true; } gPatchState = PatchState::PREPARE; gCoresReady = 0; gCoresFlushed = 0; CThread *threadA = CThread::create(applyKernelPatchOnCore, &tasks, CThread::eAttributeAffCore2, 0, 0x1000); CThread *threadB = CThread::create(applyKernelPatchOnCore, &tasks, CThread::eAttributeAffCore0, 0, 0x1000); threadA->resumeThread(); threadB->resumeThread(); while (gCoresReady < 2) { OSSleepTicks(1); } DEBUG_FUNCTION_LINE_VERBOSE("Restore on thread for %08X on Core %d", patchedFunction->realPhysicalFunctionAddress, OSGetCoreId()); OSUninterruptibleSpinLock_Acquire(&sGlobalSpinLock); DCFlushRange(tasks.data(), tasks.size() * sizeof(WorkerTask)); KernelCopyData(task.targetPhys, task.sourcePhys, 4); OSMemoryBarrier(); DCFlushRange((void *) task.effectiveAddr, 4); gPatchState = PatchState::MAIN_CORE_PATCHING_DONE; ICInvalidateRange((void *) task.effectiveAddr, 4); asm volatile("sync; isync"); while (gCoresFlushed < 2) { asm volatile("nop");} OSUninterruptibleSpinLock_Release(&sGlobalSpinLock); gPatchState = PatchState::ALL_CORES_PATCHING_DONE; delete threadA; delete threadB; patchedFunction->isPatched = false; return true; }