mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2026-05-09 04:13:28 -05:00
Merge 2fe7e109aa into 30a20d75d2
This commit is contained in:
commit
5ba2c8f36f
|
|
@ -351,8 +351,10 @@ void Jit64::Shutdown()
|
|||
void Jit64::FallBackToInterpreter(UGeckoInstruction inst)
|
||||
{
|
||||
FlushCarry();
|
||||
gpr.Flush(BitSet32(0xFFFFFFFF), RegCache::IgnoreDiscardedRegisters::Yes);
|
||||
fpr.Flush(BitSet32(0xFFFFFFFF), RegCache::IgnoreDiscardedRegisters::Yes);
|
||||
gpr.Flush(BitSet32(0xFFFFFFFF), RegCache::FlushMode::Full,
|
||||
RegCache::IgnoreDiscardedRegisters::Yes);
|
||||
fpr.Flush(BitSet32(0xFFFFFFFF), RegCache::FlushMode::Full,
|
||||
RegCache::IgnoreDiscardedRegisters::Yes);
|
||||
|
||||
if (js.op->canEndBlock)
|
||||
{
|
||||
|
|
@ -1151,7 +1153,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||
// output, which needs to be bound in the actual instruction compilation.
|
||||
// TODO: make this smarter in the case that we're actually register-starved, i.e.
|
||||
// prioritize the more important registers.
|
||||
gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable);
|
||||
gpr.PreloadRegisters(op.regsIn & (op.gprWillBeRead | op.gprWillBeWritten) &
|
||||
~op.gprDiscardable);
|
||||
fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable);
|
||||
}
|
||||
|
||||
|
|
@ -1230,8 +1233,12 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||
gpr.Discard(op.gprDiscardable);
|
||||
fpr.Discard(op.fprDiscardable);
|
||||
}
|
||||
gpr.Flush(~op.gprInUse & (op.regsIn | op.regsOut));
|
||||
fpr.Flush(~op.fprInUse & (op.fregsIn | op.GetFregsOut()));
|
||||
gpr.Flush(~(op.gprWillBeRead | op.gprWillBeWritten) & (op.regsIn | op.regsOut),
|
||||
RegCache::FlushMode::Full);
|
||||
fpr.Flush(~(op.fprWillBeRead | op.fprWillBeWritten) & (op.fregsIn | op.GetFregsOut()),
|
||||
RegCache::FlushMode::Full);
|
||||
gpr.Flush(~op.gprWillBeWritten & op.regsOut, RegCache::FlushMode::Undirty);
|
||||
fpr.Flush(~op.fprWillBeWritten & op.GetFregsOut(), RegCache::FlushMode::Undirty);
|
||||
|
||||
if (opinfo->flags & FL_LOADSTORE)
|
||||
++js.numLoadStoreInst;
|
||||
|
|
|
|||
|
|
@ -194,7 +194,10 @@ void Jit64::ComputeRC(preg_t preg, bool needs_test, bool needs_sext)
|
|||
// We don't want to do this if a test is needed though, because it would interrupt macro-op
|
||||
// fusion.
|
||||
arg.Unlock();
|
||||
gpr.Flush(~js.op->gprInUse);
|
||||
gpr.Flush(~(js.op->gprWillBeRead | js.op->gprWillBeWritten) &
|
||||
(js.op->regsIn | js.op->regsOut),
|
||||
RegCache::FlushMode::Full);
|
||||
gpr.Flush(~js.op->gprWillBeWritten & js.op->regsOut, RegCache::FlushMode::Undirty);
|
||||
}
|
||||
DoMergedBranchCondition();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ void GPRRegCache::SetImmediate32(preg_t preg, u32 imm_value, bool dirty)
|
|||
|
||||
BitSet32 GPRRegCache::GetRegUtilization() const
|
||||
{
|
||||
return m_jit.js.op->gprInUse;
|
||||
return m_jit.js.op->gprWillBeRead | m_jit.js.op->gprWillBeWritten;
|
||||
}
|
||||
|
||||
BitSet32 GPRRegCache::CountRegsIn(preg_t preg, u32 lookahead) const
|
||||
|
|
|
|||
|
|
@ -330,7 +330,8 @@ void RegCache::Discard(BitSet32 pregs)
|
|||
}
|
||||
}
|
||||
|
||||
void RegCache::Flush(BitSet32 pregs, IgnoreDiscardedRegisters ignore_discarded_registers)
|
||||
void RegCache::Flush(BitSet32 pregs, FlushMode mode,
|
||||
IgnoreDiscardedRegisters ignore_discarded_registers)
|
||||
{
|
||||
ASSERT_MSG(DYNA_REC, std::ranges::none_of(m_xregs, &X64CachedReg::IsLocked),
|
||||
"Someone forgot to unlock a X64 reg");
|
||||
|
|
@ -342,7 +343,7 @@ void RegCache::Flush(BitSet32 pregs, IgnoreDiscardedRegisters ignore_discarded_r
|
|||
ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress for {}!",
|
||||
i);
|
||||
|
||||
StoreFromRegister(i, FlushMode::Full, ignore_discarded_registers);
|
||||
StoreFromRegister(i, mode, ignore_discarded_registers);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -615,7 +616,7 @@ void RegCache::Realize(preg_t preg)
|
|||
|
||||
if (m_constraints[preg].ShouldBeRevertable())
|
||||
{
|
||||
StoreFromRegister(preg, FlushMode::MaintainState);
|
||||
StoreFromRegister(preg, FlushMode::Undirty);
|
||||
do_bind();
|
||||
m_regs[preg].SetRevertable();
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -121,8 +121,11 @@ class RegCache
|
|||
public:
|
||||
enum class FlushMode
|
||||
{
|
||||
// All dirty registers get written back, and all registers get removed from the cache.
|
||||
Full,
|
||||
MaintainState,
|
||||
// All dirty registers get written back and get set as no longer dirty.
|
||||
// No registers are removed from the cache.
|
||||
Undirty,
|
||||
};
|
||||
|
||||
enum class IgnoreDiscardedRegisters
|
||||
|
|
@ -175,7 +178,7 @@ public:
|
|||
|
||||
RCForkGuard Fork();
|
||||
void Discard(BitSet32 pregs);
|
||||
void Flush(BitSet32 pregs = BitSet32::AllTrue(32),
|
||||
void Flush(BitSet32 pregs = BitSet32::AllTrue(32), FlushMode mode = FlushMode::Full,
|
||||
IgnoreDiscardedRegisters ignore_discarded_registers = IgnoreDiscardedRegisters::No);
|
||||
void Reset(BitSet32 pregs);
|
||||
void Revert();
|
||||
|
|
|
|||
|
|
@ -257,8 +257,8 @@ void JitArm64::Shutdown()
|
|||
void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
|
||||
{
|
||||
FlushCarry();
|
||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG, IgnoreDiscardedRegisters::Yes);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG, IgnoreDiscardedRegisters::Yes);
|
||||
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG, IgnoreDiscardedRegisters::Yes);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG, IgnoreDiscardedRegisters::Yes);
|
||||
|
||||
if (js.op->canEndBlock)
|
||||
{
|
||||
|
|
@ -322,8 +322,8 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
|
|||
void JitArm64::HLEFunction(u32 hook_index)
|
||||
{
|
||||
FlushCarry();
|
||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
|
||||
ABI_CallFunction(&HLE::ExecuteFromJIT, js.compilerPC, hook_index, &m_system);
|
||||
}
|
||||
|
|
@ -456,8 +456,8 @@ void JitArm64::MSRUpdated(u32 msr)
|
|||
// Call PageTableUpdatedFromJit if needed
|
||||
if (UReg_MSR(msr).DR)
|
||||
{
|
||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
|
||||
auto WA = gpr.GetScopedReg();
|
||||
|
||||
|
|
@ -497,8 +497,8 @@ void JitArm64::MSRUpdated(ARM64Reg msr)
|
|||
|
||||
// Call PageTableUpdatedFromJit if needed
|
||||
MOV(WA, msr);
|
||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
FixupBranch dr_unset = TBZ(WA, dr_bit);
|
||||
static_assert(PPCSTATE_OFF(pagetable_update_pending) < 0x1000);
|
||||
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(pagetable_update_pending));
|
||||
|
|
@ -1311,8 +1311,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||
m_system.GetPowerPC().GetBreakPoints().IsAddressBreakPoint(op.address))
|
||||
{
|
||||
FlushCarry();
|
||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
|
||||
static_assert(PPCSTATE_OFF(pc) <= 252);
|
||||
static_assert(PPCSTATE_OFF(pc) + 4 == PPCSTATE_OFF(npc));
|
||||
|
|
@ -1371,8 +1371,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||
if (bJITRegisterCacheOff)
|
||||
{
|
||||
FlushCarry();
|
||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
m_constant_propagation.Clear();
|
||||
|
||||
CompileInstruction(op);
|
||||
|
|
@ -1418,9 +1418,16 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||
fpr.DiscardRegisters(op.fprDiscardable);
|
||||
gpr.DiscardCRRegisters(op.crDiscardable);
|
||||
}
|
||||
gpr.StoreRegisters(~op.gprInUse & (op.regsIn | op.regsOut));
|
||||
fpr.StoreRegisters(~op.fprInUse & (op.fregsIn | op.GetFregsOut()));
|
||||
gpr.StoreCRRegisters(~op.crInUse & (op.crIn | op.crOut));
|
||||
gpr.FlushRegisters(~(op.gprWillBeRead | op.gprWillBeWritten) & (op.regsIn | op.regsOut),
|
||||
FlushMode::Full);
|
||||
fpr.FlushRegisters(~(op.fprWillBeRead | op.fprWillBeWritten) &
|
||||
(op.fregsIn | op.GetFregsOut()),
|
||||
FlushMode::Full);
|
||||
gpr.FlushCRRegisters(~(op.crWillBeRead | op.crWillBeWritten) & (op.crIn | op.crOut),
|
||||
FlushMode::Full);
|
||||
gpr.FlushRegisters(~op.gprWillBeWritten & op.regsOut, FlushMode::Undirty);
|
||||
fpr.FlushRegisters(~op.fprWillBeWritten & op.GetFregsOut(), FlushMode::Undirty);
|
||||
gpr.FlushCRRegisters(~op.crWillBeWritten & op.crOut, FlushMode::Undirty);
|
||||
|
||||
if (opinfo->flags & FL_LOADSTORE)
|
||||
++js.numLoadStoreInst;
|
||||
|
|
@ -1435,8 +1442,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||
|
||||
if (code_block.m_broken)
|
||||
{
|
||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
WriteExit(nextPC);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -322,8 +322,8 @@ void JitArm64::FlushPPCStateBeforeSlowAccess(ARM64Reg temp_gpr, ARM64Reg temp_fp
|
|||
MemChecks& mem_checks = m_system.GetPowerPC().GetMemChecks();
|
||||
if (mem_checks.HasAny())
|
||||
{
|
||||
gpr.StoreRegisters(mem_checks.GetGPRsUsedInConditions(), temp_gpr, FlushMode::MaintainState);
|
||||
fpr.StoreRegisters(mem_checks.GetFPRsUsedInConditions(), temp_fpr, FlushMode::MaintainState);
|
||||
gpr.FlushRegisters(mem_checks.GetGPRsUsedInConditions(), FlushMode::MaintainState, temp_gpr);
|
||||
fpr.FlushRegisters(mem_checks.GetFPRsUsedInConditions(), FlushMode::MaintainState, temp_fpr);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -21,8 +21,8 @@ void JitArm64::sc(UGeckoInstruction inst)
|
|||
INSTRUCTION_START
|
||||
JITDISABLE(bJITBranchOff);
|
||||
|
||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
|
||||
{
|
||||
auto WA = gpr.GetScopedReg();
|
||||
|
|
@ -39,8 +39,8 @@ void JitArm64::rfi(UGeckoInstruction inst)
|
|||
INSTRUCTION_START
|
||||
JITDISABLE(bJITBranchOff);
|
||||
|
||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
|
||||
// See Interpreter rfi for details
|
||||
const u32 mask = 0x87C0FFFF;
|
||||
|
|
@ -140,8 +140,8 @@ void JitArm64::bx(UGeckoInstruction inst)
|
|||
return;
|
||||
}
|
||||
|
||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
|
||||
if (js.op->branchIsIdleLoop)
|
||||
{
|
||||
|
|
@ -243,8 +243,8 @@ void JitArm64::bcx(UGeckoInstruction inst)
|
|||
|
||||
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||
{
|
||||
gpr.Flush(FlushMode::All, WA);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(FlushMode::Full, WA);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
if (IsBranchWatchEnabled())
|
||||
{
|
||||
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {}, {});
|
||||
|
|
@ -275,8 +275,8 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
|
|||
// BO_2 == 1z1zz -> b always
|
||||
|
||||
// NPC = CTR & 0xfffffffc;
|
||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
|
||||
Arm64GPRCache::ScopedARM64Reg WB = ARM64Reg::INVALID_REG;
|
||||
if (inst.LK_3)
|
||||
|
|
@ -345,8 +345,8 @@ void JitArm64::bclrx(UGeckoInstruction inst)
|
|||
STR(IndexType::Unsigned, WB, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
|
||||
}
|
||||
|
||||
gpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::All, WB);
|
||||
fpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::Full, WB);
|
||||
fpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
|
||||
if (IsBranchWatchEnabled())
|
||||
{
|
||||
|
|
@ -390,8 +390,8 @@ void JitArm64::bclrx(UGeckoInstruction inst)
|
|||
|
||||
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||
{
|
||||
gpr.Flush(FlushMode::All, WA);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(FlushMode::Full, WA);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
if (IsBranchWatchEnabled())
|
||||
{
|
||||
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {}, {});
|
||||
|
|
|
|||
|
|
@ -559,26 +559,11 @@ void JitArm64::lmw(UGeckoInstruction inst)
|
|||
}
|
||||
}
|
||||
|
||||
BitSet32 gprs_to_flush = ~js.op->gprInUse & BitSet32(0xFFFFFFFFU << d);
|
||||
if (!js.op->gprInUse[a])
|
||||
{
|
||||
if (!a_is_addr_base_reg)
|
||||
{
|
||||
gprs_to_flush[a] = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
gprs_to_flush[a] = false;
|
||||
BitSet32 gprs_to_undirty = ~js.op->gprWillBeWritten & BitSet32(0xFFFFFFFFU << d);
|
||||
|
||||
if (a + 1 == d && (std::countr_one((~js.op->gprInUse).m_val >> a) & 1) == 0)
|
||||
{
|
||||
// In this situation, we can save one store instruction by flushing GPR d together with GPR
|
||||
// a, but we shouldn't flush GPR a until the end of the PPC instruction. Therefore, let's
|
||||
// also wait with flushing GPR d until the end of the PPC instruction.
|
||||
gprs_to_flush[d] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
BitSet32 gprs_to_flush = ~(js.op->gprWillBeWritten | js.op->gprWillBeRead);
|
||||
if (a_is_addr_base_reg)
|
||||
gprs_to_flush[a] = false;
|
||||
|
||||
// TODO: This doesn't handle rollback on DSI correctly
|
||||
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_SIZE_32;
|
||||
|
|
@ -614,18 +599,20 @@ void JitArm64::lmw(UGeckoInstruction inst)
|
|||
{
|
||||
gpr.DiscardRegisters(BitSet32{int(i)});
|
||||
}
|
||||
else if (gprs_to_flush[i])
|
||||
else if (gprs_to_undirty[i])
|
||||
{
|
||||
BitSet32 gprs_to_flush_this_time{};
|
||||
if (i != 0 && gprs_to_flush[i - 1])
|
||||
gprs_to_flush_this_time = BitSet32{int(i - 1), int(i)};
|
||||
else if (i == 31 || !gprs_to_flush[i + 1])
|
||||
gprs_to_flush_this_time = BitSet32{int(i)};
|
||||
BitSet32 gprs_to_undirty_this_time{};
|
||||
if (i != 0 && gprs_to_undirty[i - 1])
|
||||
gprs_to_undirty_this_time = BitSet32{int(i - 1), int(i)};
|
||||
else if (i == 31 || !gprs_to_undirty[i + 1])
|
||||
gprs_to_undirty_this_time = BitSet32{int(i)};
|
||||
else
|
||||
continue;
|
||||
|
||||
gpr.StoreRegisters(gprs_to_flush_this_time);
|
||||
gprs_to_flush &= ~gprs_to_flush_this_time;
|
||||
gpr.FlushRegisters(gprs_to_undirty_this_time, FlushMode::Undirty, ARM64Reg::INVALID_REG);
|
||||
gpr.FlushRegisters(gprs_to_undirty_this_time & gprs_to_flush, FlushMode::Full,
|
||||
ARM64Reg::INVALID_REG);
|
||||
gprs_to_undirty &= ~gprs_to_undirty_this_time;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -677,27 +664,7 @@ void JitArm64::stmw(UGeckoInstruction inst)
|
|||
}
|
||||
}
|
||||
|
||||
const BitSet32 dirty_gprs_to_flush_unmasked = ~js.op->gprInUse & gpr.GetDirtyGPRs();
|
||||
BitSet32 dirty_gprs_to_flush = dirty_gprs_to_flush_unmasked & BitSet32(0xFFFFFFFFU << s);
|
||||
if (dirty_gprs_to_flush_unmasked[a])
|
||||
{
|
||||
if (!a_is_addr_base_reg)
|
||||
{
|
||||
dirty_gprs_to_flush[a] = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
dirty_gprs_to_flush[a] = false;
|
||||
|
||||
if (a + 1 == s && (std::countr_one((~js.op->gprInUse).m_val >> a) & 1) == 0)
|
||||
{
|
||||
// In this situation, we can save one store instruction by flushing GPR s together with GPR
|
||||
// a, but we shouldn't flush GPR a until the end of the PPC instruction. Therefore, let's
|
||||
// also wait with flushing GPR s until the end of the PPC instruction.
|
||||
dirty_gprs_to_flush[s] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
const BitSet32 gprs_to_flush = ~(js.op->gprWillBeRead | js.op->gprWillBeWritten);
|
||||
|
||||
// TODO: This doesn't handle rollback on DSI correctly
|
||||
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_SIZE_32;
|
||||
|
|
@ -720,34 +687,12 @@ void JitArm64::stmw(UGeckoInstruction inst)
|
|||
EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use,
|
||||
fprs_in_use);
|
||||
|
||||
// To reduce register pressure and to avoid getting a pipeline-unfriendly long run of stores
|
||||
// after this instruction, flush registers that would be flushed after this instruction anyway.
|
||||
//
|
||||
// We try to store two registers at a time when possible to let the register cache use STP.
|
||||
// To reduce register pressure, flush registers that would be flushed after this instruction
|
||||
// anyway.
|
||||
if (gprs_to_discard[i])
|
||||
{
|
||||
gpr.DiscardRegisters(BitSet32{int(i)});
|
||||
}
|
||||
else if (dirty_gprs_to_flush[i])
|
||||
{
|
||||
BitSet32 gprs_to_flush_this_time{};
|
||||
if (i != 0 && dirty_gprs_to_flush[i - 1])
|
||||
gprs_to_flush_this_time = BitSet32{int(i - 1), int(i)};
|
||||
else if (i == 31 || !dirty_gprs_to_flush[i + 1])
|
||||
gprs_to_flush_this_time = BitSet32{int(i)};
|
||||
else
|
||||
continue;
|
||||
|
||||
gpr.StoreRegisters(gprs_to_flush_this_time);
|
||||
dirty_gprs_to_flush &= ~gprs_to_flush_this_time;
|
||||
}
|
||||
else if (!js.op->gprInUse[i])
|
||||
{
|
||||
// If this register can be flushed but it isn't dirty, no store instruction will be emitted
|
||||
// when flushing it, so it doesn't matter if we flush it together with another register or
|
||||
// not. Let's just flush it in the simplest way possible.
|
||||
gpr.StoreRegisters(BitSet32{int(i)});
|
||||
}
|
||||
else if (gprs_to_flush[i])
|
||||
gpr.FlushRegisters(BitSet32{int(i)}, FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
}
|
||||
|
||||
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ void Arm64RegCache::FlushMostStaleRegister()
|
|||
}
|
||||
}
|
||||
|
||||
FlushRegister(most_stale_preg, FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
FlushRegister(most_stale_preg, FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
}
|
||||
|
||||
void Arm64RegCache::DiscardRegister(size_t preg)
|
||||
|
|
@ -203,11 +203,15 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg
|
|||
if (!reg.IsInPPCState())
|
||||
m_emit->STR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset));
|
||||
|
||||
if (mode == FlushMode::All)
|
||||
if (mode == FlushMode::Full)
|
||||
{
|
||||
UnlockRegister(EncodeRegTo32(host_reg));
|
||||
reg.Flush();
|
||||
}
|
||||
else if (mode == FlushMode::Undirty)
|
||||
{
|
||||
reg.SetDirty(false);
|
||||
}
|
||||
}
|
||||
else if (is_gpr && IsImm(index - GUEST_GPR_OFFSET))
|
||||
{
|
||||
|
|
@ -244,8 +248,10 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg
|
|||
}
|
||||
}
|
||||
|
||||
if (mode == FlushMode::All)
|
||||
if (mode == FlushMode::Full)
|
||||
reg.Flush();
|
||||
else if (mode == FlushMode::Undirty)
|
||||
reg.SetDirty(false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -270,10 +276,10 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r
|
|||
const bool reg2_imm = IsImm(i + 1);
|
||||
const bool reg1_zero = reg1_imm && GetImm(i) == 0;
|
||||
const bool reg2_zero = reg2_imm && GetImm(i + 1) == 0;
|
||||
const bool flush_all = mode == FlushMode::All;
|
||||
const bool can_allocate_reg = mode != FlushMode::MaintainState;
|
||||
if (!reg1.IsInPPCState() && !reg2.IsInPPCState() &&
|
||||
(reg1.IsInHostRegister() || (reg1_imm && (reg1_zero || flush_all))) &&
|
||||
(reg2.IsInHostRegister() || (reg2_imm && (reg2_zero || flush_all))))
|
||||
(reg1.IsInHostRegister() || (reg1_imm && (reg1_zero || can_allocate_reg))) &&
|
||||
(reg2.IsInHostRegister() || (reg2_imm && (reg2_zero || can_allocate_reg))))
|
||||
{
|
||||
const size_t ppc_offset = GetGuestByIndex(i).ppc_offset;
|
||||
if (ppc_offset <= 252)
|
||||
|
|
@ -281,7 +287,7 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r
|
|||
ARM64Reg RX1 = reg1_zero ? ARM64Reg::WZR : BindForRead(i);
|
||||
ARM64Reg RX2 = reg2_zero ? ARM64Reg::WZR : BindForRead(i + 1);
|
||||
m_emit->STP(IndexType::Signed, RX1, RX2, PPC_REG, u32(ppc_offset));
|
||||
if (flush_all)
|
||||
if (mode == FlushMode::Full)
|
||||
{
|
||||
if (reg1.IsInHostRegister())
|
||||
UnlockRegister(reg1.GetReg());
|
||||
|
|
@ -290,6 +296,11 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r
|
|||
reg1.Flush();
|
||||
reg2.Flush();
|
||||
}
|
||||
else if (mode == FlushMode::Undirty)
|
||||
{
|
||||
reg1.SetDirty(false);
|
||||
reg2.SetDirty(false);
|
||||
}
|
||||
++iter;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -497,7 +508,7 @@ void Arm64GPRCache::FlushByHost(ARM64Reg host_reg, ARM64Reg tmp_reg)
|
|||
const OpArg& reg = m_guest_registers[i];
|
||||
if (reg.IsInHostRegister() && DecodeReg(reg.GetReg()) == DecodeReg(host_reg))
|
||||
{
|
||||
FlushRegister(i, FlushMode::All, tmp_reg);
|
||||
FlushRegister(i, FlushMode::Full, tmp_reg);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -788,7 +799,7 @@ void Arm64FPRCache::FlushByHost(ARM64Reg host_reg, ARM64Reg tmp_reg)
|
|||
|
||||
if (reg.IsInHostRegister() && reg.GetReg() == host_reg)
|
||||
{
|
||||
FlushRegister(i, FlushMode::All, tmp_reg);
|
||||
FlushRegister(i, FlushMode::Full, tmp_reg);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -817,6 +828,22 @@ void Arm64FPRCache::FlushRegister(size_t preg, FlushMode mode, ARM64Reg tmp_reg)
|
|||
const bool dirty = !reg.IsInPPCState();
|
||||
RegType type = reg.GetFPRType();
|
||||
|
||||
if (mode == FlushMode::Undirty)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case RegType::Single:
|
||||
case RegType::DuplicatedSingle:
|
||||
case RegType::LowerPairSingle:
|
||||
// In this situation, skip flushing. It's usually better to wait until later instead to avoid
|
||||
// extra conversions. We can revisit this decision in the future if the register cache gets
|
||||
// the ability to store both the single and double versions of a value simultaneously.
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool allocated_tmp_reg = false;
|
||||
if (tmp_reg != ARM64Reg::INVALID_REG)
|
||||
{
|
||||
|
|
@ -868,11 +895,15 @@ void Arm64FPRCache::FlushRegister(size_t preg, FlushMode mode, ARM64Reg tmp_reg)
|
|||
static_cast<s32>(PPCSTATE_OFF_PS0(preg)));
|
||||
}
|
||||
|
||||
if (mode == FlushMode::All)
|
||||
if (mode == FlushMode::Full)
|
||||
{
|
||||
UnlockRegister(host_reg);
|
||||
reg.Flush();
|
||||
}
|
||||
else if (mode == FlushMode::Undirty)
|
||||
{
|
||||
reg.SetDirty(false);
|
||||
}
|
||||
}
|
||||
else if (type == RegType::Duplicated)
|
||||
{
|
||||
|
|
@ -892,11 +923,15 @@ void Arm64FPRCache::FlushRegister(size_t preg, FlushMode mode, ARM64Reg tmp_reg)
|
|||
}
|
||||
}
|
||||
|
||||
if (mode == FlushMode::All)
|
||||
if (mode == FlushMode::Full)
|
||||
{
|
||||
UnlockRegister(host_reg);
|
||||
reg.Flush();
|
||||
}
|
||||
else if (mode == FlushMode::Undirty)
|
||||
{
|
||||
reg.SetDirty(false);
|
||||
}
|
||||
}
|
||||
|
||||
if (allocated_tmp_reg)
|
||||
|
|
|
|||
|
|
@ -68,13 +68,17 @@ enum class RegType
|
|||
DuplicatedSingle, // PS0 and PS1 are identical, host register only stores one lane (32-bit)
|
||||
};
|
||||
|
||||
enum class FlushMode : bool
|
||||
enum class FlushMode
|
||||
{
|
||||
// Flushes all registers, no exceptions
|
||||
All,
|
||||
// Flushes registers in a conditional branch
|
||||
// Doesn't wipe the state of the registers from the cache
|
||||
// All dirty registers get written back, and all registers get removed from the cache.
|
||||
Full,
|
||||
// All dirty registers get written back, but the state of the cache is untouched.
|
||||
// The host registers may get clobbered. This is intended for use when doing a block exit
|
||||
// after a conditional branch.
|
||||
MaintainState,
|
||||
// Most dirty registers get written back and get set as no longer dirty.
|
||||
// No registers are removed from the cache.
|
||||
Undirty,
|
||||
};
|
||||
|
||||
enum class IgnoreDiscardedRegisters
|
||||
|
|
@ -379,17 +383,15 @@ public:
|
|||
|
||||
BitSet32 GetDirtyGPRs() const;
|
||||
|
||||
void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
|
||||
FlushMode flush_mode = FlushMode::All)
|
||||
{
|
||||
FlushRegisters(regs, flush_mode, tmp_reg, IgnoreDiscardedRegisters::No);
|
||||
}
|
||||
void FlushRegisters(
|
||||
BitSet32 regs, FlushMode flush_mode = FlushMode::Full,
|
||||
Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
|
||||
IgnoreDiscardedRegisters ignore_discarded_registers = IgnoreDiscardedRegisters::No);
|
||||
|
||||
void StoreCRRegisters(BitSet8 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
|
||||
FlushMode flush_mode = FlushMode::All)
|
||||
{
|
||||
FlushCRRegisters(regs, flush_mode, tmp_reg, IgnoreDiscardedRegisters::No);
|
||||
}
|
||||
void FlushCRRegisters(
|
||||
BitSet8 regs, FlushMode flush_mode = FlushMode::Full,
|
||||
Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
|
||||
IgnoreDiscardedRegisters ignore_discarded_registers = IgnoreDiscardedRegisters::No);
|
||||
|
||||
void DiscardCRRegisters(BitSet8 regs);
|
||||
void ResetCRRegisters(BitSet8 regs);
|
||||
|
|
@ -436,11 +438,6 @@ private:
|
|||
void SetImmediateInternal(size_t index, u32 imm, bool dirty);
|
||||
void BindForWrite(size_t index, bool will_read, bool will_write = true);
|
||||
|
||||
void FlushRegisters(BitSet32 regs, FlushMode mode, Arm64Gen::ARM64Reg tmp_reg,
|
||||
IgnoreDiscardedRegisters ignore_discarded_registers);
|
||||
void FlushCRRegisters(BitSet8 regs, FlushMode mode, Arm64Gen::ARM64Reg tmp_reg,
|
||||
IgnoreDiscardedRegisters ignore_discarded_registers);
|
||||
|
||||
static constexpr size_t GUEST_GPR_COUNT = 32;
|
||||
static constexpr size_t GUEST_CR_COUNT = 8;
|
||||
static constexpr size_t GUEST_GPR_OFFSET = 0;
|
||||
|
|
@ -470,11 +467,8 @@ public:
|
|||
|
||||
void FixSinglePrecision(size_t preg);
|
||||
|
||||
void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
|
||||
FlushMode flush_mode = FlushMode::All)
|
||||
{
|
||||
FlushRegisters(regs, flush_mode, tmp_reg);
|
||||
}
|
||||
void FlushRegisters(BitSet32 regs, FlushMode flush_mode = FlushMode::Full,
|
||||
Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG);
|
||||
|
||||
protected:
|
||||
// Get the order of the host registers
|
||||
|
|
@ -489,6 +483,4 @@ protected:
|
|||
private:
|
||||
bool IsCallerSaved(Arm64Gen::ARM64Reg reg) const;
|
||||
bool IsTopHalfUsed(Arm64Gen::ARM64Reg reg) const;
|
||||
|
||||
void FlushRegisters(BitSet32 regs, FlushMode mode, Arm64Gen::ARM64Reg tmp_reg);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -227,8 +227,8 @@ void JitArm64::mtmsr(UGeckoInstruction inst)
|
|||
if (!imm_value)
|
||||
MSRUpdated(gpr.R(inst.RS));
|
||||
|
||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
|
||||
WriteExceptionExit(js.compilerPC + 4, true);
|
||||
}
|
||||
|
|
@ -367,8 +367,8 @@ void JitArm64::twx(UGeckoInstruction inst)
|
|||
|
||||
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||
{
|
||||
gpr.Flush(FlushMode::All, WA);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
gpr.Flush(FlushMode::Full, WA);
|
||||
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
|
||||
WriteExit(js.compilerPC + 4);
|
||||
}
|
||||
}
|
||||
|
|
@ -714,15 +714,12 @@ void JitArm64::mfcr(UGeckoInstruction inst)
|
|||
CMP(CR, ARM64Reg::ZR);
|
||||
CSEL(WA, WC, WA, CC_GT);
|
||||
|
||||
// To reduce register pressure and to avoid getting a pipeline-unfriendly long run of stores
|
||||
// after this instruction, flush registers that would be flushed after this instruction anyway.
|
||||
//
|
||||
// There's no point in ensuring we flush two registers at the same time, because the offset in
|
||||
// ppcState for CRs is too large to be encoded into an STP instruction.
|
||||
// To reduce register pressure, flush registers that would be flushed after this instruction
|
||||
// anyway.
|
||||
if (js.op->crDiscardable[i])
|
||||
gpr.DiscardCRRegisters(BitSet8{i});
|
||||
else if (!js.op->crInUse[i])
|
||||
gpr.StoreCRRegisters(BitSet8{i}, WC);
|
||||
else if (!(js.op->crWillBeRead | js.op->crWillBeWritten)[i])
|
||||
gpr.FlushCRRegisters(BitSet8{i}, FlushMode::Full, WC);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -984,8 +984,9 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
|
|||
// wants flags, to be safe.
|
||||
bool wantsFPRF = true;
|
||||
bool wantsCA = true;
|
||||
BitSet8 crInUse, crDiscardable;
|
||||
BitSet32 gprBlockInputs, gprInUse, fprInUse, gprDiscardable, fprDiscardable, fprInXmm;
|
||||
BitSet8 crWillBeRead, crWillBeWritten, crDiscardable;
|
||||
BitSet32 gprWillBeRead, gprWillBeWritten, fprWillBeRead, fprWillBeWritten, gprDiscardable,
|
||||
fprDiscardable, fprInXmm;
|
||||
for (int i = block->m_num_instructions - 1; i >= 0; i--)
|
||||
{
|
||||
CodeOp& op = code[i];
|
||||
|
|
@ -1012,28 +1013,38 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
|
|||
wantsCA |= opWantsCA || may_exit_block;
|
||||
wantsFPRF &= !op.outputFPRF || opWantsFPRF;
|
||||
wantsCA &= !op.outputCA || opWantsCA;
|
||||
op.gprInUse = gprInUse;
|
||||
op.fprInUse = fprInUse;
|
||||
op.crInUse = crInUse;
|
||||
op.gprWillBeRead = gprWillBeRead;
|
||||
op.gprWillBeWritten = gprWillBeWritten;
|
||||
op.fprWillBeRead = fprWillBeRead;
|
||||
op.fprWillBeWritten = fprWillBeWritten;
|
||||
op.crWillBeRead = crWillBeRead;
|
||||
op.crWillBeWritten = crWillBeWritten;
|
||||
op.gprDiscardable = gprDiscardable;
|
||||
op.fprDiscardable = fprDiscardable;
|
||||
op.crDiscardable = crDiscardable;
|
||||
op.fprInXmm = fprInXmm;
|
||||
gprBlockInputs &= ~op.regsOut;
|
||||
gprBlockInputs |= op.regsIn;
|
||||
gprInUse |= op.regsIn | op.regsOut;
|
||||
fprInUse |= op.fregsIn | op.GetFregsOut();
|
||||
crInUse |= op.crIn | op.crOut;
|
||||
gprWillBeRead &= ~op.regsOut;
|
||||
gprWillBeRead |= op.regsIn;
|
||||
gprWillBeWritten |= op.regsOut;
|
||||
fprWillBeRead &= ~op.GetFregsOut();
|
||||
fprWillBeRead |= op.fregsIn;
|
||||
fprWillBeWritten |= op.GetFregsOut();
|
||||
crWillBeRead &= ~op.crOut;
|
||||
crWillBeRead |= op.crIn;
|
||||
crWillBeWritten |= op.crOut;
|
||||
|
||||
if (strncmp(op.opinfo->opname, "stfd", 4))
|
||||
fprInXmm |= op.fregsIn;
|
||||
|
||||
if (hle || breakpoint)
|
||||
{
|
||||
gprInUse = BitSet32{};
|
||||
fprInUse = BitSet32{};
|
||||
gprWillBeRead = BitSet32{};
|
||||
gprWillBeWritten = BitSet32{};
|
||||
fprWillBeRead = BitSet32{};
|
||||
fprWillBeWritten = BitSet32{};
|
||||
fprInXmm = BitSet32{};
|
||||
crInUse = BitSet8{};
|
||||
crWillBeRead = BitSet8{};
|
||||
crWillBeWritten = BitSet8{};
|
||||
gprDiscardable = BitSet32{};
|
||||
fprDiscardable = BitSet32{};
|
||||
crDiscardable = BitSet8{};
|
||||
|
|
@ -1149,7 +1160,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
|
|||
}
|
||||
block->m_gqr_used = gqrUsed;
|
||||
block->m_gqr_modified = gqrModified;
|
||||
block->m_gpr_inputs = gprBlockInputs;
|
||||
block->m_gpr_inputs = gprWillBeRead;
|
||||
return address;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -51,11 +51,14 @@ struct CodeOp // 16B
|
|||
bool canCauseException = false;
|
||||
bool skipLRStack = false;
|
||||
bool skip = false; // followed BL-s for example
|
||||
BitSet8 crInUse;
|
||||
BitSet8 crWillBeRead;
|
||||
BitSet8 crWillBeWritten;
|
||||
BitSet8 crDiscardable;
|
||||
// which registers are still needed after this instruction in this block
|
||||
BitSet32 fprInUse;
|
||||
BitSet32 gprInUse;
|
||||
BitSet32 gprWillBeRead;
|
||||
BitSet32 gprWillBeWritten;
|
||||
BitSet32 fprWillBeRead;
|
||||
BitSet32 fprWillBeWritten;
|
||||
// which registers have values which are known to be unused after this instruction
|
||||
BitSet32 gprDiscardable;
|
||||
BitSet32 fprDiscardable;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user