This commit is contained in:
JosJuice 2026-05-07 14:03:34 -07:00 committed by GitHub
commit 5ba2c8f36f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 189 additions and 185 deletions

View File

@ -351,8 +351,10 @@ void Jit64::Shutdown()
void Jit64::FallBackToInterpreter(UGeckoInstruction inst)
{
FlushCarry();
gpr.Flush(BitSet32(0xFFFFFFFF), RegCache::IgnoreDiscardedRegisters::Yes);
fpr.Flush(BitSet32(0xFFFFFFFF), RegCache::IgnoreDiscardedRegisters::Yes);
gpr.Flush(BitSet32(0xFFFFFFFF), RegCache::FlushMode::Full,
RegCache::IgnoreDiscardedRegisters::Yes);
fpr.Flush(BitSet32(0xFFFFFFFF), RegCache::FlushMode::Full,
RegCache::IgnoreDiscardedRegisters::Yes);
if (js.op->canEndBlock)
{
@ -1151,7 +1153,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
// output, which needs to be bound in the actual instruction compilation.
// TODO: make this smarter in the case that we're actually register-starved, i.e.
// prioritize the more important registers.
gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable);
gpr.PreloadRegisters(op.regsIn & (op.gprWillBeRead | op.gprWillBeWritten) &
~op.gprDiscardable);
fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable);
}
@ -1230,8 +1233,12 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
gpr.Discard(op.gprDiscardable);
fpr.Discard(op.fprDiscardable);
}
gpr.Flush(~op.gprInUse & (op.regsIn | op.regsOut));
fpr.Flush(~op.fprInUse & (op.fregsIn | op.GetFregsOut()));
gpr.Flush(~(op.gprWillBeRead | op.gprWillBeWritten) & (op.regsIn | op.regsOut),
RegCache::FlushMode::Full);
fpr.Flush(~(op.fprWillBeRead | op.fprWillBeWritten) & (op.fregsIn | op.GetFregsOut()),
RegCache::FlushMode::Full);
gpr.Flush(~op.gprWillBeWritten & op.regsOut, RegCache::FlushMode::Undirty);
fpr.Flush(~op.fprWillBeWritten & op.GetFregsOut(), RegCache::FlushMode::Undirty);
if (opinfo->flags & FL_LOADSTORE)
++js.numLoadStoreInst;

View File

@ -194,7 +194,10 @@ void Jit64::ComputeRC(preg_t preg, bool needs_test, bool needs_sext)
// We don't want to do this if a test is needed though, because it would interrupt macro-op
// fusion.
arg.Unlock();
gpr.Flush(~js.op->gprInUse);
gpr.Flush(~(js.op->gprWillBeRead | js.op->gprWillBeWritten) &
(js.op->regsIn | js.op->regsOut),
RegCache::FlushMode::Full);
gpr.Flush(~js.op->gprWillBeWritten & js.op->regsOut, RegCache::FlushMode::Undirty);
}
DoMergedBranchCondition();
}

View File

@ -115,7 +115,7 @@ void GPRRegCache::SetImmediate32(preg_t preg, u32 imm_value, bool dirty)
BitSet32 GPRRegCache::GetRegUtilization() const
{
return m_jit.js.op->gprInUse;
return m_jit.js.op->gprWillBeRead | m_jit.js.op->gprWillBeWritten;
}
BitSet32 GPRRegCache::CountRegsIn(preg_t preg, u32 lookahead) const

View File

@ -330,7 +330,8 @@ void RegCache::Discard(BitSet32 pregs)
}
}
void RegCache::Flush(BitSet32 pregs, IgnoreDiscardedRegisters ignore_discarded_registers)
void RegCache::Flush(BitSet32 pregs, FlushMode mode,
IgnoreDiscardedRegisters ignore_discarded_registers)
{
ASSERT_MSG(DYNA_REC, std::ranges::none_of(m_xregs, &X64CachedReg::IsLocked),
"Someone forgot to unlock a X64 reg");
@ -342,7 +343,7 @@ void RegCache::Flush(BitSet32 pregs, IgnoreDiscardedRegisters ignore_discarded_r
ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress for {}!",
i);
StoreFromRegister(i, FlushMode::Full, ignore_discarded_registers);
StoreFromRegister(i, mode, ignore_discarded_registers);
}
}
@ -615,7 +616,7 @@ void RegCache::Realize(preg_t preg)
if (m_constraints[preg].ShouldBeRevertable())
{
StoreFromRegister(preg, FlushMode::MaintainState);
StoreFromRegister(preg, FlushMode::Undirty);
do_bind();
m_regs[preg].SetRevertable();
return;

View File

@ -121,8 +121,11 @@ class RegCache
public:
enum class FlushMode
{
// All dirty registers get written back, and all registers get removed from the cache.
Full,
MaintainState,
// All dirty registers get written back and get set as no longer dirty.
// No registers are removed from the cache.
Undirty,
};
enum class IgnoreDiscardedRegisters
@ -175,7 +178,7 @@ public:
RCForkGuard Fork();
void Discard(BitSet32 pregs);
void Flush(BitSet32 pregs = BitSet32::AllTrue(32),
void Flush(BitSet32 pregs = BitSet32::AllTrue(32), FlushMode mode = FlushMode::Full,
IgnoreDiscardedRegisters ignore_discarded_registers = IgnoreDiscardedRegisters::No);
void Reset(BitSet32 pregs);
void Revert();

View File

@ -257,8 +257,8 @@ void JitArm64::Shutdown()
void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
{
FlushCarry();
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG, IgnoreDiscardedRegisters::Yes);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG, IgnoreDiscardedRegisters::Yes);
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG, IgnoreDiscardedRegisters::Yes);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG, IgnoreDiscardedRegisters::Yes);
if (js.op->canEndBlock)
{
@ -322,8 +322,8 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
void JitArm64::HLEFunction(u32 hook_index)
{
FlushCarry();
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
ABI_CallFunction(&HLE::ExecuteFromJIT, js.compilerPC, hook_index, &m_system);
}
@ -456,8 +456,8 @@ void JitArm64::MSRUpdated(u32 msr)
// Call PageTableUpdatedFromJit if needed
if (UReg_MSR(msr).DR)
{
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
auto WA = gpr.GetScopedReg();
@ -497,8 +497,8 @@ void JitArm64::MSRUpdated(ARM64Reg msr)
// Call PageTableUpdatedFromJit if needed
MOV(WA, msr);
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
FixupBranch dr_unset = TBZ(WA, dr_bit);
static_assert(PPCSTATE_OFF(pagetable_update_pending) < 0x1000);
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(pagetable_update_pending));
@ -1311,8 +1311,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
m_system.GetPowerPC().GetBreakPoints().IsAddressBreakPoint(op.address))
{
FlushCarry();
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
static_assert(PPCSTATE_OFF(pc) <= 252);
static_assert(PPCSTATE_OFF(pc) + 4 == PPCSTATE_OFF(npc));
@ -1371,8 +1371,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
if (bJITRegisterCacheOff)
{
FlushCarry();
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
m_constant_propagation.Clear();
CompileInstruction(op);
@ -1418,9 +1418,16 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
fpr.DiscardRegisters(op.fprDiscardable);
gpr.DiscardCRRegisters(op.crDiscardable);
}
gpr.StoreRegisters(~op.gprInUse & (op.regsIn | op.regsOut));
fpr.StoreRegisters(~op.fprInUse & (op.fregsIn | op.GetFregsOut()));
gpr.StoreCRRegisters(~op.crInUse & (op.crIn | op.crOut));
gpr.FlushRegisters(~(op.gprWillBeRead | op.gprWillBeWritten) & (op.regsIn | op.regsOut),
FlushMode::Full);
fpr.FlushRegisters(~(op.fprWillBeRead | op.fprWillBeWritten) &
(op.fregsIn | op.GetFregsOut()),
FlushMode::Full);
gpr.FlushCRRegisters(~(op.crWillBeRead | op.crWillBeWritten) & (op.crIn | op.crOut),
FlushMode::Full);
gpr.FlushRegisters(~op.gprWillBeWritten & op.regsOut, FlushMode::Undirty);
fpr.FlushRegisters(~op.fprWillBeWritten & op.GetFregsOut(), FlushMode::Undirty);
gpr.FlushCRRegisters(~op.crWillBeWritten & op.crOut, FlushMode::Undirty);
if (opinfo->flags & FL_LOADSTORE)
++js.numLoadStoreInst;
@ -1435,8 +1442,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
if (code_block.m_broken)
{
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
WriteExit(nextPC);
}

View File

@ -322,8 +322,8 @@ void JitArm64::FlushPPCStateBeforeSlowAccess(ARM64Reg temp_gpr, ARM64Reg temp_fp
MemChecks& mem_checks = m_system.GetPowerPC().GetMemChecks();
if (mem_checks.HasAny())
{
gpr.StoreRegisters(mem_checks.GetGPRsUsedInConditions(), temp_gpr, FlushMode::MaintainState);
fpr.StoreRegisters(mem_checks.GetFPRsUsedInConditions(), temp_fpr, FlushMode::MaintainState);
gpr.FlushRegisters(mem_checks.GetGPRsUsedInConditions(), FlushMode::MaintainState, temp_gpr);
fpr.FlushRegisters(mem_checks.GetFPRsUsedInConditions(), FlushMode::MaintainState, temp_fpr);
}
}

View File

@ -21,8 +21,8 @@ void JitArm64::sc(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITBranchOff);
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
{
auto WA = gpr.GetScopedReg();
@ -39,8 +39,8 @@ void JitArm64::rfi(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITBranchOff);
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
// See Interpreter rfi for details
const u32 mask = 0x87C0FFFF;
@ -140,8 +140,8 @@ void JitArm64::bx(UGeckoInstruction inst)
return;
}
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
if (js.op->branchIsIdleLoop)
{
@ -243,8 +243,8 @@ void JitArm64::bcx(UGeckoInstruction inst)
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush(FlushMode::All, WA);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(FlushMode::Full, WA);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
if (IsBranchWatchEnabled())
{
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {}, {});
@ -275,8 +275,8 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
// BO_2 == 1z1zz -> b always
// NPC = CTR & 0xfffffffc;
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
Arm64GPRCache::ScopedARM64Reg WB = ARM64Reg::INVALID_REG;
if (inst.LK_3)
@ -345,8 +345,8 @@ void JitArm64::bclrx(UGeckoInstruction inst)
STR(IndexType::Unsigned, WB, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
}
gpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::All, WB);
fpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::Full, WB);
fpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::Full, ARM64Reg::INVALID_REG);
if (IsBranchWatchEnabled())
{
@ -390,8 +390,8 @@ void JitArm64::bclrx(UGeckoInstruction inst)
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush(FlushMode::All, WA);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(FlushMode::Full, WA);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
if (IsBranchWatchEnabled())
{
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {}, {});

View File

@ -559,26 +559,11 @@ void JitArm64::lmw(UGeckoInstruction inst)
}
}
BitSet32 gprs_to_flush = ~js.op->gprInUse & BitSet32(0xFFFFFFFFU << d);
if (!js.op->gprInUse[a])
{
if (!a_is_addr_base_reg)
{
gprs_to_flush[a] = true;
}
else
{
gprs_to_flush[a] = false;
BitSet32 gprs_to_undirty = ~js.op->gprWillBeWritten & BitSet32(0xFFFFFFFFU << d);
if (a + 1 == d && (std::countr_one((~js.op->gprInUse).m_val >> a) & 1) == 0)
{
// In this situation, we can save one store instruction by flushing GPR d together with GPR
// a, but we shouldn't flush GPR a until the end of the PPC instruction. Therefore, let's
// also wait with flushing GPR d until the end of the PPC instruction.
gprs_to_flush[d] = false;
}
}
}
BitSet32 gprs_to_flush = ~(js.op->gprWillBeWritten | js.op->gprWillBeRead);
if (a_is_addr_base_reg)
gprs_to_flush[a] = false;
// TODO: This doesn't handle rollback on DSI correctly
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_SIZE_32;
@ -614,18 +599,20 @@ void JitArm64::lmw(UGeckoInstruction inst)
{
gpr.DiscardRegisters(BitSet32{int(i)});
}
else if (gprs_to_flush[i])
else if (gprs_to_undirty[i])
{
BitSet32 gprs_to_flush_this_time{};
if (i != 0 && gprs_to_flush[i - 1])
gprs_to_flush_this_time = BitSet32{int(i - 1), int(i)};
else if (i == 31 || !gprs_to_flush[i + 1])
gprs_to_flush_this_time = BitSet32{int(i)};
BitSet32 gprs_to_undirty_this_time{};
if (i != 0 && gprs_to_undirty[i - 1])
gprs_to_undirty_this_time = BitSet32{int(i - 1), int(i)};
else if (i == 31 || !gprs_to_undirty[i + 1])
gprs_to_undirty_this_time = BitSet32{int(i)};
else
continue;
gpr.StoreRegisters(gprs_to_flush_this_time);
gprs_to_flush &= ~gprs_to_flush_this_time;
gpr.FlushRegisters(gprs_to_undirty_this_time, FlushMode::Undirty, ARM64Reg::INVALID_REG);
gpr.FlushRegisters(gprs_to_undirty_this_time & gprs_to_flush, FlushMode::Full,
ARM64Reg::INVALID_REG);
gprs_to_undirty &= ~gprs_to_undirty_this_time;
}
}
@ -677,27 +664,7 @@ void JitArm64::stmw(UGeckoInstruction inst)
}
}
const BitSet32 dirty_gprs_to_flush_unmasked = ~js.op->gprInUse & gpr.GetDirtyGPRs();
BitSet32 dirty_gprs_to_flush = dirty_gprs_to_flush_unmasked & BitSet32(0xFFFFFFFFU << s);
if (dirty_gprs_to_flush_unmasked[a])
{
if (!a_is_addr_base_reg)
{
dirty_gprs_to_flush[a] = true;
}
else
{
dirty_gprs_to_flush[a] = false;
if (a + 1 == s && (std::countr_one((~js.op->gprInUse).m_val >> a) & 1) == 0)
{
// In this situation, we can save one store instruction by flushing GPR s together with GPR
// a, but we shouldn't flush GPR a until the end of the PPC instruction. Therefore, let's
// also wait with flushing GPR s until the end of the PPC instruction.
dirty_gprs_to_flush[s] = false;
}
}
}
const BitSet32 gprs_to_flush = ~(js.op->gprWillBeRead | js.op->gprWillBeWritten);
// TODO: This doesn't handle rollback on DSI correctly
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_SIZE_32;
@ -720,34 +687,12 @@ void JitArm64::stmw(UGeckoInstruction inst)
EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use,
fprs_in_use);
// To reduce register pressure and to avoid getting a pipeline-unfriendly long run of stores
// after this instruction, flush registers that would be flushed after this instruction anyway.
//
// We try to store two registers at a time when possible to let the register cache use STP.
// To reduce register pressure, flush registers that would be flushed after this instruction
// anyway.
if (gprs_to_discard[i])
{
gpr.DiscardRegisters(BitSet32{int(i)});
}
else if (dirty_gprs_to_flush[i])
{
BitSet32 gprs_to_flush_this_time{};
if (i != 0 && dirty_gprs_to_flush[i - 1])
gprs_to_flush_this_time = BitSet32{int(i - 1), int(i)};
else if (i == 31 || !dirty_gprs_to_flush[i + 1])
gprs_to_flush_this_time = BitSet32{int(i)};
else
continue;
gpr.StoreRegisters(gprs_to_flush_this_time);
dirty_gprs_to_flush &= ~gprs_to_flush_this_time;
}
else if (!js.op->gprInUse[i])
{
// If this register can be flushed but it isn't dirty, no store instruction will be emitted
// when flushing it, so it doesn't matter if we flush it together with another register or
// not. Let's just flush it in the simplest way possible.
gpr.StoreRegisters(BitSet32{int(i)});
}
else if (gprs_to_flush[i])
gpr.FlushRegisters(BitSet32{int(i)}, FlushMode::Full, ARM64Reg::INVALID_REG);
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);

View File

@ -121,7 +121,7 @@ void Arm64RegCache::FlushMostStaleRegister()
}
}
FlushRegister(most_stale_preg, FlushMode::All, ARM64Reg::INVALID_REG);
FlushRegister(most_stale_preg, FlushMode::Full, ARM64Reg::INVALID_REG);
}
void Arm64RegCache::DiscardRegister(size_t preg)
@ -203,11 +203,15 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg
if (!reg.IsInPPCState())
m_emit->STR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset));
if (mode == FlushMode::All)
if (mode == FlushMode::Full)
{
UnlockRegister(EncodeRegTo32(host_reg));
reg.Flush();
}
else if (mode == FlushMode::Undirty)
{
reg.SetDirty(false);
}
}
else if (is_gpr && IsImm(index - GUEST_GPR_OFFSET))
{
@ -244,8 +248,10 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg
}
}
if (mode == FlushMode::All)
if (mode == FlushMode::Full)
reg.Flush();
else if (mode == FlushMode::Undirty)
reg.SetDirty(false);
}
}
@ -270,10 +276,10 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r
const bool reg2_imm = IsImm(i + 1);
const bool reg1_zero = reg1_imm && GetImm(i) == 0;
const bool reg2_zero = reg2_imm && GetImm(i + 1) == 0;
const bool flush_all = mode == FlushMode::All;
const bool can_allocate_reg = mode != FlushMode::MaintainState;
if (!reg1.IsInPPCState() && !reg2.IsInPPCState() &&
(reg1.IsInHostRegister() || (reg1_imm && (reg1_zero || flush_all))) &&
(reg2.IsInHostRegister() || (reg2_imm && (reg2_zero || flush_all))))
(reg1.IsInHostRegister() || (reg1_imm && (reg1_zero || can_allocate_reg))) &&
(reg2.IsInHostRegister() || (reg2_imm && (reg2_zero || can_allocate_reg))))
{
const size_t ppc_offset = GetGuestByIndex(i).ppc_offset;
if (ppc_offset <= 252)
@ -281,7 +287,7 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r
ARM64Reg RX1 = reg1_zero ? ARM64Reg::WZR : BindForRead(i);
ARM64Reg RX2 = reg2_zero ? ARM64Reg::WZR : BindForRead(i + 1);
m_emit->STP(IndexType::Signed, RX1, RX2, PPC_REG, u32(ppc_offset));
if (flush_all)
if (mode == FlushMode::Full)
{
if (reg1.IsInHostRegister())
UnlockRegister(reg1.GetReg());
@ -290,6 +296,11 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r
reg1.Flush();
reg2.Flush();
}
else if (mode == FlushMode::Undirty)
{
reg1.SetDirty(false);
reg2.SetDirty(false);
}
++iter;
continue;
}
@ -497,7 +508,7 @@ void Arm64GPRCache::FlushByHost(ARM64Reg host_reg, ARM64Reg tmp_reg)
const OpArg& reg = m_guest_registers[i];
if (reg.IsInHostRegister() && DecodeReg(reg.GetReg()) == DecodeReg(host_reg))
{
FlushRegister(i, FlushMode::All, tmp_reg);
FlushRegister(i, FlushMode::Full, tmp_reg);
return;
}
}
@ -788,7 +799,7 @@ void Arm64FPRCache::FlushByHost(ARM64Reg host_reg, ARM64Reg tmp_reg)
if (reg.IsInHostRegister() && reg.GetReg() == host_reg)
{
FlushRegister(i, FlushMode::All, tmp_reg);
FlushRegister(i, FlushMode::Full, tmp_reg);
return;
}
}
@ -817,6 +828,22 @@ void Arm64FPRCache::FlushRegister(size_t preg, FlushMode mode, ARM64Reg tmp_reg)
const bool dirty = !reg.IsInPPCState();
RegType type = reg.GetFPRType();
if (mode == FlushMode::Undirty)
{
switch (type)
{
case RegType::Single:
case RegType::DuplicatedSingle:
case RegType::LowerPairSingle:
// In this situation, skip flushing. It's usually better to wait until later instead to avoid
// extra conversions. We can revisit this decision in the future if the register cache gets
// the ability to store both the single and double versions of a value simultaneously.
return;
default:
break;
}
}
bool allocated_tmp_reg = false;
if (tmp_reg != ARM64Reg::INVALID_REG)
{
@ -868,11 +895,15 @@ void Arm64FPRCache::FlushRegister(size_t preg, FlushMode mode, ARM64Reg tmp_reg)
static_cast<s32>(PPCSTATE_OFF_PS0(preg)));
}
if (mode == FlushMode::All)
if (mode == FlushMode::Full)
{
UnlockRegister(host_reg);
reg.Flush();
}
else if (mode == FlushMode::Undirty)
{
reg.SetDirty(false);
}
}
else if (type == RegType::Duplicated)
{
@ -892,11 +923,15 @@ void Arm64FPRCache::FlushRegister(size_t preg, FlushMode mode, ARM64Reg tmp_reg)
}
}
if (mode == FlushMode::All)
if (mode == FlushMode::Full)
{
UnlockRegister(host_reg);
reg.Flush();
}
else if (mode == FlushMode::Undirty)
{
reg.SetDirty(false);
}
}
if (allocated_tmp_reg)

View File

@ -68,13 +68,17 @@ enum class RegType
DuplicatedSingle, // PS0 and PS1 are identical, host register only stores one lane (32-bit)
};
enum class FlushMode : bool
enum class FlushMode
{
// Flushes all registers, no exceptions
All,
// Flushes registers in a conditional branch
// Doesn't wipe the state of the registers from the cache
// All dirty registers get written back, and all registers get removed from the cache.
Full,
// All dirty registers get written back, but the state of the cache is untouched.
// The host registers may get clobbered. This is intended for use when doing a block exit
// after a conditional branch.
MaintainState,
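// Dirty registers get written back and get set as no longer dirty, except FPRs that are
// currently held in a single-precision format: those are skipped to avoid extra conversions.
// No registers are removed from the cache.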
Undirty,
};
enum class IgnoreDiscardedRegisters
@ -379,17 +383,15 @@ public:
BitSet32 GetDirtyGPRs() const;
void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
FlushMode flush_mode = FlushMode::All)
{
FlushRegisters(regs, flush_mode, tmp_reg, IgnoreDiscardedRegisters::No);
}
void FlushRegisters(
BitSet32 regs, FlushMode flush_mode = FlushMode::Full,
Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
IgnoreDiscardedRegisters ignore_discarded_registers = IgnoreDiscardedRegisters::No);
void StoreCRRegisters(BitSet8 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
FlushMode flush_mode = FlushMode::All)
{
FlushCRRegisters(regs, flush_mode, tmp_reg, IgnoreDiscardedRegisters::No);
}
void FlushCRRegisters(
BitSet8 regs, FlushMode flush_mode = FlushMode::Full,
Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
IgnoreDiscardedRegisters ignore_discarded_registers = IgnoreDiscardedRegisters::No);
void DiscardCRRegisters(BitSet8 regs);
void ResetCRRegisters(BitSet8 regs);
@ -436,11 +438,6 @@ private:
void SetImmediateInternal(size_t index, u32 imm, bool dirty);
void BindForWrite(size_t index, bool will_read, bool will_write = true);
void FlushRegisters(BitSet32 regs, FlushMode mode, Arm64Gen::ARM64Reg tmp_reg,
IgnoreDiscardedRegisters ignore_discarded_registers);
void FlushCRRegisters(BitSet8 regs, FlushMode mode, Arm64Gen::ARM64Reg tmp_reg,
IgnoreDiscardedRegisters ignore_discarded_registers);
static constexpr size_t GUEST_GPR_COUNT = 32;
static constexpr size_t GUEST_CR_COUNT = 8;
static constexpr size_t GUEST_GPR_OFFSET = 0;
@ -470,11 +467,8 @@ public:
void FixSinglePrecision(size_t preg);
void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
FlushMode flush_mode = FlushMode::All)
{
FlushRegisters(regs, flush_mode, tmp_reg);
}
void FlushRegisters(BitSet32 regs, FlushMode flush_mode = FlushMode::Full,
Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG);
protected:
// Get the order of the host registers
@ -489,6 +483,4 @@ protected:
private:
bool IsCallerSaved(Arm64Gen::ARM64Reg reg) const;
bool IsTopHalfUsed(Arm64Gen::ARM64Reg reg) const;
void FlushRegisters(BitSet32 regs, FlushMode mode, Arm64Gen::ARM64Reg tmp_reg);
};

View File

@ -227,8 +227,8 @@ void JitArm64::mtmsr(UGeckoInstruction inst)
if (!imm_value)
MSRUpdated(gpr.R(inst.RS));
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
WriteExceptionExit(js.compilerPC + 4, true);
}
@ -367,8 +367,8 @@ void JitArm64::twx(UGeckoInstruction inst)
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush(FlushMode::All, WA);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
gpr.Flush(FlushMode::Full, WA);
fpr.Flush(FlushMode::Full, ARM64Reg::INVALID_REG);
WriteExit(js.compilerPC + 4);
}
}
@ -714,15 +714,12 @@ void JitArm64::mfcr(UGeckoInstruction inst)
CMP(CR, ARM64Reg::ZR);
CSEL(WA, WC, WA, CC_GT);
// To reduce register pressure and to avoid getting a pipeline-unfriendly long run of stores
// after this instruction, flush registers that would be flushed after this instruction anyway.
//
// There's no point in ensuring we flush two registers at the same time, because the offset in
// ppcState for CRs is too large to be encoded into an STP instruction.
// To reduce register pressure, flush registers that would be flushed after this instruction
// anyway.
if (js.op->crDiscardable[i])
gpr.DiscardCRRegisters(BitSet8{i});
else if (!js.op->crInUse[i])
gpr.StoreCRRegisters(BitSet8{i}, WC);
else if (!(js.op->crWillBeRead | js.op->crWillBeWritten)[i])
gpr.FlushCRRegisters(BitSet8{i}, FlushMode::Full, WC);
}
}

View File

@ -984,8 +984,9 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
// wants flags, to be safe.
bool wantsFPRF = true;
bool wantsCA = true;
BitSet8 crInUse, crDiscardable;
BitSet32 gprBlockInputs, gprInUse, fprInUse, gprDiscardable, fprDiscardable, fprInXmm;
BitSet8 crWillBeRead, crWillBeWritten, crDiscardable;
BitSet32 gprWillBeRead, gprWillBeWritten, fprWillBeRead, fprWillBeWritten, gprDiscardable,
fprDiscardable, fprInXmm;
for (int i = block->m_num_instructions - 1; i >= 0; i--)
{
CodeOp& op = code[i];
@ -1012,28 +1013,38 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
wantsCA |= opWantsCA || may_exit_block;
wantsFPRF &= !op.outputFPRF || opWantsFPRF;
wantsCA &= !op.outputCA || opWantsCA;
op.gprInUse = gprInUse;
op.fprInUse = fprInUse;
op.crInUse = crInUse;
op.gprWillBeRead = gprWillBeRead;
op.gprWillBeWritten = gprWillBeWritten;
op.fprWillBeRead = fprWillBeRead;
op.fprWillBeWritten = fprWillBeWritten;
op.crWillBeRead = crWillBeRead;
op.crWillBeWritten = crWillBeWritten;
op.gprDiscardable = gprDiscardable;
op.fprDiscardable = fprDiscardable;
op.crDiscardable = crDiscardable;
op.fprInXmm = fprInXmm;
gprBlockInputs &= ~op.regsOut;
gprBlockInputs |= op.regsIn;
gprInUse |= op.regsIn | op.regsOut;
fprInUse |= op.fregsIn | op.GetFregsOut();
crInUse |= op.crIn | op.crOut;
gprWillBeRead &= ~op.regsOut;
gprWillBeRead |= op.regsIn;
gprWillBeWritten |= op.regsOut;
fprWillBeRead &= ~op.GetFregsOut();
fprWillBeRead |= op.fregsIn;
fprWillBeWritten |= op.GetFregsOut();
crWillBeRead &= ~op.crOut;
crWillBeRead |= op.crIn;
crWillBeWritten |= op.crOut;
if (strncmp(op.opinfo->opname, "stfd", 4))
fprInXmm |= op.fregsIn;
if (hle || breakpoint)
{
gprInUse = BitSet32{};
fprInUse = BitSet32{};
gprWillBeRead = BitSet32{};
gprWillBeWritten = BitSet32{};
fprWillBeRead = BitSet32{};
fprWillBeWritten = BitSet32{};
fprInXmm = BitSet32{};
crInUse = BitSet8{};
crWillBeRead = BitSet8{};
crWillBeWritten = BitSet8{};
gprDiscardable = BitSet32{};
fprDiscardable = BitSet32{};
crDiscardable = BitSet8{};
@ -1149,7 +1160,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
}
block->m_gqr_used = gqrUsed;
block->m_gqr_modified = gqrModified;
block->m_gpr_inputs = gprBlockInputs;
block->m_gpr_inputs = gprWillBeRead;
return address;
}

View File

@ -51,11 +51,14 @@ struct CodeOp // 16B
bool canCauseException = false;
bool skipLRStack = false;
bool skip = false; // followed BL-s for example
BitSet8 crInUse;
BitSet8 crWillBeRead;
BitSet8 crWillBeWritten;
BitSet8 crDiscardable;
// which registers will be read (before being overwritten) or written by later instructions in
// this block
BitSet32 fprInUse;
BitSet32 gprInUse;
BitSet32 gprWillBeRead;
BitSet32 gprWillBeWritten;
BitSet32 fprWillBeRead;
BitSet32 fprWillBeWritten;
// which registers have values which are known to be unused after this instruction
BitSet32 gprDiscardable;
BitSet32 fprDiscardable;