From 30c0adacaad5e9435b5b3ab98b5554f342ca3324 Mon Sep 17 00:00:00 2001
From: Nuh Uh <72356786+Geotale@users.noreply.github.com>
Date: Sun, 28 Sep 2025 15:02:03 -0500
Subject: [PATCH 01/14] Handles paired single move rounding and other quirks,
 improves fres accuracy

---
 Source/Core/Common/CMakeLists.txt             |   2 -
 Source/Core/Common/FloatUtils.h               | 108 ----------
 Source/Core/Core/CMakeLists.txt               |   2 +
 Source/Core/{Common => Core}/FloatUtils.cpp   |  81 +++++---
 Source/Core/Core/FloatUtils.h                 | 191 ++++++++++++++++++
 .../PowerPC/Interpreter/Interpreter_FPUtils.h |  54 ++---
 .../Interpreter/Interpreter_FloatingPoint.cpp |  18 +-
 .../Interpreter/Interpreter_Paired.cpp        |  70 ++++---
 .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp |  54 +++--
 .../PowerPC/Jit64Common/Jit64AsmCommon.cpp    |  42 ++--
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp  |  43 ++--
 Source/Core/Core/PowerPC/PowerPC.cpp          |  26 ++-
 Source/Core/Core/PowerPC/PowerPC.h            |  10 +-
 Source/Core/DolphinLib.props                  |   4 +-
 .../DolphinQt/Debugger/MemoryViewWidget.cpp   |   1 +
 Source/UnitTests/Common/CMakeLists.txt        |   1 -
 Source/UnitTests/Core/CMakeLists.txt          |   1 +
 .../{Common => Core}/FloatUtilsTest.cpp       |  36 ++--
 .../Core/PowerPC/Jit64Common/Frsqrte.cpp      |   4 +-
 .../UnitTests/Core/PowerPC/JitArm64/Fres.cpp  |   5 +-
 .../Core/PowerPC/JitArm64/Frsqrte.cpp         |   2 +-
 Source/UnitTests/UnitTests.vcxproj            |   2 +-
 22 files changed, 453 insertions(+), 304 deletions(-)
 delete mode 100644 Source/Core/Common/FloatUtils.h
 rename Source/Core/{Common => Core}/FloatUtils.cpp (63%)
 create mode 100644 Source/Core/Core/FloatUtils.h
 rename Source/UnitTests/{Common => Core}/FloatUtilsTest.cpp (69%)
diff --git a/Source/Core/Common/CMakeLists.txt b/Source/Core/Common/CMakeLists.txt
index 5ef6e8527f..bb9a8e0ebd 100644
--- a/Source/Core/Common/CMakeLists.txt
+++ b/Source/Core/Common/CMakeLists.txt
@@ -71,8 +71,6 @@ add_library(common
   FileUtil.h
   FixedSizeQueue.h
   Flag.h
-  FloatUtils.cpp
-  FloatUtils.h
   Functional.h
   FormatUtil.h
   FPURoundMode.h
diff --git a/Source/Core/Common/FloatUtils.h b/Source/Core/Common/FloatUtils.h
deleted file mode 100644
index bc950f90e2..0000000000
--- a/Source/Core/Common/FloatUtils.h
+++ /dev/null
@@ -1,108 +0,0 @@
-// Copyright 2018 Dolphin Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include <array>
-#include <bit>
-#include <limits>
-
-#include "Common/CommonTypes.h"
-
-namespace Common
-{
-template <typename T>
-constexpr T SNANConstant()
-{
-  return std::numeric_limits<T>::signaling_NaN();
-}
-
-// The most significant bit of the fraction is an is-quiet bit on all architectures we care about.
-static constexpr u64 DOUBLE_QBIT = 0x0008000000000000ULL;
-static constexpr u64 DOUBLE_SIGN = 0x8000000000000000ULL;
-static constexpr u64 DOUBLE_EXP = 0x7FF0000000000000ULL;
-static constexpr u64 DOUBLE_FRAC = 0x000FFFFFFFFFFFFFULL;
-static constexpr u64 DOUBLE_ZERO = 0x0000000000000000ULL;
-static constexpr int DOUBLE_EXP_WIDTH = 11;
-static constexpr int DOUBLE_FRAC_WIDTH = 52;
-
-static constexpr u32 FLOAT_SIGN = 0x80000000;
-static constexpr u32 FLOAT_EXP = 0x7F800000;
-static constexpr u32 FLOAT_FRAC = 0x007FFFFF;
-static constexpr u32 FLOAT_ZERO = 0x00000000;
-static constexpr int FLOAT_EXP_WIDTH = 8;
-static constexpr int FLOAT_FRAC_WIDTH = 23;
-
-inline bool IsQNAN(double d)
-{
-  const u64 i = std::bit_cast<u64>(d);
-  return ((i & DOUBLE_EXP) == DOUBLE_EXP) && ((i & DOUBLE_QBIT) == DOUBLE_QBIT);
-}
-
-inline bool IsSNAN(double d)
-{
-  const u64 i = std::bit_cast<u64>(d);
-  return ((i & DOUBLE_EXP) == DOUBLE_EXP) && ((i & DOUBLE_FRAC) != DOUBLE_ZERO) &&
-         ((i & DOUBLE_QBIT) == DOUBLE_ZERO);
-}
-
-inline float FlushToZero(float f)
-{
-  u32 i = std::bit_cast<u32>(f);
-  if ((i & FLOAT_EXP) == 0)
-  {
-    // Turn into signed zero
-    i &= FLOAT_SIGN;
-  }
-  return std::bit_cast<float>(i);
-}
-
-inline double FlushToZero(double d)
-{
-  u64 i = std::bit_cast<u64>(d);
-  if ((i & DOUBLE_EXP) == 0)
-  {
-    // Turn into signed zero
-    i &= DOUBLE_SIGN;
-  }
-  return std::bit_cast<double>(i);
-}
-
-inline double MakeQuiet(double d)
-{
-  const u64 integral = std::bit_cast<u64>(d) | Common::DOUBLE_QBIT;
-
-  return std::bit_cast<double>(integral);
-}
-
-enum PPCFpClass
-{
-  PPC_FPCLASS_QNAN = 0x11,
-  PPC_FPCLASS_NINF = 0x9,
-  PPC_FPCLASS_NN = 0x8,
-  PPC_FPCLASS_ND = 0x18,
-  PPC_FPCLASS_NZ = 0x12,
-  PPC_FPCLASS_PZ = 0x2,
-  PPC_FPCLASS_PD = 0x14,
-  PPC_FPCLASS_PN = 0x4,
-  PPC_FPCLASS_PINF = 0x5,
-};
-
-// Uses PowerPC conventions for the return value, so it can be easily
-// used directly in CPU emulation.
-u32 ClassifyDouble(double dvalue);
-u32 ClassifyFloat(float fvalue);
-
-struct BaseAndDec
-{
-  int m_base;
-  int m_dec;
-};
-extern const std::array<BaseAndDec, 32> frsqrte_expected;
-extern const std::array<BaseAndDec, 32> fres_expected;
-
-// PowerPC approximation algorithms
-double ApproximateReciprocalSquareRoot(double val);
-double ApproximateReciprocal(double val);
-
-}  // namespace Common
diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt
index 85e446e2a0..c872041515 100644
--- a/Source/Core/Core/CMakeLists.txt
+++ b/Source/Core/Core/CMakeLists.txt
@@ -123,6 +123,8 @@ add_library(core
   FifoPlayer/FifoPlayer.h
   FifoPlayer/FifoRecorder.cpp
   FifoPlayer/FifoRecorder.h
+  FloatUtils.cpp
+  FloatUtils.h
   FreeLookManager.cpp
   FreeLookManager.h
   GeckoCode.cpp
diff --git a/Source/Core/Common/FloatUtils.cpp b/Source/Core/Core/FloatUtils.cpp
similarity index 63%
rename from Source/Core/Common/FloatUtils.cpp
rename to Source/Core/Core/FloatUtils.cpp
index 312d4a88a2..b6ad62b213 100644
--- a/Source/Core/Common/FloatUtils.cpp
+++ b/Source/Core/Core/FloatUtils.cpp
@@ -1,12 +1,12 @@
 // Copyright 2018 Dolphin Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include "Common/FloatUtils.h"
+#include "Core/FloatUtils.h"
 
 #include <bit>
 #include <cmath>
 
-namespace Common
+namespace Core
 {
 u32 ClassifyDouble(double dvalue)
 {
@@ -98,7 +98,7 @@ double ApproximateReciprocalSquareRoot(double val)
   }
 
   // Special case NaN-ish numbers
-  if (exponent == (0x7FFLL << 52))
+  if (exponent == DOUBLE_EXP)
   {
     if (mantissa == 0)
     {
@@ -123,7 +123,7 @@ double ApproximateReciprocalSquareRoot(double val)
       exponent -= 1LL << 52;
       mantissa <<= 1;
     } while (!(mantissa & (1LL << 52)));
-    mantissa &= (1LL << 52) - 1;
+    mantissa &= DOUBLE_FRAC;
     exponent += 1LL << 52;
   }
 
@@ -139,51 +139,70 @@ double ApproximateReciprocalSquareRoot(double val)
 }
 
 const std::array<BaseAndDec, 32> fres_expected = {{
-    {0x7ff800, 0x3e1}, {0x783800, 0x3a7}, {0x70ea00, 0x371}, {0x6a0800, 0x340}, {0x638800, 0x313},
-    {0x5d6200, 0x2ea}, {0x579000, 0x2c4}, {0x520800, 0x2a0}, {0x4cc800, 0x27f}, {0x47ca00, 0x261},
-    {0x430800, 0x245}, {0x3e8000, 0x22a}, {0x3a2c00, 0x212}, {0x360800, 0x1fb}, {0x321400, 0x1e5},
-    {0x2e4a00, 0x1d1}, {0x2aa800, 0x1be}, {0x272c00, 0x1ac}, {0x23d600, 0x19b}, {0x209e00, 0x18b},
-    {0x1d8800, 0x17c}, {0x1a9000, 0x16e}, {0x17ae00, 0x15b}, {0x14f800, 0x15b}, {0x124400, 0x143},
-    {0x0fbe00, 0x143}, {0x0d3800, 0x12d}, {0x0ade00, 0x12d}, {0x088400, 0x11a}, {0x065000, 0x11a},
-    {0x041c00, 0x108}, {0x020c00, 0x106},
+    {0xfff000, -0x3e1}, {0xf07000, -0x3a7}, {0xe1d400, -0x371}, {0xd41000, -0x340},
+    {0xc71000, -0x313}, {0xbac400, -0x2ea}, {0xaf2000, -0x2c4}, {0xa41000, -0x2a0},
+    {0x999000, -0x27f}, {0x8f9400, -0x261}, {0x861000, -0x245}, {0x7d0000, -0x22a},
+    {0x745800, -0x212}, {0x6c1000, -0x1fb}, {0x642800, -0x1e5}, {0x5c9400, -0x1d1},
+    {0x555000, -0x1be}, {0x4e5800, -0x1ac}, {0x47ac00, -0x19b}, {0x413c00, -0x18b},
+    {0x3b1000, -0x17c}, {0x352000, -0x16e}, {0x2f5c00, -0x15b}, {0x29f000, -0x15b},
+    {0x248800, -0x143}, {0x1f7c00, -0x143}, {0x1a7000, -0x12d}, {0x15bc00, -0x12d},
+    {0x110800, -0x11a}, {0x0ca000, -0x11a}, {0x083800, -0x108}, {0x041800, -0x106},
 }};
 
 // Used by fres and ps_res.
-double ApproximateReciprocal(double val)
+double ApproximateReciprocal(const UReg_FPSCR& fpscr, double val)
 {
-  s64 integral = std::bit_cast<s64>(val);
-  const s64 mantissa = integral & ((1LL << 52) - 1);
-  const s64 sign = integral & (1ULL << 63);
-  s64 exponent = integral & (0x7FFLL << 52);
+  const u64 integral = std::bit_cast<u64>(val);
+
+  // Single precision view of the input; 0x380 (1023 - 127) converts the exponent bias
+  const u64 signless = integral & ~DOUBLE_SIGN;
+  const u32 mantissa =
+      static_cast<u32>((integral & DOUBLE_FRAC) >> (DOUBLE_FRAC_WIDTH - FLOAT_FRAC_WIDTH));
+  const u32 sign = static_cast<u32>((integral >> 32) & FLOAT_SIGN);
+  const s32 exponent = static_cast<s32>((integral & DOUBLE_EXP) >> DOUBLE_FRAC_WIDTH) - 0x380;
+
+  // Inputs with a magnitude at or above this cutoff yield a signed zero (NaNs excepted)
+  const u64 huge_float = fpscr.NI ? 0x47d0000000000000ULL : 0x4940000000000000ULL;
 
   // Special case 0
-  if (mantissa == 0 && exponent == 0)
+  if (signless == 0)
     return std::copysign(std::numeric_limits<double>::infinity(), val);
 
-  // Special case NaN-ish numbers
-  if (exponent == (0x7FFLL << 52))
+  // Special case huge or NaN-ish numbers
+  if (signless >= huge_float)
   {
-    if (mantissa == 0)
+    if (!std::isnan(val))
       return std::copysign(0.0, val);
     return MakeQuiet(val);
   }
 
   // Special case small inputs
-  if (exponent < (895LL << 52))
+  if (exponent < -1)
     return std::copysign(std::numeric_limits<float>::max(), val);
 
-  // Special case large inputs
-  if (exponent >= (1149LL << 52))
-    return std::copysign(0.0, val);
+  const s32 new_exponent = 253 - exponent;
 
-  exponent = (0x7FDLL << 52) - exponent;
-
-  const int i = static_cast<int>(mantissa >> 37);
+  const u32 i = static_cast<u32>(mantissa >> 8);
   const auto& entry = fres_expected[i / 1024];
-  integral = sign | exponent;
-  integral |= static_cast<s64>(entry.m_base - (entry.m_dec * (i % 1024) + 1) / 2) << 29;
+  const u32 new_mantissa = static_cast<u32>(entry.m_base + entry.m_dec * (i % 1024)) / 2;
 
-  return std::bit_cast<double>(integral);
+  u32 result = sign | (static_cast<u32>(new_exponent) << FLOAT_FRAC_WIDTH) | new_mantissa;
+  if (new_exponent <= 0)
+  {
+    // A biased exponent <= 0 cannot be encoded directly: the result is subnormal
+    if (fpscr.NI)
+    {
+      // Flush to signed zero (NOTE(review): the NI cutoff above seems to rule this case out)
+      result = sign;
+    }
+    else
+    {
+      // Denormalize: restore the implicit leading one, then shift right by 1 - new_exponent
+      u32 shift = 1 + static_cast<u32>(-new_exponent);
+      result = sign | (((1 << FLOAT_FRAC_WIDTH) | new_mantissa) >> shift);
+    }
+  }
+  return static_cast<double>(std::bit_cast<float>(result));
 }
 
-}  // namespace Common
+}  // namespace Core
diff --git a/Source/Core/Core/FloatUtils.h b/Source/Core/Core/FloatUtils.h
new file mode 100644
index 0000000000..27a6b03a4b
--- /dev/null
+++ b/Source/Core/Core/FloatUtils.h
@@ -0,0 +1,191 @@
+// Copyright 2018 Dolphin Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include <bit>
+#include <limits>
+
+#include "Common/CommonTypes.h"
+#include "Core/PowerPC/Gekko.h"
+
+namespace Core
+{
+template <typename T>
+constexpr T SNANConstant()
+{
+  return std::numeric_limits<T>::signaling_NaN();
+}
+
+// The most significant bit of the fraction is an is-quiet bit on all architectures we care about.
+static constexpr u64 DOUBLE_QBIT = 0x0008000000000000ULL;
+static constexpr u64 DOUBLE_SIGN = 0x8000000000000000ULL;
+static constexpr u64 DOUBLE_EXP = 0x7FF0000000000000ULL;
+static constexpr u64 DOUBLE_FRAC = 0x000FFFFFFFFFFFFFULL;
+static constexpr u64 DOUBLE_ZERO = 0x0000000000000000ULL;
+static constexpr int DOUBLE_EXP_WIDTH = 11;
+static constexpr int DOUBLE_FRAC_WIDTH = 52;
+
+static constexpr u32 FLOAT_SIGN = 0x80000000;
+static constexpr u32 FLOAT_EXP = 0x7F800000;
+static constexpr u32 FLOAT_FRAC = 0x007FFFFF;
+static constexpr u32 FLOAT_ZERO = 0x00000000;
+static constexpr int FLOAT_EXP_WIDTH = 8;
+static constexpr int FLOAT_FRAC_WIDTH = 23;
+
+inline bool IsQNAN(double d)
+{
+  constexpr u64 qnan_bits = DOUBLE_EXP | DOUBLE_QBIT;  // all-ones exponent plus the quiet bit
+  return (std::bit_cast<u64>(d) & qnan_bits) == qnan_bits;
+}
+
+inline bool IsSNAN(double d)
+{
+  // Signaling NaN: all-ones exponent, quiet bit clear, and a non-zero fraction.
+  const u64 bits = std::bit_cast<u64>(d);
+  return (bits & (DOUBLE_EXP | DOUBLE_QBIT)) == DOUBLE_EXP && (bits & DOUBLE_FRAC) != 0;
+}
+
+inline float FlushToZero(float f)
+{
+  const u32 bits = std::bit_cast<u32>(f);
+  const bool zero_or_subnormal = (bits & FLOAT_EXP) == 0;
+
+  // Zero and subnormal inputs collapse to a zero carrying the original sign;
+  // every other bit pattern is returned unchanged.
+  const u32 flushed = zero_or_subnormal ? (bits & FLOAT_SIGN) : bits;
+  return std::bit_cast<float>(flushed);
+}
+
+inline double FlushToZero(double d)
+{
+  const u64 bits = std::bit_cast<u64>(d);
+  const bool zero_or_subnormal = (bits & DOUBLE_EXP) == 0;
+
+  // A zero exponent field means zero or subnormal: keep just the sign bit so the
+  // result is +0.0 or -0.0. Any other bit pattern is returned unchanged.
+  const u64 flushed = zero_or_subnormal ? (bits & DOUBLE_SIGN) : bits;
+  return std::bit_cast<double>(flushed);
+}
+
+inline double MakeQuiet(double d)
+{
+  // Sets the quiet bit of the fraction, turning a signaling NaN into a quiet one.
+  // The bit is set unconditionally; no check is made that d actually is a NaN.
+  return std::bit_cast<double>(std::bit_cast<u64>(d) | DOUBLE_QBIT);
+}
+
+enum PPCFpClass
+{
+  PPC_FPCLASS_QNAN = 0x11,
+  PPC_FPCLASS_NINF = 0x9,
+  PPC_FPCLASS_NN = 0x8,
+  PPC_FPCLASS_ND = 0x18,
+  PPC_FPCLASS_NZ = 0x12,
+  PPC_FPCLASS_PZ = 0x2,
+  PPC_FPCLASS_PD = 0x14,
+  PPC_FPCLASS_PN = 0x4,
+  PPC_FPCLASS_PINF = 0x5,
+};
+
+// Uses PowerPC conventions for the return value, so it can be easily
+// used directly in CPU emulation.
+u32 ClassifyDouble(double dvalue);
+u32 ClassifyFloat(float fvalue);
+
+struct BaseAndDec
+{
+  int m_base;
+  int m_dec;
+};
+extern const std::array<BaseAndDec, 32> frsqrte_expected;
+extern const std::array<BaseAndDec, 32> fres_expected;
+
+// PowerPC approximation algorithms
+double ApproximateReciprocalSquareRoot(double val);
+double ApproximateReciprocal(const UReg_FPSCR& fpscr, double val);
+
+// Instructions which move data without performing operations round a bit weirdly.
+// Specifically, they round the mantissa to be like that of a 32-bit float,
+// going as far as to respect the rounding mode, but never actually care about
+// making sure the exponent fits in a 32-bit float.
+// Either this, or they'll truncate the mantissa down, which will always happen to
+// PS1 or PS0 in ps_rsqrte.
+inline u64 TruncateMantissaBits(u64 bits)
+{
+  // Clear the low fraction bits that have no counterpart in a single precision
+  // float. The mask is built from a u64 one so the shift is done at 64-bit width.
+  constexpr u64 remove_bits = DOUBLE_FRAC_WIDTH - FLOAT_FRAC_WIDTH;
+  constexpr u64 remove_mask = (u64{1} << remove_bits) - 1;
+  return bits & ~remove_mask;
+}
+
+inline double TruncateMantissa(double value)
+{
+  // Double-typed convenience wrapper: reinterpret as raw bits, drop the fraction
+  // bits a single cannot hold, and reinterpret the result as a double again.
+  return std::bit_cast<double>(TruncateMantissaBits(std::bit_cast<u64>(value)));
+}
+
+inline u64 RoundMantissaBitsFinite(u64 bits)
+{
+  const u64 replacement_exp = 0x4000000000000000ull;
+
+  // Swap the exponent for a fixed one that is in range for a single (field 0x400,
+  // i.e. 2.0 <= |x| < 4.0) so the double->float conversion below rounds the fraction
+  // to 23 bits. That conversion uses the host's current rounding mode, which is
+  // assumed to match the emulated CPU's.
+  // The original exponent is restored by subtraction/addition rather than masking
+  // because rounding may carry out of the fraction and bump the exponent by one.
+  u64 resized_bits = (bits & (DOUBLE_FRAC | DOUBLE_SIGN)) | replacement_exp;
+
+  float rounded_float = static_cast<float>(std::bit_cast<double>(resized_bits));
+  double extended_float = static_cast<double>(rounded_float);
+  u64 rounded_bits = std::bit_cast<u64>(extended_float);
+
+  u64 orig_exp_bits = bits & DOUBLE_EXP;
+
+  if (orig_exp_bits == 0)
+  {
+    // Zero or subnormal input: force the exponent field back to zero, dropping any carry
+    return rounded_bits & ~DOUBLE_EXP;
+  }
+
+  // Normal input: restore the original exponent, preserving any carry from rounding
+  rounded_bits = (rounded_bits - replacement_exp) + orig_exp_bits;
+  return rounded_bits;
+}
+
+inline u64 RoundMantissaBits(u64 bits)
+{
+  // An all-ones exponent field marks infinity or NaN. Those are never rounded:
+  // their fraction is only cut down to single precision width. Everything else
+  // goes through the rounding path.
+  const bool is_finite = (bits & DOUBLE_EXP) != DOUBLE_EXP;
+  if (is_finite)
+    return RoundMantissaBitsFinite(bits);
+
+  return TruncateMantissaBits(bits);
+}
+
+inline double RoundMantissaFinite(double value)
+{
+  // Double-typed wrapper around RoundMantissaBitsFinite. Unlike RoundMantissa, no
+  // infinity/NaN check is made first. Per the original author's note this is only
+  // needed by ps_sum1, which rounds ps0 as if it were finite without checking.
+  const u64 raw = std::bit_cast<u64>(value);
+
+  return std::bit_cast<double>(RoundMantissaBitsFinite(raw));
+}
+
+inline double RoundMantissa(double value)
+{
+  // Double-typed wrapper: the actual work, including the infinity/NaN special
+  // case, is done on the raw bit pattern by RoundMantissaBits.
+  const u64 raw = std::bit_cast<u64>(value);
+  const u64 rounded = RoundMantissaBits(raw);
+  return std::bit_cast<double>(rounded);
+}
+
+}  // namespace Core
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
index d01087fe1d..2985f4b1f7 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
@@ -9,7 +9,7 @@
 
 #include "Common/CPUDetect.h"
 #include "Common/CommonTypes.h"
-#include "Common/FloatUtils.h"
+#include "Core/FloatUtils.h"
 #include "Core/PowerPC/Gekko.h"
 #include "Core/PowerPC/Interpreter/ExceptionUtils.h"
 #include "Core/PowerPC/PowerPC.h"
@@ -59,11 +59,11 @@ inline float ForceSingle(const UReg_FPSCR& fpscr, double value)
 
     constexpr u64 smallest_normal_single = 0x3810000000000000;
     const u64 value_without_sign =
-        std::bit_cast<u64>(value) & (Common::DOUBLE_EXP | Common::DOUBLE_FRAC);
+        std::bit_cast<u64>(value) & (Core::DOUBLE_EXP | Core::DOUBLE_FRAC);
 
     if (value_without_sign < smallest_normal_single)
     {
-      const u64 flushed_double = std::bit_cast<u64>(value) & Common::DOUBLE_SIGN;
+      const u64 flushed_double = std::bit_cast<u64>(value) & Core::DOUBLE_SIGN;
       const u32 flushed_single = static_cast<u32>(flushed_double >> 32);
       return std::bit_cast<float>(flushed_single);
     }
@@ -74,7 +74,7 @@ inline float ForceSingle(const UReg_FPSCR& fpscr, double value)
   float x = static_cast<float>(value);
   if (!cpu_info.bFlushToZero && fpscr.NI)
   {
-    x = Common::FlushToZero(x);
+    x = Core::FlushToZero(x);
   }
   return x;
 }
@@ -83,7 +83,7 @@ inline double ForceDouble(const UReg_FPSCR& fpscr, double d)
 {
   if (!cpu_info.bFlushToZero && fpscr.NI)
   {
-    d = Common::FlushToZero(d);
+    d = Core::FlushToZero(d);
   }
   return d;
 }
@@ -92,8 +92,8 @@ inline double Force25Bit(double d)
 {
   u64 integral = std::bit_cast<u64>(d);
 
-  u64 exponent = integral & Common::DOUBLE_EXP;
-  u64 fraction = integral & Common::DOUBLE_FRAC;
+  u64 exponent = integral & Core::DOUBLE_EXP;
+  u64 fraction = integral & Core::DOUBLE_FRAC;
 
   if (exponent == 0 && fraction != 0)
   {
@@ -108,7 +108,7 @@ inline double Force25Bit(double d)
     // the fraction is "normal"
     // That is to say shifting it until the MSB of the fraction
     // would escape into the exponent
-    u32 shift = std::countl_zero(fraction) - (63 - Common::DOUBLE_FRAC_WIDTH);
+    u32 shift = std::countl_zero(fraction) - (63 - Core::DOUBLE_FRAC_WIDTH);
     keep_mask >>= shift;
     round >>= shift;
 
@@ -146,7 +146,7 @@ inline FPResult NI_mul(PowerPC::PowerPCState& ppc_state, double a, double b)
 
   if (std::isnan(result.value))
   {
-    if (Common::IsSNAN(a) || Common::IsSNAN(b))
+    if (Core::IsSNAN(a) || Core::IsSNAN(b))
     {
       result.SetException(ppc_state, FPSCR_VXSNAN);
     }
@@ -155,12 +155,12 @@ inline FPResult NI_mul(PowerPC::PowerPCState& ppc_state, double a, double b)
 
     if (std::isnan(a))
     {
-      result.value = Common::MakeQuiet(a);
+      result.value = Core::MakeQuiet(a);
       return result;
     }
     if (std::isnan(b))
     {
-      result.value = Common::MakeQuiet(b);
+      result.value = Core::MakeQuiet(b);
       return result;
     }
 
@@ -186,19 +186,19 @@ inline FPResult NI_div(PowerPC::PowerPCState& ppc_state, double a, double b)
   }
   else if (std::isnan(result.value))
   {
-    if (Common::IsSNAN(a) || Common::IsSNAN(b))
+    if (Core::IsSNAN(a) || Core::IsSNAN(b))
       result.SetException(ppc_state, FPSCR_VXSNAN);
 
     ppc_state.fpscr.ClearFIFR();
 
     if (std::isnan(a))
     {
-      result.value = Common::MakeQuiet(a);
+      result.value = Core::MakeQuiet(a);
       return result;
     }
     if (std::isnan(b))
     {
-      result.value = Common::MakeQuiet(b);
+      result.value = Core::MakeQuiet(b);
       return result;
     }
 
@@ -220,19 +220,19 @@ inline FPResult NI_add(PowerPC::PowerPCState& ppc_state, double a, double b)
 
   if (std::isnan(result.value))
   {
-    if (Common::IsSNAN(a) || Common::IsSNAN(b))
+    if (Core::IsSNAN(a) || Core::IsSNAN(b))
       result.SetException(ppc_state, FPSCR_VXSNAN);
 
     ppc_state.fpscr.ClearFIFR();
 
     if (std::isnan(a))
     {
-      result.value = Common::MakeQuiet(a);
+      result.value = Core::MakeQuiet(a);
       return result;
     }
     if (std::isnan(b))
     {
-      result.value = Common::MakeQuiet(b);
+      result.value = Core::MakeQuiet(b);
       return result;
     }
 
@@ -253,19 +253,19 @@ inline FPResult NI_sub(PowerPC::PowerPCState& ppc_state, double a, double b)
 
   if (std::isnan(result.value))
   {
-    if (Common::IsSNAN(a) || Common::IsSNAN(b))
+    if (Core::IsSNAN(a) || Core::IsSNAN(b))
       result.SetException(ppc_state, FPSCR_VXSNAN);
 
     ppc_state.fpscr.ClearFIFR();
 
     if (std::isnan(a))
     {
-      result.value = Common::MakeQuiet(a);
+      result.value = Core::MakeQuiet(a);
       return result;
     }
     if (std::isnan(b))
     {
-      result.value = Common::MakeQuiet(b);
+      result.value = Core::MakeQuiet(b);
       return result;
     }
 
@@ -493,24 +493,24 @@ inline FPResult NI_madd_msub(PowerPC::PowerPCState& ppc_state, double a, double
 
   if (std::isnan(result.value))
   {
-    if (Common::IsSNAN(a) || Common::IsSNAN(b) || Common::IsSNAN(c))
+    if (Core::IsSNAN(a) || Core::IsSNAN(b) || Core::IsSNAN(c))
       result.SetException(ppc_state, FPSCR_VXSNAN);
 
     ppc_state.fpscr.ClearFIFR();
 
     if (std::isnan(a))
     {
-      result.value = Common::MakeQuiet(a);
+      result.value = Core::MakeQuiet(a);
       return result;
     }
     if (std::isnan(b))
     {
-      result.value = Common::MakeQuiet(b);  // !
+      result.value = Core::MakeQuiet(b);  // !
       return result;
     }
     if (std::isnan(c))
     {
-      result.value = Common::MakeQuiet(c);
+      result.value = Core::MakeQuiet(c);
       return result;
     }
 
@@ -542,13 +542,13 @@ inline u32 ConvertToSingle(u64 x)
 {
   const u32 exp = u32((x >> 52) & 0x7ff);
 
-  if (exp > 896 || (x & ~Common::DOUBLE_SIGN) == 0)
+  if (exp > 896 || (x & ~Core::DOUBLE_SIGN) == 0)
   {
     return u32(((x >> 32) & 0xc0000000) | ((x >> 29) & 0x3fffffff));
   }
   else if (exp >= 874)
   {
-    u32 t = u32(0x80000000 | ((x & Common::DOUBLE_FRAC) >> 21));
+    u32 t = u32(0x80000000 | ((x & Core::DOUBLE_FRAC) >> 21));
     t = t >> (905 - exp);
     t |= u32((x >> 32) & 0x80000000);
     return t;
@@ -566,7 +566,7 @@ inline u32 ConvertToSingleFTZ(u64 x)
 {
   const u32 exp = u32((x >> 52) & 0x7ff);
 
-  if (exp > 896 || (x & ~Common::DOUBLE_SIGN) == 0)
+  if (exp > 896 || (x & ~Core::DOUBLE_SIGN) == 0)
   {
     return u32(((x >> 32) & 0xc0000000) | ((x >> 29) & 0x3fffffff));
   }
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
index 641a2330b9..99b4137e9a 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
@@ -7,7 +7,7 @@
 #include <utility>
 
 #include "Common/CommonTypes.h"
-#include "Common/FloatUtils.h"
+#include "Core/FloatUtils.h"
 #include "Core/PowerPC/Gekko.h"
 #include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
 #include "Core/PowerPC/PowerPC.h"
@@ -85,7 +85,7 @@ void ConvertToInteger(PowerPC::PowerPCState& ppc_state, UGeckoInstruction inst,
 
   if (std::isnan(b))
   {
-    if (Common::IsSNAN(b))
+    if (Core::IsSNAN(b))
       SetFPException(ppc_state, FPSCR_VXSNAN);
 
     value = 0x80000000;
@@ -152,7 +152,7 @@ void Interpreter::Helper_FloatCompareOrdered(PowerPC::PowerPCState& ppc_state,
   if (std::isnan(fa) || std::isnan(fb))
   {
     compare_result = FPCC::FU;
-    if (Common::IsSNAN(fa) || Common::IsSNAN(fb))
+    if (Core::IsSNAN(fa) || Core::IsSNAN(fb))
     {
       SetFPException(ppc_state, FPSCR_VXSNAN);
       if (ppc_state.fpscr.VE == 0)
@@ -195,7 +195,7 @@ void Interpreter::Helper_FloatCompareUnordered(PowerPC::PowerPCState& ppc_state,
   {
     compare_result = FPCC::FU;
 
-    if (Common::IsSNAN(fa) || Common::IsSNAN(fb))
+    if (Core::IsSNAN(fa) || Core::IsSNAN(fb))
     {
       SetFPException(ppc_state, FPSCR_VXSNAN);
     }
@@ -316,7 +316,7 @@ void Interpreter::frspx(Interpreter& interpreter, UGeckoInstruction inst)  // ro
 
   if (std::isnan(b))
   {
-    const bool is_snan = Common::IsSNAN(b);
+    const bool is_snan = Core::IsSNAN(b);
 
     if (is_snan)
       SetFPException(ppc_state, FPSCR_VXSNAN);
@@ -516,7 +516,7 @@ void Interpreter::fresx(Interpreter& interpreter, UGeckoInstruction inst)
   const double b = ppc_state.ps[inst.FB].PS0AsDouble();
 
   const auto compute_result = [&ppc_state, inst](double value) {
-    const double result = Common::ApproximateReciprocal(value);
+    const double result = Core::ApproximateReciprocal(ppc_state.fpscr, value);
     ppc_state.ps[inst.FD].Fill(result);
     ppc_state.UpdateFPRFSingle(float(result));
   };
@@ -529,7 +529,7 @@ void Interpreter::fresx(Interpreter& interpreter, UGeckoInstruction inst)
     if (ppc_state.fpscr.ZE == 0)
       compute_result(b);
   }
-  else if (Common::IsSNAN(b))
+  else if (Core::IsSNAN(b))
   {
     SetFPException(ppc_state, FPSCR_VXSNAN);
     ppc_state.fpscr.ClearFIFR();
@@ -555,7 +555,7 @@ void Interpreter::frsqrtex(Interpreter& interpreter, UGeckoInstruction inst)
   const double b = ppc_state.ps[inst.FB].PS0AsDouble();
 
   const auto compute_result = [&ppc_state, inst](double value) {
-    const double result = Common::ApproximateReciprocalSquareRoot(value);
+    const double result = Core::ApproximateReciprocalSquareRoot(value);
     ppc_state.ps[inst.FD].SetPS0(result);
     ppc_state.UpdateFPRFDouble(result);
   };
@@ -576,7 +576,7 @@ void Interpreter::frsqrtex(Interpreter& interpreter, UGeckoInstruction inst)
     if (ppc_state.fpscr.ZE == 0)
       compute_result(b);
   }
-  else if (Common::IsSNAN(b))
+  else if (Core::IsSNAN(b))
   {
     SetFPException(ppc_state, FPSCR_VXSNAN);
     ppc_state.fpscr.ClearFIFR();
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
index 0e0a0eec06..1ac61d2f71 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
@@ -3,9 +3,10 @@
 
 #include "Core/PowerPC/Interpreter/Interpreter.h"
 
+#include <bit>
 #include <cmath>
 
-#include "Common/FloatUtils.h"
+#include "Core/FloatUtils.h"
 #include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
 #include "Core/PowerPC/PowerPC.h"
 
@@ -17,8 +18,9 @@ void Interpreter::ps_sel(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& b = ppc_state.ps[inst.FB];
   const auto& c = ppc_state.ps[inst.FC];
 
-  ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble(),
-                                a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble());
+  double ps0 = a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble();
+  double ps1 = a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble();
+  ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissa(ps0), ps1);
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -29,8 +31,9 @@ void Interpreter::ps_neg(Interpreter& interpreter, UGeckoInstruction inst)
   auto& ppc_state = interpreter.m_ppc_state;
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() ^ (UINT64_C(1) << 63),
-                                b.PS1AsU64() ^ (UINT64_C(1) << 63));
+  u64 ps0 = b.PS0AsU64() ^ (UINT64_C(1) << 63);
+  u64 ps1 = b.PS1AsU64() ^ (UINT64_C(1) << 63);
+  ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissaBits(ps0), ps1);
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -39,7 +42,9 @@ void Interpreter::ps_neg(Interpreter& interpreter, UGeckoInstruction inst)
 void Interpreter::ps_mr(Interpreter& interpreter, UGeckoInstruction inst)
 {
   auto& ppc_state = interpreter.m_ppc_state;
-  ppc_state.ps[inst.FD] = ppc_state.ps[inst.FB];
+  const auto& b = ppc_state.ps[inst.FB];
+
+  ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissa(b.PS0AsDouble()), b.PS1AsDouble());
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -50,8 +55,9 @@ void Interpreter::ps_nabs(Interpreter& interpreter, UGeckoInstruction inst)
   auto& ppc_state = interpreter.m_ppc_state;
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() | (UINT64_C(1) << 63),
-                                b.PS1AsU64() | (UINT64_C(1) << 63));
+  u64 ps0 = b.PS0AsU64() | (UINT64_C(1) << 63);
+  u64 ps1 = b.PS1AsU64() | (UINT64_C(1) << 63);
+  ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissaBits(ps0), ps1);
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -62,8 +68,9 @@ void Interpreter::ps_abs(Interpreter& interpreter, UGeckoInstruction inst)
   auto& ppc_state = interpreter.m_ppc_state;
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() & ~(UINT64_C(1) << 63),
-                                b.PS1AsU64() & ~(UINT64_C(1) << 63));
+  u64 ps0 = b.PS0AsU64() & ~(UINT64_C(1) << 63);
+  u64 ps1 = b.PS1AsU64() & ~(UINT64_C(1) << 63);
+  ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissaBits(ps0), ps1);
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -76,7 +83,7 @@ void Interpreter::ps_merge00(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& a = ppc_state.ps[inst.FA];
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble(), b.PS0AsDouble());
+  ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissaBits(a.PS0AsU64()), b.PS0AsU64());
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -88,7 +95,7 @@ void Interpreter::ps_merge01(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& a = ppc_state.ps[inst.FA];
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble(), b.PS1AsDouble());
+  ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissaBits(a.PS0AsU64()), b.PS1AsU64());
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -100,7 +107,7 @@ void Interpreter::ps_merge10(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& a = ppc_state.ps[inst.FA];
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(a.PS1AsDouble(), b.PS0AsDouble());
+  ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissaBits(a.PS1AsU64()), b.PS0AsU64());
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -112,7 +119,7 @@ void Interpreter::ps_merge11(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& a = ppc_state.ps[inst.FA];
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(a.PS1AsDouble(), b.PS1AsDouble());
+  ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissaBits(a.PS1AsU64()), b.PS1AsU64());
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -142,7 +149,12 @@ void Interpreter::ps_res(Interpreter& interpreter, UGeckoInstruction inst)
   // this code is based on the real hardware tests
   auto& ppc_state = interpreter.m_ppc_state;
   const double a = ppc_state.ps[inst.FB].PS0AsDouble();
-  const double b = ppc_state.ps[inst.FB].PS1AsDouble();
+  const double b = ppc_state.ps[inst.FB].PS1AsReciprocalDouble();
+
+  // Conditionally truncating the mantissa bits of the b register is not needed here,
+  // because ps_res never reads those bottom mantissa bits anyway when operating on a
+  // standard input (i.e. neither NaN nor infinity).
+  // That is to say, the operation is 32-bit, whereas rsqrte is 64-bit.
 
   if (a == 0.0 || b == 0.0)
   {
@@ -153,11 +165,11 @@ void Interpreter::ps_res(Interpreter& interpreter, UGeckoInstruction inst)
   if (std::isnan(a) || std::isinf(a) || std::isnan(b) || std::isinf(b))
     ppc_state.fpscr.ClearFIFR();
 
-  if (Common::IsSNAN(a) || Common::IsSNAN(b))
+  if (Core::IsSNAN(a) || Core::IsSNAN(b))
     SetFPException(ppc_state, FPSCR_VXSNAN);
 
-  const double ps0 = Common::ApproximateReciprocal(a);
-  const double ps1 = Common::ApproximateReciprocal(b);
+  const double ps0 = Core::TruncateMantissa(Core::ApproximateReciprocal(ppc_state.fpscr, a));
+  const double ps1 = Core::ApproximateReciprocal(ppc_state.fpscr, b);
 
   ppc_state.ps[inst.FD].SetBoth(ps0, ps1);
   ppc_state.UpdateFPRFSingle(float(ps0));
@@ -170,7 +182,15 @@ void Interpreter::ps_rsqrte(Interpreter& interpreter, UGeckoInstruction inst)
 {
   auto& ppc_state = interpreter.m_ppc_state;
   const double ps0 = ppc_state.ps[inst.FB].PS0AsDouble();
-  const double ps1 = ppc_state.ps[inst.FB].PS1AsDouble();
+  double ps1 = ppc_state.ps[inst.FB].PS1AsReciprocalDouble();
+
+  if (std::bit_cast<s64>(ps1) > 0)
+  {
+    // If ps1 is < 0.0, we want the result to remain < 0.0 even for
+    // the smallest of subnormals which would otherwise be truncated to 0.0,
+    // specifically so the proper exception is set
+    ps1 = Core::TruncateMantissa(ps1);
+  }
 
   if (ps0 == 0.0 || ps1 == 0.0)
   {
@@ -187,11 +207,12 @@ void Interpreter::ps_rsqrte(Interpreter& interpreter, UGeckoInstruction inst)
   if (std::isnan(ps0) || std::isinf(ps0) || std::isnan(ps1) || std::isinf(ps1))
     ppc_state.fpscr.ClearFIFR();
 
-  if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1))
+  if (Core::IsSNAN(ps0) || Core::IsSNAN(ps1))
     SetFPException(ppc_state, FPSCR_VXSNAN);
 
-  const float dst_ps0 = ForceSingle(ppc_state.fpscr, Common::ApproximateReciprocalSquareRoot(ps0));
-  const float dst_ps1 = ForceSingle(ppc_state.fpscr, Common::ApproximateReciprocalSquareRoot(ps1));
+  // For some reason ps0 is also truncated for this operation rather than rounded
+  const double dst_ps0 = Core::TruncateMantissa(Core::ApproximateReciprocalSquareRoot(ps0));
+  const double dst_ps1 = Core::ApproximateReciprocalSquareRoot(ps1);
 
   ppc_state.ps[inst.FD].SetBoth(dst_ps0, dst_ps1);
   ppc_state.UpdateFPRFSingle(dst_ps0);
@@ -354,7 +375,7 @@ void Interpreter::ps_sum0(Interpreter& interpreter, UGeckoInstruction inst)
 
   const float ps0 =
       ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value);
-  const float ps1 = ForceSingle(ppc_state.fpscr, c.PS1AsDouble());
+  const double ps1 = c.PS1AsDouble();
 
   ppc_state.ps[inst.FD].SetBoth(ps0, ps1);
   ppc_state.UpdateFPRFSingle(ps0);
@@ -370,7 +391,8 @@ void Interpreter::ps_sum1(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& b = ppc_state.ps[inst.FB];
   const auto& c = ppc_state.ps[inst.FC];
 
-  const float ps0 = ForceSingle(ppc_state.fpscr, c.PS0AsDouble());
+  // Rounds assuming ps0 is finite for some reason
+  const double ps0 = Core::RoundMantissaFinite(c.PS0AsDouble());
   const float ps1 =
       ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value);
 
diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
index 1019aa7d0a..ccc5ff7f8e 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
@@ -7,9 +7,9 @@
 
 #include "Common/Assert.h"
 #include "Common/CPUDetect.h"
-#include "Common/FloatUtils.h"
 #include "Common/Intrinsics.h"
 #include "Common/Swap.h"
+#include "Core/FloatUtils.h"
 #include "Core/HW/MMIO.h"
 #include "Core/HW/Memmap.h"
 #include "Core/PowerPC/Gekko.h"
@@ -919,13 +919,13 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
   MOVDDUP(dst, R(dst));
 }
 
-alignas(16) static const u64 psDoubleExp[2] = {Common::DOUBLE_EXP, 0};
-alignas(16) static const u64 psDoubleFrac[2] = {Common::DOUBLE_FRAC, 0};
-alignas(16) static const u64 psDoubleNoSign[2] = {~Common::DOUBLE_SIGN, 0};
+alignas(16) static const u64 psDoubleExp[2] = {Core::DOUBLE_EXP, 0};
+alignas(16) static const u64 psDoubleFrac[2] = {Core::DOUBLE_FRAC, 0};
+alignas(16) static const u64 psDoubleNoSign[2] = {~Core::DOUBLE_SIGN, 0};
 
-alignas(16) static const u32 psFloatExp[4] = {Common::FLOAT_EXP, 0, 0, 0};
-alignas(16) static const u32 psFloatFrac[4] = {Common::FLOAT_FRAC, 0, 0, 0};
-alignas(16) static const u32 psFloatNoSign[4] = {~Common::FLOAT_SIGN, 0, 0, 0};
+alignas(16) static const u32 psFloatExp[4] = {Core::FLOAT_EXP, 0, 0, 0};
+alignas(16) static const u32 psFloatFrac[4] = {Core::FLOAT_FRAC, 0, 0, 0};
+alignas(16) static const u32 psFloatNoSign[4] = {~Core::FLOAT_SIGN, 0, 0, 0};
 
 // TODO: it might be faster to handle FPRF in the same way as CR is currently handled for integer,
 // storing the result of each floating point op and calculating it when needed. This is trickier
@@ -951,9 +951,9 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm, bool single)
     FixupBranch maxExponent = J_CC(CC_C);
     FixupBranch zeroExponent = J_CC(CC_Z);
 
-    // Nice normalized number: sign ? PPC_FPCLASS_NN : PPC_FPCLASS_PN;
+    // Nice normalized number: sign ? Core::PPC_FPCLASS_NN : Core::PPC_FPCLASS_PN;
     LEA(32, RSCRATCH,
-        MScaled(RSCRATCH, Common::PPC_FPCLASS_NN - Common::PPC_FPCLASS_PN, Common::PPC_FPCLASS_PN));
+        MScaled(RSCRATCH, Core::PPC_FPCLASS_NN - Core::PPC_FPCLASS_PN, Core::PPC_FPCLASS_PN));
     continue1 = J();
 
     SetJumpTarget(maxExponent);
@@ -963,15 +963,14 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm, bool single)
       PTEST(xmm, MConst(psDoubleFrac));
     FixupBranch notNAN = J_CC(CC_Z);
 
-    // Max exponent + mantissa: PPC_FPCLASS_QNAN
-    MOV(32, R(RSCRATCH), Imm32(Common::PPC_FPCLASS_QNAN));
+    // Max exponent + mantissa: Core::PPC_FPCLASS_QNAN
+    MOV(32, R(RSCRATCH), Imm32(Core::PPC_FPCLASS_QNAN));
     continue2 = J();
 
-    // Max exponent + no mantissa: sign ? PPC_FPCLASS_NINF : PPC_FPCLASS_PINF;
+    // Max exponent + no mantissa: sign ? Core::PPC_FPCLASS_NINF : Core::PPC_FPCLASS_PINF;
     SetJumpTarget(notNAN);
     LEA(32, RSCRATCH,
-        MScaled(RSCRATCH, Common::PPC_FPCLASS_NINF - Common::PPC_FPCLASS_PINF,
-                Common::PPC_FPCLASS_PINF));
+        MScaled(RSCRATCH, Core::PPC_FPCLASS_NINF - Core::PPC_FPCLASS_PINF, Core::PPC_FPCLASS_PINF));
     continue3 = J();
 
     SetJumpTarget(zeroExponent);
@@ -981,29 +980,29 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm, bool single)
       PTEST(xmm, MConst(psDoubleNoSign));
     FixupBranch zero = J_CC(CC_Z);
 
-    // No exponent + mantissa: sign ? PPC_FPCLASS_ND : PPC_FPCLASS_PD;
+    // No exponent + mantissa: sign ? Core::PPC_FPCLASS_ND : Core::PPC_FPCLASS_PD;
     LEA(32, RSCRATCH,
-        MScaled(RSCRATCH, Common::PPC_FPCLASS_ND - Common::PPC_FPCLASS_PD, Common::PPC_FPCLASS_PD));
+        MScaled(RSCRATCH, Core::PPC_FPCLASS_ND - Core::PPC_FPCLASS_PD, Core::PPC_FPCLASS_PD));
     continue4 = J();
 
-    // Zero: sign ? PPC_FPCLASS_NZ : PPC_FPCLASS_PZ;
+    // Zero: sign ? Core::PPC_FPCLASS_NZ : Core::PPC_FPCLASS_PZ;
     SetJumpTarget(zero);
     SHL(32, R(RSCRATCH), Imm8(4));
-    ADD(32, R(RSCRATCH), Imm8(Common::PPC_FPCLASS_PZ));
+    ADD(32, R(RSCRATCH), Imm8(Core::PPC_FPCLASS_PZ));
   }
   else
   {
     MOVQ_xmm(R(RSCRATCH), xmm);
     if (single)
-      TEST(32, R(RSCRATCH), Imm32(Common::FLOAT_EXP));
+      TEST(32, R(RSCRATCH), Imm32(Core::FLOAT_EXP));
     else
       TEST(64, R(RSCRATCH), MConst(psDoubleExp));
     FixupBranch zeroExponent = J_CC(CC_Z);
 
     if (single)
     {
-      AND(32, R(RSCRATCH), Imm32(~Common::FLOAT_SIGN));
-      CMP(32, R(RSCRATCH), Imm32(Common::FLOAT_EXP));
+      AND(32, R(RSCRATCH), Imm32(~Core::FLOAT_SIGN));
+      CMP(32, R(RSCRATCH), Imm32(Core::FLOAT_EXP));
     }
     else
     {
@@ -1017,37 +1016,36 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm, bool single)
     MOVQ_xmm(R(RSCRATCH), xmm);
     SHR(input_size, R(RSCRATCH), Imm8(input_size - 1));
     LEA(32, RSCRATCH,
-        MScaled(RSCRATCH, Common::PPC_FPCLASS_NN - Common::PPC_FPCLASS_PN, Common::PPC_FPCLASS_PN));
+        MScaled(RSCRATCH, Core::PPC_FPCLASS_NN - Core::PPC_FPCLASS_PN, Core::PPC_FPCLASS_PN));
     continue1 = J();
 
     SetJumpTarget(nan);
-    MOV(32, R(RSCRATCH), Imm32(Common::PPC_FPCLASS_QNAN));
+    MOV(32, R(RSCRATCH), Imm32(Core::PPC_FPCLASS_QNAN));
     continue2 = J();
 
     SetJumpTarget(infinity);
     MOVQ_xmm(R(RSCRATCH), xmm);
     SHR(input_size, R(RSCRATCH), Imm8(input_size - 1));
     LEA(32, RSCRATCH,
-        MScaled(RSCRATCH, Common::PPC_FPCLASS_NINF - Common::PPC_FPCLASS_PINF,
-                Common::PPC_FPCLASS_PINF));
+        MScaled(RSCRATCH, Core::PPC_FPCLASS_NINF - Core::PPC_FPCLASS_PINF, Core::PPC_FPCLASS_PINF));
     continue3 = J();
 
     SetJumpTarget(zeroExponent);
     if (single)
-      TEST(input_size, R(RSCRATCH), Imm32(~Common::FLOAT_SIGN));
+      TEST(input_size, R(RSCRATCH), Imm32(~Core::FLOAT_SIGN));
     else
       TEST(input_size, R(RSCRATCH), MConst(psDoubleNoSign));
     FixupBranch zero = J_CC(CC_Z);
 
     SHR(input_size, R(RSCRATCH), Imm8(input_size - 1));
     LEA(32, RSCRATCH,
-        MScaled(RSCRATCH, Common::PPC_FPCLASS_ND - Common::PPC_FPCLASS_PD, Common::PPC_FPCLASS_PD));
+        MScaled(RSCRATCH, Core::PPC_FPCLASS_ND - Core::PPC_FPCLASS_PD, Core::PPC_FPCLASS_PD));
     continue4 = J();
 
     SetJumpTarget(zero);
     SHR(input_size, R(RSCRATCH), Imm8(input_size - 1));
     SHL(32, R(RSCRATCH), Imm8(4));
-    ADD(32, R(RSCRATCH), Imm8(Common::PPC_FPCLASS_PZ));
+    ADD(32, R(RSCRATCH), Imm8(Core::PPC_FPCLASS_PZ));
   }
 
   SetJumpTarget(continue1);
diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
index 4e7e6fe076..c5dd2bd9dd 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
@@ -13,6 +13,7 @@
 #include "Common/JitRegister.h"
 #include "Common/x64ABI.h"
 #include "Common/x64Emitter.h"
+#include "Core/FloatUtils.h"
 #include "Core/PowerPC/Gekko.h"
 #include "Core/PowerPC/Jit64Common/Jit64Constants.h"
 #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
@@ -140,20 +141,20 @@ void CommonAsmRoutines::GenFrsqrte()
   AND(32, R(RSCRATCH_EXTRA), Imm8(0x1F));
 
   PUSH(RSCRATCH2);
-  MOV(64, R(RSCRATCH2), ImmPtr(GetConstantFromPool(Common::frsqrte_expected)));
-  static_assert(sizeof(Common::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size");
+  MOV(64, R(RSCRATCH2), ImmPtr(GetConstantFromPool(Core::frsqrte_expected)));
+  static_assert(sizeof(Core::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size");
 
   SHR(64, R(RSCRATCH), Imm8(37));
   AND(32, R(RSCRATCH), Imm32(0x7FF));
   IMUL(32, RSCRATCH,
-       MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(Common::BaseAndDec, m_dec)));
+       MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(Core::BaseAndDec, m_dec)));
   ADD(32, R(RSCRATCH),
-      MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(Common::BaseAndDec, m_base)));
+      MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(Core::BaseAndDec, m_base)));
   SHL(64, R(RSCRATCH), Imm8(26));
 
   POP(RSCRATCH2);
-  OR(64, R(RSCRATCH2), R(RSCRATCH));  // vali |= (s64)(frsqrte_expected_base[index] +
-                                      // frsqrte_expected_dec[index] * (i % 2048)) << 26;
+  OR(64, R(RSCRATCH2), R(RSCRATCH));  // vali |= (s64)(Core::frsqrte_expected_base[index] +
+                                      // Core::frsqrte_expected_dec[index] * (i % 2048)) << 26;
   MOVQ_xmm(XMM0, R(RSCRATCH2));
   RET();
 
@@ -205,7 +206,7 @@ void CommonAsmRoutines::GenFrsqrte()
 
   SetJumpTarget(denormal);
   ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
-  ABI_CallFunction(Common::ApproximateReciprocalSquareRoot);
+  ABI_CallFunction(Core::ApproximateReciprocalSquareRoot);
   ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
   RET();
 
@@ -246,25 +247,23 @@ void CommonAsmRoutines::GenFres()
   AND(32, R(RSCRATCH2), Imm8(0x1F));   // i / 1024
 
   PUSH(RSCRATCH_EXTRA);
-  MOV(64, R(RSCRATCH_EXTRA), ImmPtr(GetConstantFromPool(Common::fres_expected)));
-  static_assert(sizeof(Common::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size");
+  MOV(64, R(RSCRATCH_EXTRA), ImmPtr(GetConstantFromPool(Core::fres_expected)));
+  static_assert(sizeof(Core::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size");
 
   IMUL(32, RSCRATCH,
-       MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Common::BaseAndDec, m_dec)));
-  ADD(32, R(RSCRATCH), Imm8(1));
-  SHR(32, R(RSCRATCH), Imm8(1));
+       MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Core::BaseAndDec, m_dec)));
 
-  MOV(32, R(RSCRATCH2),
-      MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Common::BaseAndDec, m_base)));
-  SUB(32, R(RSCRATCH2), R(RSCRATCH));
-  SHL(64, R(RSCRATCH2), Imm8(29));
+  ADD(32, R(RSCRATCH),
+      MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Core::BaseAndDec, m_base)));
+  SHR(32, R(RSCRATCH), Imm8(1));
+  SHL(64, R(RSCRATCH), Imm8(29));
 
   POP(RSCRATCH_EXTRA);
 
-  OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));  // vali |= (s64)(fres_expected_base[i / 1024] -
-                                            // (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2)
-                                            // << 29
-  MOVQ_xmm(XMM0, R(RSCRATCH2));
+  OR(64, R(RSCRATCH),
+     R(RSCRATCH_EXTRA));  // vali |= (s64)((u64)(Core::fres_expected_base[i / 1024] +
+                          // (Core::fres_expected_dec[i / 1024] * (i % 1024)) / 2)) << 29
+  MOVQ_xmm(XMM0, R(RSCRATCH));
   RET();
 
   // Exception flags for zero input.
@@ -276,7 +275,8 @@ void CommonAsmRoutines::GenFres()
 
   SetJumpTarget(complex);
   ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
-  ABI_CallFunction(Common::ApproximateReciprocal);
+  LEA(64, ABI_PARAM1, PPCSTATE(fpscr));
+  ABI_CallFunction(Core::ApproximateReciprocal);
   ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
   RET();
 
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index a65fd33a8f..3479a65224 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -17,6 +17,7 @@
 
 #include "Core/Config/MainSettings.h"
 #include "Core/CoreTiming.h"
+#include "Core/FloatUtils.h"
 #include "Core/HW/CPU.h"
 #include "Core/HW/Memmap.h"
 #include "Core/PowerPC/Gekko.h"
@@ -289,7 +290,7 @@ void JitArm64::GenerateFres()
 
   UBFX(ARM64Reg::X2, ARM64Reg::X1, 52, 11);  // Grab the exponent
   m_float_emit.FMOV(ARM64Reg::X0, ARM64Reg::D0);
-  AND(ARM64Reg::X3, ARM64Reg::X1, LogicalImm(Common::DOUBLE_SIGN, GPRSize::B64));
+  AND(ARM64Reg::X3, ARM64Reg::X1, LogicalImm(Core::DOUBLE_SIGN, GPRSize::B64));
   CMP(ARM64Reg::X2, 895);
   FixupBranch small_exponent = B(CCFlags::CC_LO);
 
@@ -297,20 +298,18 @@ void JitArm64::GenerateFres()
   FixupBranch large_exponent = B(CCFlags::CC_HI);
 
   UBFX(ARM64Reg::X2, ARM64Reg::X1, 47, 5);  // Grab upper part of mantissa
-  MOVP2R(ARM64Reg::X3, &Common::fres_expected);
+  MOVP2R(ARM64Reg::X3, &Core::fres_expected);
   ADD(ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X2, ArithOption(ARM64Reg::X2, ShiftType::LSL, 3));
   UBFX(ARM64Reg::X1, ARM64Reg::X1, 37, 10);  // Grab lower part of mantissa
   LDP(IndexType::Signed, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::X2, 0);
-  MOVI2R(ARM64Reg::W4, 1);
-  MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W4);
-  SUB(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W1, ArithOption(ARM64Reg::W1, ShiftType::LSR, 1));
-  AND(ARM64Reg::X0, ARM64Reg::X0,
-      LogicalImm(Common::DOUBLE_SIGN | Common::DOUBLE_EXP, GPRSize::B64));
+  MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W2);
+  AND(ARM64Reg::X0, ARM64Reg::X0, LogicalImm(Core::DOUBLE_SIGN | Core::DOUBLE_EXP, GPRSize::B64));
+  LSR(ARM64Reg::W1, ARM64Reg::W1, 1);
   ORR(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1, ArithOption(ARM64Reg::X1, ShiftType::LSL, 29));
   RET();
 
   SetJumpTarget(small_exponent);
-  TST(ARM64Reg::X1, LogicalImm(Common::DOUBLE_EXP | Common::DOUBLE_FRAC, GPRSize::B64));
+  TST(ARM64Reg::X1, LogicalImm(Core::DOUBLE_EXP | Core::DOUBLE_FRAC, GPRSize::B64));
   FixupBranch zero = B(CCFlags::CC_EQ);
   MOVI2R(ARM64Reg::X4, std::bit_cast<u64>(static_cast<double>(std::numeric_limits<float>::max())));
   ORR(ARM64Reg::X0, ARM64Reg::X3, ARM64Reg::X4);
@@ -342,18 +341,17 @@ void JitArm64::GenerateFrsqrte()
   LSL(ARM64Reg::X2, ARM64Reg::X1, 1);
   m_float_emit.FMOV(ARM64Reg::X0, ARM64Reg::D0);
   CLS(ARM64Reg::X3, ARM64Reg::X2);
-  TST(ARM64Reg::X1, LogicalImm(Common::DOUBLE_SIGN, GPRSize::B64));
-  CCMP(ARM64Reg::X3, Common::DOUBLE_EXP_WIDTH - 1, 0b0010, CCFlags::CC_EQ);
+  TST(ARM64Reg::X1, LogicalImm(Core::DOUBLE_SIGN, GPRSize::B64));
+  CCMP(ARM64Reg::X3, Core::DOUBLE_EXP_WIDTH - 1, 0b0010, CCFlags::CC_EQ);
   FixupBranch not_positive_normal = B(CCFlags::CC_HS);
 
   const u8* positive_normal = GetCodePtr();
   UBFX(ARM64Reg::X2, ARM64Reg::X1, 48, 5);
-  MOVP2R(ARM64Reg::X3, &Common::frsqrte_expected);
+  MOVP2R(ARM64Reg::X3, &Core::frsqrte_expected);
   ADD(ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X2, ArithOption(ARM64Reg::X2, ShiftType::LSL, 3));
   LDP(IndexType::Signed, ARM64Reg::W3, ARM64Reg::W2, ARM64Reg::X2, 0);
   UBFX(ARM64Reg::X1, ARM64Reg::X1, 37, 11);
-  AND(ARM64Reg::X0, ARM64Reg::X0,
-      LogicalImm(Common::DOUBLE_SIGN | Common::DOUBLE_EXP, GPRSize::B64));
+  AND(ARM64Reg::X0, ARM64Reg::X0, LogicalImm(Core::DOUBLE_SIGN | Core::DOUBLE_EXP, GPRSize::B64));
   MADD(ARM64Reg::W1, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
   ORR(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1, ArithOption(ARM64Reg::X1, ShiftType::LSL, 26));
   RET();
@@ -472,8 +470,8 @@ void JitArm64::GenerateFPRF(bool single)
   constexpr ARM64Reg fpscr_reg = ARM64Reg::W4;
 
   const int input_size = single ? 32 : 64;
-  const int input_exp_size = single ? Common::FLOAT_EXP_WIDTH : Common::DOUBLE_EXP_WIDTH;
-  const u64 input_frac_mask = single ? Common::FLOAT_FRAC : Common::DOUBLE_FRAC;
+  const int input_exp_size = single ? Core::FLOAT_EXP_WIDTH : Core::DOUBLE_EXP_WIDTH;
+  const u64 input_frac_mask = single ? Core::FLOAT_FRAC : Core::DOUBLE_FRAC;
   constexpr u32 output_sign_mask = 0xC;
 
   // First of all, start the load of the old FPSCR value, in case it takes a while
@@ -484,8 +482,8 @@ void JitArm64::GenerateFPRF(bool single)
   FixupBranch not_zero = CBNZ(cls_reg);
 
   // exp == 0 && frac == 0
-  MOVI2R(ARM64Reg::W3, Common::PPC_FPCLASS_PZ);
-  MOVI2R(ARM64Reg::W1, Common::PPC_FPCLASS_NZ);
+  MOVI2R(ARM64Reg::W3, Core::PPC_FPCLASS_PZ);
+  MOVI2R(ARM64Reg::W1, Core::PPC_FPCLASS_NZ);
   CSEL(fprf_reg, ARM64Reg::W1, ARM64Reg::W3, CCFlags::CC_LT);
 
   const u8* write_fprf_and_ret = GetCodePtr();
@@ -499,8 +497,8 @@ void JitArm64::GenerateFPRF(bool single)
 
   // All branches except the zero branch handle the sign in the same way.
   // Perform that handling before branching further
-  MOVI2R(ARM64Reg::W3, Common::PPC_FPCLASS_PN);
-  MOVI2R(ARM64Reg::W1, Common::PPC_FPCLASS_NN);
+  MOVI2R(ARM64Reg::W3, Core::PPC_FPCLASS_PN);
+  MOVI2R(ARM64Reg::W1, Core::PPC_FPCLASS_NN);
   CSEL(fprf_reg, ARM64Reg::W1, ARM64Reg::W3, CCFlags::CC_LT);
 
   CMP(cls_reg, input_exp_size - 1);
@@ -510,14 +508,13 @@ void JitArm64::GenerateFPRF(bool single)
   FixupBranch nan_or_inf = TBNZ(input_reg, input_size - 2);
 
   // exp == 0 && frac != 0
-  ORR(fprf_reg, fprf_reg, LogicalImm(Common::PPC_FPCLASS_PD & ~output_sign_mask, GPRSize::B32));
+  ORR(fprf_reg, fprf_reg, LogicalImm(Core::PPC_FPCLASS_PD & ~output_sign_mask, GPRSize::B32));
   B(write_fprf_and_ret);
 
   // exp == EXP_MASK
   SetJumpTarget(nan_or_inf);
-  MOVI2R(ARM64Reg::W2, Common::PPC_FPCLASS_QNAN);
-  ORR(ARM64Reg::W1, fprf_reg,
-      LogicalImm(Common::PPC_FPCLASS_PINF & ~output_sign_mask, GPRSize::B32));
+  MOVI2R(ARM64Reg::W2, Core::PPC_FPCLASS_QNAN);
+  ORR(ARM64Reg::W1, fprf_reg, LogicalImm(Core::PPC_FPCLASS_PINF & ~output_sign_mask, GPRSize::B32));
   TST(input_reg, LogicalImm(input_frac_mask, single ? GPRSize::B32 : GPRSize::B64));
   CSEL(fprf_reg, ARM64Reg::W1, ARM64Reg::W2, CCFlags::CC_EQ);
   B(write_fprf_and_ret);
diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp
index 87a047670e..c88229f0af 100644
--- a/Source/Core/Core/PowerPC/PowerPC.cpp
+++ b/Source/Core/Core/PowerPC/PowerPC.cpp
@@ -11,13 +11,13 @@
 #include "Common/ChunkFile.h"
 #include "Common/CommonTypes.h"
 #include "Common/FPURoundMode.h"
-#include "Common/FloatUtils.h"
 #include "Common/Logging/Log.h"
 
 #include "Core/CPUThreadConfigCallback.h"
 #include "Core/Config/MainSettings.h"
 #include "Core/Core.h"
 #include "Core/CoreTiming.h"
+#include "Core/FloatUtils.h"
 #include "Core/HW/CPU.h"
 #include "Core/HW/SystemTimers.h"
 #include "Core/Host.h"
@@ -38,7 +38,25 @@ double PairedSingle::PS0AsDouble() const
 
 double PairedSingle::PS1AsDouble() const
 {
-  return std::bit_cast<double>(ps1);
+  return Core::TruncateMantissa(std::bit_cast<double>(ps1));
+}
+
+// Returns PS1 as the input of a reciprocal operation. If truncating the raw value would
+// give zero while the discarded low mantissa bits are nonzero, the sign bit is set;
+// in every other case the raw, untruncated bits are returned unchanged.
+// It's not exactly clear why this happens, but it is why PS1 is truncated on read, not write
+double PairedSingle::PS1AsReciprocalDouble() const
+{
+  constexpr u64 trunc_bits = Core::DOUBLE_FRAC_WIDTH - Core::FLOAT_FRAC_WIDTH;
+  constexpr u64 trunc_mask = (u64{1} << trunc_bits) - 1;
+
+  u64 bits = ps1;
+  if ((ps1 & ~(trunc_mask | Core::DOUBLE_SIGN)) == 0 && (ps1 & trunc_mask) != 0)
+  {
+    bits |= Core::DOUBLE_SIGN;
+  }
+
+  return std::bit_cast<double>(bits);
+}
 
 void PairedSingle::SetPS0(double value)
@@ -680,12 +698,12 @@ void PowerPCState::SetSR(u32 index, u32 value)
 
 void PowerPCState::UpdateFPRFDouble(double dvalue)
 {
-  fpscr.FPRF = Common::ClassifyDouble(dvalue);
+  fpscr.FPRF = Core::ClassifyDouble(dvalue);
 }
 
 void PowerPCState::UpdateFPRFSingle(float fvalue)
 {
-  fpscr.FPRF = Common::ClassifyFloat(fvalue);
+  fpscr.FPRF = Core::ClassifyFloat(fvalue);
 }
 
 void RoundingModeUpdated(PowerPCState& ppc_state)
diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h
index ea972590cb..87aeeeae6f 100644
--- a/Source/Core/Core/PowerPC/PowerPC.h
+++ b/Source/Core/Core/PowerPC/PowerPC.h
@@ -15,6 +15,7 @@
 #include "Core/CPUThreadConfigCallback.h"
 #include "Core/Debugger/BranchWatch.h"
 #include "Core/Debugger/PPCDebugInterface.h"
+#include "Core/FloatUtils.h"
 #include "Core/PowerPC/BreakPoints.h"
 #include "Core/PowerPC/ConditionRegister.h"
 #include "Core/PowerPC/Gekko.h"
@@ -70,14 +71,21 @@ struct TLBEntry
 
 struct PairedSingle
 {
+  // PS1 is stored as written and only truncated when it is read
+  // through PS1AsU64 or PS1AsDouble.
+  // Reciprocal operations have a quirk in which the sign of the
+  // input PS1 is set if the stored value would be truncated to 0,
+  // so truncating on read is simpler than tracking that specific
+  // case with a dedicated flag.
   u64 PS0AsU64() const { return ps0; }
-  u64 PS1AsU64() const { return ps1; }
+  u64 PS1AsU64() const { return Core::TruncateMantissaBits(ps1); }
 
   u32 PS0AsU32() const { return static_cast<u32>(ps0); }
   u32 PS1AsU32() const { return static_cast<u32>(ps1); }
 
   double PS0AsDouble() const;
   double PS1AsDouble() const;
+  double PS1AsReciprocalDouble() const;
 
   void SetPS0(u64 value) { ps0 = value; }
   void SetPS0(double value);
diff --git a/Source/Core/DolphinLib.props b/Source/Core/DolphinLib.props
index 30ff8d1c7e..e12fd36b82 100644
--- a/Source/Core/DolphinLib.props
+++ b/Source/Core/DolphinLib.props
@@ -63,7 +63,6 @@
     <ClInclude Include="Common\FileUtil.h" />
     <ClInclude Include="Common\FixedSizeQueue.h" />
     <ClInclude Include="Common\Flag.h" />
-    <ClInclude Include="Common\FloatUtils.h" />
     <ClInclude Include="Common\FormatUtil.h" />
     <ClInclude Include="Common\FPURoundMode.h" />
     <ClInclude Include="Common\Functional.h" />
@@ -248,6 +247,7 @@
     <ClInclude Include="Core\FifoPlayer\FifoDataFile.h" />
     <ClInclude Include="Core\FifoPlayer\FifoPlayer.h" />
     <ClInclude Include="Core\FifoPlayer\FifoRecorder.h" />
+    <ClInclude Include="Core\FloatUtils.h" />
     <ClInclude Include="Core\FreeLookManager.h" />
     <ClInclude Include="Core\GeckoCode.h" />
     <ClInclude Include="Core\GeckoCodeConfig.h" />
@@ -838,7 +838,6 @@
     <ClCompile Include="Common\FileSearch.cpp" />
     <ClCompile Include="Common\FilesystemWatcher.cpp" />
     <ClCompile Include="Common\FileUtil.cpp" />
-    <ClCompile Include="Common\FloatUtils.cpp" />
     <ClCompile Include="Common\GekkoDisassembler.cpp" />
     <ClCompile Include="Common\GL\GLContext.cpp" />
     <ClCompile Include="Common\GL\GLExtensions\GLExtensions.cpp" />
@@ -943,6 +942,7 @@
     <ClCompile Include="Core\FifoPlayer\FifoDataFile.cpp" />
     <ClCompile Include="Core\FifoPlayer\FifoPlayer.cpp" />
     <ClCompile Include="Core\FifoPlayer\FifoRecorder.cpp" />
+    <ClCompile Include="Core\FloatUtils.cpp" />
     <ClCompile Include="Core\FreeLookManager.cpp" />
     <ClCompile Include="Core\GeckoCode.cpp" />
     <ClCompile Include="Core\GeckoCodeConfig.cpp" />
diff --git a/Source/Core/DolphinQt/Debugger/MemoryViewWidget.cpp b/Source/Core/DolphinQt/Debugger/MemoryViewWidget.cpp
index a5f5ef6eea..d25e71f717 100644
--- a/Source/Core/DolphinQt/Debugger/MemoryViewWidget.cpp
+++ b/Source/Core/DolphinQt/Debugger/MemoryViewWidget.cpp
@@ -24,6 +24,7 @@
 #include "Common/StringUtil.h"
 #include "Common/Swap.h"
 #include "Core/Core.h"
+#include "Core/FloatUtils.h"
 #include "Core/HW/AddressSpace.h"
 #include "Core/PowerPC/BreakPoints.h"
 #include "Core/PowerPC/PPCSymbolDB.h"
diff --git a/Source/UnitTests/Common/CMakeLists.txt b/Source/UnitTests/Common/CMakeLists.txt
index a2e21d7a80..e51d57aa68 100644
--- a/Source/UnitTests/Common/CMakeLists.txt
+++ b/Source/UnitTests/Common/CMakeLists.txt
@@ -12,7 +12,6 @@ add_dolphin_test(EventTest EventTest.cpp)
 add_dolphin_test(FileUtilTest FileUtilTest.cpp)
 add_dolphin_test(FixedSizeQueueTest FixedSizeQueueTest.cpp)
 add_dolphin_test(FlagTest FlagTest.cpp)
-add_dolphin_test(FloatUtilsTest FloatUtilsTest.cpp)
 add_dolphin_test(MathUtilTest MathUtilTest.cpp)
 add_dolphin_test(MutexTest MutexTest.cpp)
 add_dolphin_test(NandPathsTest NandPathsTest.cpp)
diff --git a/Source/UnitTests/Core/CMakeLists.txt b/Source/UnitTests/Core/CMakeLists.txt
index 8725995729..f83c5acf87 100644
--- a/Source/UnitTests/Core/CMakeLists.txt
+++ b/Source/UnitTests/Core/CMakeLists.txt
@@ -2,6 +2,7 @@ add_dolphin_test(MMIOTest MMIOTest.cpp)
 add_dolphin_test(PageFaultTest PageFaultTest.cpp)
 add_dolphin_test(CoreTimingTest CoreTimingTest.cpp)
 add_dolphin_test(PatchAllowlistTest PatchAllowlistTest.cpp)
+add_dolphin_test(FloatUtilsTest FloatUtilsTest.cpp)
 
 add_dolphin_test(DSPAcceleratorTest DSP/DSPAcceleratorTest.cpp)
 add_dolphin_test(DSPAssemblyTest
diff --git a/Source/UnitTests/Common/FloatUtilsTest.cpp b/Source/UnitTests/Core/FloatUtilsTest.cpp
similarity index 69%
rename from Source/UnitTests/Common/FloatUtilsTest.cpp
rename to Source/UnitTests/Core/FloatUtilsTest.cpp
index 8afc698a47..8edb882dda 100644
--- a/Source/UnitTests/Common/FloatUtilsTest.cpp
+++ b/Source/UnitTests/Core/FloatUtilsTest.cpp
@@ -8,20 +8,20 @@
 
 #include <gtest/gtest.h>
 
-#include "Common/FloatUtils.h"
+#include "Core/FloatUtils.h"
 
 #include "../Core/PowerPC/TestValues.h"
 
 TEST(FloatUtils, IsQNAN)
 {
-  EXPECT_TRUE(Common::IsQNAN(std::numeric_limits<double>::quiet_NaN()));
-  EXPECT_FALSE(Common::IsQNAN(Common::SNANConstant<double>()));
+  EXPECT_TRUE(Core::IsQNAN(std::numeric_limits<double>::quiet_NaN()));
+  EXPECT_FALSE(Core::IsQNAN(Core::SNANConstant<double>()));
 }
 
 TEST(FloatUtils, IsSNAN)
 {
-  EXPECT_FALSE(Common::IsSNAN(std::numeric_limits<double>::quiet_NaN()));
-  EXPECT_TRUE(Common::IsSNAN(Common::SNANConstant<double>()));
+  EXPECT_FALSE(Core::IsSNAN(std::numeric_limits<double>::quiet_NaN()));
+  EXPECT_TRUE(Core::IsSNAN(Core::SNANConstant<double>()));
 }
 
 TEST(FloatUtils, FlushToZero)
@@ -34,18 +34,18 @@ TEST(FloatUtils, FlushToZero)
   EXPECT_LT(0.f, s * 2);
   EXPECT_LT(0.0, d * 2);
 
-  EXPECT_EQ(+0.0, Common::FlushToZero(+std::numeric_limits<double>::denorm_min()));
-  EXPECT_EQ(-0.0, Common::FlushToZero(-std::numeric_limits<double>::denorm_min()));
-  EXPECT_EQ(+0.0, Common::FlushToZero(+std::numeric_limits<double>::min() / 2));
-  EXPECT_EQ(-0.0, Common::FlushToZero(-std::numeric_limits<double>::min() / 2));
+  EXPECT_EQ(+0.0, Core::FlushToZero(+std::numeric_limits<double>::denorm_min()));
+  EXPECT_EQ(-0.0, Core::FlushToZero(-std::numeric_limits<double>::denorm_min()));
+  EXPECT_EQ(+0.0, Core::FlushToZero(+std::numeric_limits<double>::min() / 2));
+  EXPECT_EQ(-0.0, Core::FlushToZero(-std::numeric_limits<double>::min() / 2));
   EXPECT_EQ(std::numeric_limits<double>::min(),
-            Common::FlushToZero(std::numeric_limits<double>::min()));
+            Core::FlushToZero(std::numeric_limits<double>::min()));
   EXPECT_EQ(std::numeric_limits<double>::max(),
-            Common::FlushToZero(std::numeric_limits<double>::max()));
+            Core::FlushToZero(std::numeric_limits<double>::max()));
   EXPECT_EQ(+std::numeric_limits<double>::infinity(),
-            Common::FlushToZero(+std::numeric_limits<double>::infinity()));
+            Core::FlushToZero(+std::numeric_limits<double>::infinity()));
   EXPECT_EQ(-std::numeric_limits<double>::infinity(),
-            Common::FlushToZero(-std::numeric_limits<double>::infinity()));
+            Core::FlushToZero(-std::numeric_limits<double>::infinity()));
 
   // Test all subnormals as well as an equally large set of random normal floats.
   std::default_random_engine engine(0);
@@ -53,16 +53,16 @@ TEST(FloatUtils, FlushToZero)
   for (u32 i = 0; i <= 0x007fffffu; ++i)
   {
     u32 i_tmp = i;
-    EXPECT_EQ(+0.f, Common::FlushToZero(std::bit_cast<float>(i_tmp)));
+    EXPECT_EQ(+0.f, Core::FlushToZero(std::bit_cast<float>(i_tmp)));
 
     i_tmp |= 0x80000000u;
-    EXPECT_EQ(-0.f, Common::FlushToZero(std::bit_cast<float>(i_tmp)));
+    EXPECT_EQ(-0.f, Core::FlushToZero(std::bit_cast<float>(i_tmp)));
 
     i_tmp = dist(engine);
-    EXPECT_EQ(i_tmp, std::bit_cast<u32>(Common::FlushToZero(std::bit_cast<float>(i_tmp))));
+    EXPECT_EQ(i_tmp, std::bit_cast<u32>(Core::FlushToZero(std::bit_cast<float>(i_tmp))));
 
     i_tmp |= 0x80000000u;
-    EXPECT_EQ(i_tmp, std::bit_cast<u32>(Common::FlushToZero(std::bit_cast<float>(i_tmp))));
+    EXPECT_EQ(i_tmp, std::bit_cast<u32>(Core::FlushToZero(std::bit_cast<float>(i_tmp))));
   }
 }
 
@@ -92,7 +92,7 @@ TEST(FloatUtils, ApproximateReciprocalSquareRoot)
 
     u64 expected = expected_values[i];
 
-    u64 actual = std::bit_cast<u64>(Common::ApproximateReciprocalSquareRoot(dvalue));
+    u64 actual = std::bit_cast<u64>(Core::ApproximateReciprocalSquareRoot(dvalue));
 
     EXPECT_EQ(expected, actual);
   }
diff --git a/Source/UnitTests/Core/PowerPC/Jit64Common/Frsqrte.cpp b/Source/UnitTests/Core/PowerPC/Jit64Common/Frsqrte.cpp
index 5af80075f0..c4cbbd855f 100644
--- a/Source/UnitTests/Core/PowerPC/Jit64Common/Frsqrte.cpp
+++ b/Source/UnitTests/Core/PowerPC/Jit64Common/Frsqrte.cpp
@@ -4,10 +4,10 @@
 #include <bit>
 
 #include "Common/CommonTypes.h"
-#include "Common/FloatUtils.h"
 #include "Common/ScopeGuard.h"
 #include "Common/x64ABI.h"
 #include "Core/Core.h"
+#include "Core/FloatUtils.h"
 #include "Core/PowerPC/Gekko.h"
 #include "Core/PowerPC/Jit64/Jit.h"
 #include "Core/PowerPC/Jit64Common/Jit64AsmCommon.h"
@@ -67,7 +67,7 @@ TEST(Jit64, Frsqrte)
   {
     const double dvalue = std::bit_cast<double>(ivalue);
 
-    u64 expected = std::bit_cast<u64>(Common::ApproximateReciprocalSquareRoot(dvalue));
+    u64 expected = std::bit_cast<u64>(Core::ApproximateReciprocalSquareRoot(dvalue));
 
     u64 actual = routines.wrapped_frsqrte(ivalue, fpscr);
 
diff --git a/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp b/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
index cf132215ff..5815712f02 100644
--- a/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
+++ b/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
@@ -60,11 +60,14 @@ TEST(JitArm64, Fres)
 
   TestFres test(Core::System::GetInstance());
 
+  // FPSCR with NI set
+  const UReg_FPSCR fpscr = UReg_FPSCR(0x00000004);
+
   for (const u64 ivalue : double_test_values)
   {
     const double dvalue = std::bit_cast<double>(ivalue);
 
-    const u64 expected = std::bit_cast<u64>(Common::ApproximateReciprocal(dvalue));
+    const u64 expected = std::bit_cast<u64>(Core::ApproximateReciprocal(fpscr, dvalue));
     const u64 actual = test.fres(ivalue);
 
     if (expected != actual)
diff --git a/Source/UnitTests/Core/PowerPC/JitArm64/Frsqrte.cpp b/Source/UnitTests/Core/PowerPC/JitArm64/Frsqrte.cpp
index a6e2940343..1534cec0c5 100644
--- a/Source/UnitTests/Core/PowerPC/JitArm64/Frsqrte.cpp
+++ b/Source/UnitTests/Core/PowerPC/JitArm64/Frsqrte.cpp
@@ -63,7 +63,7 @@ TEST(JitArm64, Frsqrte)
   {
     const double dvalue = std::bit_cast<double>(ivalue);
 
-    const u64 expected = std::bit_cast<u64>(Common::ApproximateReciprocalSquareRoot(dvalue));
+    const u64 expected = std::bit_cast<u64>(Core::ApproximateReciprocalSquareRoot(dvalue));
     const u64 actual = test.frsqrte(ivalue);
 
     if (expected != actual)
diff --git a/Source/UnitTests/UnitTests.vcxproj b/Source/UnitTests/UnitTests.vcxproj
index 5ffba903d9..6f0a9fe037 100644
--- a/Source/UnitTests/UnitTests.vcxproj
+++ b/Source/UnitTests/UnitTests.vcxproj
@@ -51,7 +51,6 @@
     <ClCompile Include="Common\FileUtilTest.cpp" />
     <ClCompile Include="Common\FixedSizeQueueTest.cpp" />
     <ClCompile Include="Common\FlagTest.cpp" />
-    <ClCompile Include="Common\FloatUtilsTest.cpp" />
     <ClCompile Include="Common\MathUtilTest.cpp" />
     <ClCompile Include="Common\MutexTest.cpp" />
     <ClCompile Include="Common\NandPathsTest.cpp" />
@@ -67,6 +66,7 @@
     <ClCompile Include="Core\DSP\DSPTestText.cpp" />
     <ClCompile Include="Core\DSP\HermesBinary.cpp" />
     <ClCompile Include="Core\DSP\HermesText.cpp" />
+    <ClCompile Include="Core\FloatUtilsTest.cpp" />
     <ClCompile Include="Core\IOS\ES\FormatsTest.cpp" />
     <ClCompile Include="Core\IOS\FS\FileSystemTest.cpp" />
     <ClCompile Include="Core\IOS\USB\SkylandersTest.cpp" />

From 5f9abe472d46e7848a9877f33392abbf7891ceb7 Mon Sep 17 00:00:00 2001
From: Nuh Uh <72356786+Geotale@users.noreply.github.com>
Date: Fri, 23 Jan 2026 20:56:36 -0600
Subject: [PATCH 02/14] Attempt to make Aarch64 Fres consistent with x86

This is done by calling a fallback function for inputs with very large or very small exponents. Among other things, this fixes NI not being checked (if I remember correctly).
---
 .../PowerPC/Jit64Common/Jit64AsmCommon.cpp    |  3 +-
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp  | 36 ++++++++++---------
 .../Core/PowerPC/Jit64Common/Fres.cpp         |  3 +-
 .../UnitTests/Core/PowerPC/JitArm64/Fres.cpp  |  1 -
 .../Core/PowerPC/JitArm64/Frsqrte.cpp         |  1 -
 5 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
index c5dd2bd9dd..c6e89c3cc8 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
@@ -8,7 +8,6 @@
 
 #include "Common/CPUDetect.h"
 #include "Common/CommonTypes.h"
-#include "Common/FloatUtils.h"
 #include "Common/Intrinsics.h"
 #include "Common/JitRegister.h"
 #include "Common/x64ABI.h"
@@ -269,7 +268,7 @@ void CommonAsmRoutines::GenFres()
   // Exception flags for zero input.
   SetJumpTarget(zero);
   TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX));
-  FixupBranch skip_set_fx1 = J_CC(CC_NZ);
+  FixupBranch skip_set_fx1 = J_CC(CC_NZ);  // As of now, the JIT does not check for ZE
   OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_ZX));
   SetJumpTarget(skip_set_fx1);
 
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index 3479a65224..eecf410134 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -11,7 +11,6 @@
 #include "Common/CPUDetect.h"
 #include "Common/CommonTypes.h"
 #include "Common/Config/Config.h"
-#include "Common/FloatUtils.h"
 #include "Common/JitRegister.h"
 #include "Common/MathUtil.h"
 
@@ -291,11 +290,11 @@ void JitArm64::GenerateFres()
   UBFX(ARM64Reg::X2, ARM64Reg::X1, 52, 11);  // Grab the exponent
   m_float_emit.FMOV(ARM64Reg::X0, ARM64Reg::D0);
   AND(ARM64Reg::X3, ARM64Reg::X1, LogicalImm(Core::DOUBLE_SIGN, GPRSize::B64));
-  CMP(ARM64Reg::X2, 895);
-  FixupBranch small_exponent = B(CCFlags::CC_LO);
-
-  CMP(ARM64Reg::X2, 1148);
-  FixupBranch large_exponent = B(CCFlags::CC_HI);
+  SUB(ARM64Reg::X2, ARM64Reg::X2, 895);
+  CMP(ARM64Reg::X2, 1148 - 895);
+  // Take the complex path for very large/small exponents.
+  // This also will apply to 0
+  FixupBranch complex = B(CCFlags::CC_HI);  // if (exp < 895 || exp >= 1149)
 
   UBFX(ARM64Reg::X2, ARM64Reg::X1, 47, 5);  // Grab upper part of mantissa
   MOVP2R(ARM64Reg::X3, &Core::fres_expected);
@@ -308,24 +307,27 @@ void JitArm64::GenerateFres()
   ORR(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1, ArithOption(ARM64Reg::X1, ShiftType::LSL, 29));
   RET();
 
-  SetJumpTarget(small_exponent);
-  TST(ARM64Reg::X1, LogicalImm(Core::DOUBLE_EXP | Core::DOUBLE_FRAC, GPRSize::B64));
-  FixupBranch zero = B(CCFlags::CC_EQ);
-  MOVI2R(ARM64Reg::X4, std::bit_cast<u64>(static_cast<double>(std::numeric_limits<float>::max())));
-  ORR(ARM64Reg::X0, ARM64Reg::X3, ARM64Reg::X4);
-  RET();
-
   SetJumpTarget(zero);
   LDR(IndexType::Unsigned, ARM64Reg::W4, PPC_REG, PPCSTATE_OFF(fpscr));
   FixupBranch skip_set_zx = TBNZ(ARM64Reg::W4, 26);
   ORRI2R(ARM64Reg::W4, ARM64Reg::W4, FPSCR_FX | FPSCR_ZX, ARM64Reg::W2);
   STR(IndexType::Unsigned, ARM64Reg::W4, PPC_REG, PPCSTATE_OFF(fpscr));
+  // As of now, the JIT does not check for ZE
   SetJumpTarget(skip_set_zx);
-  RET();
 
-  SetJumpTarget(large_exponent);
-  CMP(ARM64Reg::X2, 0x7FF);
-  CSEL(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X3, CCFlags::CC_EQ);
+  SetJumpTarget(complex);
+  ADD(ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(fpscr));
+
+  BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
+  BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
+
+  ABI_PushRegisters(regs_in_use);
+  m_float_emit.ABI_PushRegisters(fprs_in_use, ARM64Reg::X30);
+  // `val` will still be in D0, like needed for this call
+  ABI_CallFunction(&Core::ApproximateReciprocal, ARM64Reg::X0);
+  ABI_PopRegisters(regs_in_use);
+  m_float_emit.ABI_PopRegisters(fprs_in_use, ARM64Reg::X30);
+  ABI_PopRegisters(regs_in_use);
   RET();
 }
 
diff --git a/Source/UnitTests/Core/PowerPC/Jit64Common/Fres.cpp b/Source/UnitTests/Core/PowerPC/Jit64Common/Fres.cpp
index bf92c65999..8394a780d8 100644
--- a/Source/UnitTests/Core/PowerPC/Jit64Common/Fres.cpp
+++ b/Source/UnitTests/Core/PowerPC/Jit64Common/Fres.cpp
@@ -4,7 +4,6 @@
 #include <bit>
 
 #include "Common/CommonTypes.h"
-#include "Common/FloatUtils.h"
 #include "Common/ScopeGuard.h"
 #include "Common/x64ABI.h"
 #include "Core/Core.h"
@@ -68,7 +67,7 @@ TEST(Jit64, Fres)
   {
     const double dvalue = std::bit_cast<double>(ivalue);
 
-    const u64 expected = std::bit_cast<u64>(Common::ApproximateReciprocal(dvalue));
+    const u64 expected = std::bit_cast<u64>(Core::ApproximateReciprocal(fpscr, dvalue));
     const u64 actual = test.wrapped_fres(ivalue, fpscr);
 
     if (expected != actual)
diff --git a/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp b/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
index 5815712f02..083b87689f 100644
--- a/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
+++ b/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
@@ -6,7 +6,6 @@
 
 #include "Common/Arm64Emitter.h"
 #include "Common/CommonTypes.h"
-#include "Common/FloatUtils.h"
 #include "Common/ScopeGuard.h"
 #include "Core/Core.h"
 #include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
diff --git a/Source/UnitTests/Core/PowerPC/JitArm64/Frsqrte.cpp b/Source/UnitTests/Core/PowerPC/JitArm64/Frsqrte.cpp
index 1534cec0c5..42f1676f8e 100644
--- a/Source/UnitTests/Core/PowerPC/JitArm64/Frsqrte.cpp
+++ b/Source/UnitTests/Core/PowerPC/JitArm64/Frsqrte.cpp
@@ -6,7 +6,6 @@
 
 #include "Common/Arm64Emitter.h"
 #include "Common/CommonTypes.h"
-#include "Common/FloatUtils.h"
 #include "Common/ScopeGuard.h"
 #include "Core/Core.h"
 #include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"

From 9a477e774f0a5fe89a5976ba0140aff4c07095a7 Mon Sep 17 00:00:00 2001
From: Nuh Uh <72356786+Geotale@users.noreply.github.com>
Date: Fri, 23 Jan 2026 22:33:21 -0600
Subject: [PATCH 03/14] Properly Handle 0 in Fres Again

---
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index eecf410134..9e288b65b6 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -307,7 +307,10 @@ void JitArm64::GenerateFres()
   ORR(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1, ArithOption(ARM64Reg::X1, ShiftType::LSL, 29));
   RET();
 
-  SetJumpTarget(zero);
+  SetJumpTarget(complex);
+  AND(ARM64Reg::X0, ARM64Reg::X0, LogicalImm(Core::DOUBLE_SIGN | Core::DOUBLE_EXP, GPRSize::B64));
+  FixupBranch nonzero = B(CCFlags::CC_NEQ);
+
   LDR(IndexType::Unsigned, ARM64Reg::W4, PPC_REG, PPCSTATE_OFF(fpscr));
   FixupBranch skip_set_zx = TBNZ(ARM64Reg::W4, 26);
   ORRI2R(ARM64Reg::W4, ARM64Reg::W4, FPSCR_FX | FPSCR_ZX, ARM64Reg::W2);
@@ -315,7 +318,7 @@ void JitArm64::GenerateFres()
   // As of now, the JIT does not check for ZE
   SetJumpTarget(skip_set_zx);
 
-  SetJumpTarget(complex);
+  SetJumpTarget(nonzero);
   ADD(ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(fpscr));
 
   BitSet32 regs_in_use = gpr.GetCallerSavedUsed();

From a3200b0bc0b38ee184a303127553592a7ca25535 Mon Sep 17 00:00:00 2001
From: Nuh Uh <72356786+Geotale@users.noreply.github.com>
Date: Sat, 24 Jan 2026 13:37:20 -0600
Subject: [PATCH 04/14] Temporarily(?) Make ARM Less Mad Hopefully

---
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index 9e288b65b6..5f9f96a5ea 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -327,7 +327,8 @@ void JitArm64::GenerateFres()
   ABI_PushRegisters(regs_in_use);
   m_float_emit.ABI_PushRegisters(fprs_in_use, ARM64Reg::X30);
   // `val` will still be in D0, like needed for this call
-  ABI_CallFunction(&Core::ApproximateReciprocal, ARM64Reg::X0);
+  // TEMPORARY!!! SHOULD BE OKAY FOR TESTING BUT PROBABLY NOT GOOD TO KEEP!!!
+  QuickCallFunction(ARM64Reg::X8, &Core::ApproximateReciprocal);
   ABI_PopRegisters(regs_in_use);
   m_float_emit.ABI_PopRegisters(fprs_in_use, ARM64Reg::X30);
   ABI_PopRegisters(regs_in_use);

From 71655bfeaa7b1b32f5d819b0eb5700cb9d1dfdf5 Mon Sep 17 00:00:00 2001
From: Nuh Uh <72356786+Geotale@users.noreply.github.com>
Date: Sat, 24 Jan 2026 14:15:19 -0600
Subject: [PATCH 05/14] Attempted Resolve of Reviews

---
 Source/Core/Core/FloatUtils.cpp               |  9 +++++++
 .../PowerPC/Jit64Common/Jit64AsmCommon.cpp    |  2 +-
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp  | 24 +++++++++----------
 3 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/Source/Core/Core/FloatUtils.cpp b/Source/Core/Core/FloatUtils.cpp
index b6ad62b213..b197a1b699 100644
--- a/Source/Core/Core/FloatUtils.cpp
+++ b/Source/Core/Core/FloatUtils.cpp
@@ -205,4 +205,13 @@ double ApproximateReciprocal(const UReg_FPSCR& fpscr, double val)
   return static_cast<double>(std::bit_cast<float>(result));
 }
 
+// Variation of `ApproximateReciprocal`, operating on the bits rather than the raw value
+u64 ApproximateReciprocalBits(const UReg_FPSCR& fpscr, u64 integral)
+{
+  // Casting to a double is still done due to e.g. `isnan` checks in the actual function
+  const f64 val = std::bit_cast<f64>(integral);
+  const f64 result = ApproximateReciprocal(fpscr, val);
+  return std::bit_cast<u64>(result);
+}
+
 }  // namespace Core
diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
index c6e89c3cc8..78fb797bf7 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
@@ -268,7 +268,7 @@ void CommonAsmRoutines::GenFres()
   // Exception flags for zero input.
   SetJumpTarget(zero);
   TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX));
-  FixupBranch skip_set_fx1 = J_CC(CC_NZ);  // As of now, the JIT does not check for ZE
+  FixupBranch skip_set_fx1 = J_CC(CC_NZ);
   OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_ZX));
   SetJumpTarget(skip_set_fx1);
 
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index 5f9f96a5ea..a7fb125953 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -308,30 +308,30 @@ void JitArm64::GenerateFres()
   RET();
 
   SetJumpTarget(complex);
-  AND(ARM64Reg::X0, ARM64Reg::X0, LogicalImm(Core::DOUBLE_SIGN | Core::DOUBLE_EXP, GPRSize::B64));
+  TST(ARM64Reg::X1, LogicalImm(Core::DOUBLE_SIGN | Core::DOUBLE_EXP, GPRSize::B64));
   FixupBranch nonzero = B(CCFlags::CC_NEQ);
 
   LDR(IndexType::Unsigned, ARM64Reg::W4, PPC_REG, PPCSTATE_OFF(fpscr));
   FixupBranch skip_set_zx = TBNZ(ARM64Reg::W4, 26);
   ORRI2R(ARM64Reg::W4, ARM64Reg::W4, FPSCR_FX | FPSCR_ZX, ARM64Reg::W2);
   STR(IndexType::Unsigned, ARM64Reg::W4, PPC_REG, PPCSTATE_OFF(fpscr));
-  // As of now, the JIT does not check for ZE
   SetJumpTarget(skip_set_zx);
+  // X0 will already be the proper infinity
+  RET();
 
   SetJumpTarget(nonzero);
   ADD(ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(fpscr));
 
-  BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
-  BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
+  // X0 - X4 are acknowledged to be clobbered by this function,
+  // with X0 being the return value, making it particularly undesirable to pop after
+  // the function call concludes
+  BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1, 2, 3, 4};
 
-  ABI_PushRegisters(regs_in_use);
-  m_float_emit.ABI_PushRegisters(fprs_in_use, ARM64Reg::X30);
-  // `val` will still be in D0, like needed for this call
-  // TEMPORARY!!! SHOULD BE OKAY FOR TESTING BUT PROBABLY NOT GOOD TO KEEP!!!
-  QuickCallFunction(ARM64Reg::X8, &Core::ApproximateReciprocal);
-  ABI_PopRegisters(regs_in_use);
-  m_float_emit.ABI_PopRegisters(fprs_in_use, ARM64Reg::X30);
-  ABI_PopRegisters(regs_in_use);
+  ABI_PushRegisters(gprs_to_push);
+  m_float_emit.ABI_PushRegisters(CALLER_SAVED_FPRS, ARM64Reg::X30);
+  ABI_CallFunction(&Core::ApproximateReciprocalBits, ARM64Reg::X0, ARM64Reg::X1);
+  m_float_emit.ABI_PopRegisters(CALLER_SAVED_FPRS, ARM64Reg::X30);
+  ABI_PopRegisters(gprs_to_push);
   RET();
 }
 

From 8774a92272092431f64b703b5d7200bdad388deb Mon Sep 17 00:00:00 2001
From: Nuh Uh <72356786+Geotale@users.noreply.github.com>
Date: Sat, 24 Jan 2026 14:17:44 -0600
Subject: [PATCH 06/14] Fix Zero Check to Not Use Wrong Mask

---
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index a7fb125953..077468ed5f 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -308,7 +308,7 @@ void JitArm64::GenerateFres()
   RET();
 
   SetJumpTarget(complex);
-  TST(ARM64Reg::X1, LogicalImm(Core::DOUBLE_SIGN | Core::DOUBLE_EXP, GPRSize::B64));
+  TST(ARM64Reg::X1, LogicalImm(Core::DOUBLE_EXP | Core::DOUBLE_FRAC, GPRSize::B64));
   FixupBranch nonzero = B(CCFlags::CC_NEQ);
 
   LDR(IndexType::Unsigned, ARM64Reg::W4, PPC_REG, PPCSTATE_OFF(fpscr));

From 6217ed455350ae97889d2769a1c3a0572e7792fc Mon Sep 17 00:00:00 2001
From: Nuh Uh <72356786+Geotale@users.noreply.github.com>
Date: Sat, 24 Jan 2026 14:21:47 -0600
Subject: [PATCH 07/14] Don't Forget to Build Locally First, My Goodness

---
 Source/Core/Core/FloatUtils.cpp | 4 ++--
 Source/Core/Core/FloatUtils.h   | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Source/Core/Core/FloatUtils.cpp b/Source/Core/Core/FloatUtils.cpp
index b197a1b699..4e8ec33693 100644
--- a/Source/Core/Core/FloatUtils.cpp
+++ b/Source/Core/Core/FloatUtils.cpp
@@ -209,8 +209,8 @@ double ApproximateReciprocal(const UReg_FPSCR& fpscr, double val)
 u64 ApproximateReciprocalBits(const UReg_FPSCR& fpscr, u64 integral)
 {
   // Casting to a double is still done due to e.g. `isnan` checks in the actual function
-  const f64 val = std::bit_cast<f64>(integral);
-  const f64 result = ApproximateReciprocal(fpscr, val);
+  const double val = std::bit_cast<double>(integral);
+  const double result = ApproximateReciprocal(fpscr, val);
   return std::bit_cast<u64>(result);
 }
 
diff --git a/Source/Core/Core/FloatUtils.h b/Source/Core/Core/FloatUtils.h
index 27a6b03a4b..cfb6080161 100644
--- a/Source/Core/Core/FloatUtils.h
+++ b/Source/Core/Core/FloatUtils.h
@@ -105,6 +105,7 @@ extern const std::array<BaseAndDec, 32> fres_expected;
 // PowerPC approximation algorithms
 double ApproximateReciprocalSquareRoot(double val);
 double ApproximateReciprocal(const UReg_FPSCR& fpscr, double val);
+u64 ApproximateReciprocalBits(const UReg_FPSCR& fpscr, u64 integral);
 
 // Instructions which move data without performing operations round a bit weirdly
 // Specifically they rounding the mantissa to be like that of a 32-bit float,

From e1f94159b15ce5e05dff637601cecf7388903895 Mon Sep 17 00:00:00 2001
From: Nuh Uh <72356786+Geotale@users.noreply.github.com>
Date: Fri, 30 Jan 2026 22:20:22 -0600
Subject: [PATCH 08/14] Change ApproximateReciprocalBits to be the bulk of the
 operation, use less bitcasting

---
 Source/Core/Core/FloatUtils.cpp              | 55 ++++++++++++--------
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp |  3 +-
 2 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/Source/Core/Core/FloatUtils.cpp b/Source/Core/Core/FloatUtils.cpp
index 4e8ec33693..80b5f38958 100644
--- a/Source/Core/Core/FloatUtils.cpp
+++ b/Source/Core/Core/FloatUtils.cpp
@@ -138,6 +138,9 @@ double ApproximateReciprocalSquareRoot(double val)
   return std::bit_cast<double>(integral);
 }
 
+// TODO: This can be made more efficient by pre-shifting the results
+// for double precision, but this requires adjusting all JITs and
+// the denormal case.
 const std::array<BaseAndDec, 32> fres_expected = {{
     {0xfff000, -0x3e1}, {0xf07000, -0x3a7}, {0xe1d400, -0x371}, {0xd41000, -0x340},
     {0xc71000, -0x313}, {0xbac400, -0x2ea}, {0xaf2000, -0x2c4}, {0xa41000, -0x2a0},
@@ -149,69 +152,77 @@ const std::array<BaseAndDec, 32> fres_expected = {{
     {0x110800, -0x11a}, {0x0ca000, -0x11a}, {0x083800, -0x108}, {0x041800, -0x106},
 }};
 
-// Used by fres and ps_res.
-double ApproximateReciprocal(const UReg_FPSCR& fpscr, double val)
+// Variation of `ApproximateReciprocal`, operating on the bits rather than the raw value
+u64 ApproximateReciprocalBits(const UReg_FPSCR& fpscr, const u64 integral)
 {
-  const u64 integral = std::bit_cast<u64>(val);
-
   // Convert into a float when possible
   const u64 signless = integral & ~DOUBLE_SIGN;
   const u32 mantissa =
       static_cast<u32>((integral & DOUBLE_FRAC) >> (DOUBLE_FRAC_WIDTH - FLOAT_FRAC_WIDTH));
-  const u32 sign = static_cast<u32>((integral >> 32) & FLOAT_SIGN);
   const s32 exponent = static_cast<s32>((integral & DOUBLE_EXP) >> DOUBLE_FRAC_WIDTH) - 0x380;
 
   // The largest floats possible just return 0
   const u64 huge_float = fpscr.NI ? 0x47d0000000000000ULL : 0x4940000000000000ULL;
 
-  // Special case 0
+  // Special case 0 returns infinity
   if (signless == 0)
-    return std::copysign(std::numeric_limits<double>::infinity(), val);
+    return DOUBLE_EXP | (integral & DOUBLE_SIGN);
 
   // Special case huge or NaN-ish numbers
   if (signless >= huge_float)
   {
-    if (!std::isnan(val))
-      return std::copysign(0.0, val);
-    return MakeQuiet(val);
+    // The value is NaN if, disregarding the sign, its exponent is maximized,
+    // and its mantissa is nonzero
+    const bool is_nan = (integral & ~DOUBLE_SIGN) > DOUBLE_EXP;
+
+    if (!is_nan)
+      return integral & DOUBLE_SIGN;
+    return integral | DOUBLE_QBIT;
   }
 
   // Special case small inputs
   if (exponent < -1)
-    return std::copysign(std::numeric_limits<float>::max(), val);
+  {
+    // Return the largest finite value for a float!
+    const u64 float_max = 0x47efffffe0000000ULL;
+    return float_max | (integral & DOUBLE_SIGN);
+  }
 
   const s32 new_exponent = 253 - exponent;
 
   const u32 i = static_cast<u32>(mantissa >> 8);
   const auto& entry = fres_expected[i / 1024];
-  const u32 new_mantissa = static_cast<u32>(entry.m_base + entry.m_dec * (i % 1024)) / 2;
+  u32 new_mantissa = static_cast<u32>(entry.m_base + entry.m_dec * (i % 1024)) / 2;
 
-  u32 result = sign | (static_cast<u32>(new_exponent) << FLOAT_FRAC_WIDTH) | new_mantissa;
   if (new_exponent <= 0)
   {
     // Result is subnormal so format it properly!
     if (fpscr.NI)
     {
       // Flush to 0 if inexact
-      result = sign;
+      return integral & DOUBLE_SIGN;
     }
     else
     {
       // Shift by the exponent amount
       u32 shift = 1 + static_cast<u32>(-new_exponent);
-      result = sign | (((1 << FLOAT_FRAC_WIDTH) | new_mantissa) >> shift);
+      new_mantissa = (new_mantissa >> shift) << shift;
     }
   }
-  return static_cast<double>(std::bit_cast<float>(result));
+
+  // Convert the result back to a double format!
+  u64 double_result = (integral & DOUBLE_SIGN) | (static_cast<u64>(new_exponent + 0x380) << 52) |
+                      (static_cast<u64>(new_mantissa) << (DOUBLE_FRAC_WIDTH - FLOAT_FRAC_WIDTH));
+
+  return double_result;
 }
 
-// Variation of `ApproximateReciprocal`, operating on the bits rather than the raw value
-u64 ApproximateReciprocalBits(const UReg_FPSCR& fpscr, u64 integral)
+// Used by fres and ps_res.
+double ApproximateReciprocal(const UReg_FPSCR& fpscr, const double val)
 {
-  // Casting to a double is still done due to e.g. `isnan` checks in the actual function
-  const double val = std::bit_cast<double>(integral);
-  const double result = ApproximateReciprocal(fpscr, val);
-  return std::bit_cast<u64>(result);
+  const u64 integral = std::bit_cast<u64>(val);
+  const u64 result = ApproximateReciprocalBits(fpscr, integral);
+  return std::bit_cast<double>(result);
 }
 
 }  // namespace Core
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index 077468ed5f..dc41c34b13 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -323,8 +323,7 @@ void JitArm64::GenerateFres()
   ADD(ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(fpscr));
 
   // X0 - X4 are acknowledged to be clobbered by this function,
-  // with X0 being the return value, making it particularly undesirable to pop after
-  // the function call concludes
+  // though they would not normally be pushed to the stack anyways
   BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1, 2, 3, 4};
 
   ABI_PushRegisters(gprs_to_push);

From 7cc250ae33f90987e997d0476469e1535825490c Mon Sep 17 00:00:00 2001
From: Nuh Uh <72356786+Geotale@users.noreply.github.com>
Date: Fri, 30 Jan 2026 23:22:36 -0600
Subject: [PATCH 09/14] Change tiny thing in JIT, add very temporary dumb test
 for debugging

---
 Source/Core/Core/FloatUtils.cpp                         | 2 +-
 Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp | 3 ++-
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp            | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/Source/Core/Core/FloatUtils.cpp b/Source/Core/Core/FloatUtils.cpp
index 80b5f38958..455addc6c5 100644
--- a/Source/Core/Core/FloatUtils.cpp
+++ b/Source/Core/Core/FloatUtils.cpp
@@ -152,7 +152,7 @@ const std::array<BaseAndDec, 32> fres_expected = {{
     {0x110800, -0x11a}, {0x0ca000, -0x11a}, {0x083800, -0x108}, {0x041800, -0x106},
 }};
 
-// Variation of `ApproximateReciprocal`, operating on the bits rather than the raw value
+// Raw function used by the JITs for fres and ps_res
 u64 ApproximateReciprocalBits(const UReg_FPSCR& fpscr, const u64 integral)
 {
   // Convert into a float when possible
diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
index 78fb797bf7..7d14e2581e 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
@@ -275,7 +275,8 @@ void CommonAsmRoutines::GenFres()
   SetJumpTarget(complex);
   ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
   LEA(64, ABI_PARAM1, PPCSTATE(fpscr));
-  ABI_CallFunction(Core::ApproximateReciprocal);
+  ABI_CallFunctionRR(Core::ApproximateReciprocalBits, ABI_PARAM1, RSCRATCH);
+  MOVQ_xmm(XMM0, R(ABI_RETURN));
   ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
   RET();
 
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index dc41c34b13..477e2609a7 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -291,7 +291,7 @@ void JitArm64::GenerateFres()
   m_float_emit.FMOV(ARM64Reg::X0, ARM64Reg::D0);
   AND(ARM64Reg::X3, ARM64Reg::X1, LogicalImm(Core::DOUBLE_SIGN, GPRSize::B64));
   SUB(ARM64Reg::X2, ARM64Reg::X2, 895);
-  CMP(ARM64Reg::X2, 1148 - 895);
+  CMP(ARM64Reg::X2, 1147 - 895);  // Temporary extremely dumb test
   // Take the complex path for very large/small exponents.
   // This also will apply to 0
   FixupBranch complex = B(CCFlags::CC_HI);  // if (exp < 895 || exp >= 1149)

From 9f71a1cb49e4e0f6c34b6c40c5870f4e1173848f Mon Sep 17 00:00:00 2001
From: Nuh Uh <72356786+Geotale@users.noreply.github.com>
Date: Fri, 30 Jan 2026 23:43:38 -0600
Subject: [PATCH 10/14] Yet another experiment for OSX

---
 Source/Core/Core/FloatUtils.cpp              | 6 ++++++
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/Source/Core/Core/FloatUtils.cpp b/Source/Core/Core/FloatUtils.cpp
index 455addc6c5..298bf5a612 100644
--- a/Source/Core/Core/FloatUtils.cpp
+++ b/Source/Core/Core/FloatUtils.cpp
@@ -171,6 +171,12 @@ u64 ApproximateReciprocalBits(const UReg_FPSCR& fpscr, const u64 integral)
   // Special case huge or NaN-ish numbers
   if (signless >= huge_float)
   {
+    if (integral == 0x47d0000000000000)
+    {
+      // An extremely temporary test to see if this function is even reached
+      return huge_float;
+    }
+
     // The value is NaN if, disregarding the sign, its exponent is maximized,
     // and its mantissa is nonzero
     const bool is_nan = (integral & ~DOUBLE_SIGN) > DOUBLE_EXP;
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index 477e2609a7..dc41c34b13 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -291,7 +291,7 @@ void JitArm64::GenerateFres()
   m_float_emit.FMOV(ARM64Reg::X0, ARM64Reg::D0);
   AND(ARM64Reg::X3, ARM64Reg::X1, LogicalImm(Core::DOUBLE_SIGN, GPRSize::B64));
   SUB(ARM64Reg::X2, ARM64Reg::X2, 895);
-  CMP(ARM64Reg::X2, 1147 - 895);  // Temporary extremely dumb test
+  CMP(ARM64Reg::X2, 1148 - 895);
   // Take the complex path for very large/small exponents.
   // This also will apply to 0
   FixupBranch complex = B(CCFlags::CC_HI);  // if (exp < 895 || exp >= 1149)

From 88785ff64538585eb0471da38cac23d614f3f194 Mon Sep 17 00:00:00 2001
From: Nuh Uh <72356786+Geotale@users.noreply.github.com>
Date: Sat, 31 Jan 2026 01:52:32 -0600
Subject: [PATCH 11/14] Fueling the paranoia, can this condition even pass

---
 Source/Core/Core/FloatUtils.cpp              | 6 ------
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 2 +-
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/Source/Core/Core/FloatUtils.cpp b/Source/Core/Core/FloatUtils.cpp
index 298bf5a612..455addc6c5 100644
--- a/Source/Core/Core/FloatUtils.cpp
+++ b/Source/Core/Core/FloatUtils.cpp
@@ -171,12 +171,6 @@ u64 ApproximateReciprocalBits(const UReg_FPSCR& fpscr, const u64 integral)
   // Special case huge or NaN-ish numbers
   if (signless >= huge_float)
   {
-    if (integral == 0x47d0000000000000)
-    {
-      // An extremely temporary test to see if this function is even reached
-      return huge_float;
-    }
-
     // The value is NaN if, disregarding the sign, its exponent is maximized,
     // and its mantissa is nonzero
     const bool is_nan = (integral & ~DOUBLE_SIGN) > DOUBLE_EXP;
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index dc41c34b13..b198dcab41 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -291,7 +291,7 @@ void JitArm64::GenerateFres()
   m_float_emit.FMOV(ARM64Reg::X0, ARM64Reg::D0);
   AND(ARM64Reg::X3, ARM64Reg::X1, LogicalImm(Core::DOUBLE_SIGN, GPRSize::B64));
   SUB(ARM64Reg::X2, ARM64Reg::X2, 895);
-  CMP(ARM64Reg::X2, 1148 - 895);
+  CMP(ARM64Reg::X2, 1146 - 895);
   // Take the complex path for very large/small exponents.
   // This also will apply to 0
   FixupBranch complex = B(CCFlags::CC_HI);  // if (exp < 895 || exp >= 1149)

From d6d13afad36239f4149ec5df54fb21d576a106bc Mon Sep 17 00:00:00 2001
From: Nuh Uh <72356786+Geotale@users.noreply.github.com>
Date: Wed, 4 Feb 2026 00:12:49 -0600
Subject: [PATCH 12/14] Paranoia was fueled, add varying FPSCRs to FRES tests

---
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp  |  2 +-
 .../Core/PowerPC/Jit64Common/Fres.cpp         | 25 +++++++++++-------
 .../UnitTests/Core/PowerPC/JitArm64/Fres.cpp  | 26 ++++++++++++-------
 3 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index b198dcab41..dc41c34b13 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -291,7 +291,7 @@ void JitArm64::GenerateFres()
   m_float_emit.FMOV(ARM64Reg::X0, ARM64Reg::D0);
   AND(ARM64Reg::X3, ARM64Reg::X1, LogicalImm(Core::DOUBLE_SIGN, GPRSize::B64));
   SUB(ARM64Reg::X2, ARM64Reg::X2, 895);
-  CMP(ARM64Reg::X2, 1146 - 895);
+  CMP(ARM64Reg::X2, 1148 - 895);
   // Take the complex path for very large/small exponents.
   // This also will apply to 0
   FixupBranch complex = B(CCFlags::CC_HI);  // if (exp < 895 || exp >= 1149)
diff --git a/Source/UnitTests/Core/PowerPC/Jit64Common/Fres.cpp b/Source/UnitTests/Core/PowerPC/Jit64Common/Fres.cpp
index 8394a780d8..7d30de84cd 100644
--- a/Source/UnitTests/Core/PowerPC/Jit64Common/Fres.cpp
+++ b/Source/UnitTests/Core/PowerPC/Jit64Common/Fres.cpp
@@ -60,19 +60,26 @@ TEST(Jit64, Fres)
 
   TestFres test(Core::System::GetInstance());
 
-  // FPSCR with NI set
-  const UReg_FPSCR fpscr = UReg_FPSCR(0x00000004);
+  constexpr std::array<u32, 2> test_fpscrs{
+      0x00000000,  // FPSCR with NI unset
+      0x00000004,  // FPSCR with NI set
+  };
 
-  for (const u64 ivalue : double_test_values)
+  for (const u32 fpscr_hex : test_fpscrs)
   {
-    const double dvalue = std::bit_cast<double>(ivalue);
+    const UReg_FPSCR fpscr = UReg_FPSCR(fpscr_hex);
 
-    const u64 expected = std::bit_cast<u64>(Core::ApproximateReciprocal(fpscr, dvalue));
-    const u64 actual = test.wrapped_fres(ivalue, fpscr);
+    for (const u64 ivalue : double_test_values)
+    {
+      const double dvalue = std::bit_cast<double>(ivalue);
 
-    if (expected != actual)
-      fmt::print("{:016x} -> {:016x} == {:016x}\n", ivalue, actual, expected);
+      const u64 expected = std::bit_cast<u64>(Core::ApproximateReciprocal(fpscr, dvalue));
+      const u64 actual = test.wrapped_fres(ivalue, fpscr);
 
-    EXPECT_EQ(expected, actual);
+      if (expected != actual)
+        fmt::print("{:016x} -> {:016x} == {:016x}\n", ivalue, actual, expected);
+
+      EXPECT_EQ(expected, actual);
+    }
   }
 }
diff --git a/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp b/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
index 083b87689f..49b117f88a 100644
--- a/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
+++ b/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
@@ -1,6 +1,7 @@
 // Copyright 2021 Dolphin Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <array>
 #include <bit>
 #include <functional>
 
@@ -59,19 +60,26 @@ TEST(JitArm64, Fres)
 
   TestFres test(Core::System::GetInstance());
 
-  // FPSCR with NI set
-  const UReg_FPSCR fpscr = UReg_FPSCR(0x00000004);
+  constexpr std::array<u32, 2> test_fpscrs{
+      0x00000000,  // FPSCR with NI unset
+      0x00000004,  // FPSCR with NI set
+  };
 
-  for (const u64 ivalue : double_test_values)
+  for (const u32 fpscr_hex : test_fpscrs)
   {
-    const double dvalue = std::bit_cast<double>(ivalue);
+    const UReg_FPSCR fpscr = UReg_FPSCR(fpscr_hex);
 
-    const u64 expected = std::bit_cast<u64>(Core::ApproximateReciprocal(fpscr, dvalue));
-    const u64 actual = test.fres(ivalue);
+    for (const u64 ivalue : double_test_values)
+    {
+      const double dvalue = std::bit_cast<double>(ivalue);
 
-    if (expected != actual)
-      fmt::print("{:016x} -> {:016x} == {:016x}\n", ivalue, actual, expected);
+      const u64 expected = std::bit_cast<u64>(Core::ApproximateReciprocal(fpscr, dvalue));
+      const u64 actual = test.fres(ivalue);
 
-    EXPECT_EQ(expected, actual);
+      if (expected != actual)
+        fmt::print("{:016x} -> {:016x} == {:016x}\n", ivalue, actual, expected);
+
+      EXPECT_EQ(expected, actual);
+    }
   }
 }

From 1168ec140b4e72683d8c9d2f2abb163ccd9c78aa Mon Sep 17 00:00:00 2001
From: Nuh Uh <72356786+Geotale@users.noreply.github.com>
Date: Wed, 4 Feb 2026 00:20:44 -0600
Subject: [PATCH 13/14] Update FRES tests to print FPSCR state as well

---
 Source/UnitTests/Core/PowerPC/Jit64Common/Fres.cpp | 3 ++-
 Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp    | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/Source/UnitTests/Core/PowerPC/Jit64Common/Fres.cpp b/Source/UnitTests/Core/PowerPC/Jit64Common/Fres.cpp
index 7d30de84cd..46f6554173 100644
--- a/Source/UnitTests/Core/PowerPC/Jit64Common/Fres.cpp
+++ b/Source/UnitTests/Core/PowerPC/Jit64Common/Fres.cpp
@@ -77,7 +77,8 @@ TEST(Jit64, Fres)
       const u64 actual = test.wrapped_fres(ivalue, fpscr);
 
       if (expected != actual)
-        fmt::print("{:016x} -> {:016x} == {:016x}\n", ivalue, actual, expected);
+        fmt::print("{:016x} -> {:016x} == {:016x} (FPSCR {:08x})\n", ivalue, actual, expected,
+                   fpscr_hex);
 
       EXPECT_EQ(expected, actual);
     }
diff --git a/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp b/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
index 49b117f88a..a363dadb59 100644
--- a/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
+++ b/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
@@ -77,7 +77,8 @@ TEST(JitArm64, Fres)
       const u64 actual = test.fres(ivalue);
 
       if (expected != actual)
-        fmt::print("{:016x} -> {:016x} == {:016x}\n", ivalue, actual, expected);
+        fmt::print("{:016x} -> {:016x} == {:016x} (FPSCR {:08x})\n", ivalue, actual, expected,
+                   fpscr_hex);
 
       EXPECT_EQ(expected, actual);
     }

From 34f448b52685817ad30ae2a5776441a162bbbeea Mon Sep 17 00:00:00 2001
From: Nuh Uh <72356786+Geotale@users.noreply.github.com>
Date: Wed, 4 Feb 2026 01:37:38 -0600
Subject: [PATCH 14/14] Re-add infinity + NaN optimization for AArch64, add
 comments suggesting future optimizations

---
 Source/Core/Core/FloatUtils.cpp              |  2 ++
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 30 +++++++++++++-------
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/Source/Core/Core/FloatUtils.cpp b/Source/Core/Core/FloatUtils.cpp
index 455addc6c5..09cbcaa50c 100644
--- a/Source/Core/Core/FloatUtils.cpp
+++ b/Source/Core/Core/FloatUtils.cpp
@@ -153,6 +153,8 @@ const std::array<BaseAndDec, 32> fres_expected = {{
 }};
 
 // Raw function used by the JITs for fres and ps_res
+// Because of this narrow usage, it could be specialized to skip certain condition checks,
+// but for now the gain would not matter enough to justify giving up conciseness.
 u64 ApproximateReciprocalBits(const UReg_FPSCR& fpscr, const u64 integral)
 {
   // Convert into a float when possible
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index dc41c34b13..dbe9a5af0d 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -308,18 +308,15 @@ void JitArm64::GenerateFres()
   RET();
 
   SetJumpTarget(complex);
+  // Check for 0 and -0
   TST(ARM64Reg::X1, LogicalImm(Core::DOUBLE_EXP | Core::DOUBLE_FRAC, GPRSize::B64));
-  FixupBranch nonzero = B(CCFlags::CC_NEQ);
+  FixupBranch zero = B(CCFlags::CC_EQ);
+  // Check for infinities and NaNs
+  // Much smaller exponents could fall back earlier, but precisely when would need to be tested
+  CMP(ARM64Reg::X2, 0x7ff - 895);
+  FixupBranch nonfinite = B(CCFlags::CC_EQ);
 
-  LDR(IndexType::Unsigned, ARM64Reg::W4, PPC_REG, PPCSTATE_OFF(fpscr));
-  FixupBranch skip_set_zx = TBNZ(ARM64Reg::W4, 26);
-  ORRI2R(ARM64Reg::W4, ARM64Reg::W4, FPSCR_FX | FPSCR_ZX, ARM64Reg::W2);
-  STR(IndexType::Unsigned, ARM64Reg::W4, PPC_REG, PPCSTATE_OFF(fpscr));
-  SetJumpTarget(skip_set_zx);
-  // X0 will already be the proper infinity
-  RET();
-
-  SetJumpTarget(nonzero);
+  // Fall back for more complex cases!
   ADD(ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(fpscr));
 
   // X0 - X4 are acknowledged to be clobbered by this function,
@@ -332,6 +329,19 @@ void JitArm64::GenerateFres()
   m_float_emit.ABI_PopRegisters(CALLER_SAVED_FPRS, ARM64Reg::X30);
   ABI_PopRegisters(gprs_to_push);
   RET();
+
+  SetJumpTarget(zero);
+  LDR(IndexType::Unsigned, ARM64Reg::W4, PPC_REG, PPCSTATE_OFF(fpscr));
+  FixupBranch skip_set_zx = TBNZ(ARM64Reg::W4, 26);
+  ORRI2R(ARM64Reg::W4, ARM64Reg::W4, FPSCR_FX | FPSCR_ZX, ARM64Reg::W2);
+  STR(IndexType::Unsigned, ARM64Reg::W4, PPC_REG, PPCSTATE_OFF(fpscr));
+  SetJumpTarget(skip_set_zx);
+  // X0 will already be the proper infinity
+  RET();
+
+  SetJumpTarget(nonfinite);
+  // X0 will already contain the correct value, either NaN or 0
+  RET();
 }
 
 // Input: X1 contains input, and D0 contains result of running the input through AArch64 FRSQRTE.