dolphin/Source/Core/Core/FloatUtils.h

// Copyright 2018 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <array>
#include <bit>
#include <limits>

#include "Common/CommonTypes.h"
#include "Core/PowerPC/Gekko.h"

namespace Core
{
// Yields the signaling NaN value that the standard library reports for the
// floating-point type T.
template <typename T>
constexpr T SNANConstant()
{
  using Limits = std::numeric_limits<T>;
  return Limits::signaling_NaN();
}

// Bit masks and field widths describing the layout of a double when its bits
// are viewed as a u64: 1 sign bit, 11 exponent bits and 52 fraction bits.
// The most significant bit of the fraction is an is-quiet bit on all architectures we care about.
static constexpr u64 DOUBLE_QBIT = 0x0008000000000000ULL;
// Bit 63: the sign bit.
static constexpr u64 DOUBLE_SIGN = 0x8000000000000000ULL;
// Bits 52-62: the exponent field.
static constexpr u64 DOUBLE_EXP = 0x7FF0000000000000ULL;
// Bits 0-51: the fraction (mantissa) field.
static constexpr u64 DOUBLE_FRAC = 0x000FFFFFFFFFFFFFULL;
// All-zero pattern, used as the comparison value when testing masked fields.
static constexpr u64 DOUBLE_ZERO = 0x0000000000000000ULL;
// Number of bits in the exponent and fraction fields respectively.
static constexpr int DOUBLE_EXP_WIDTH = 11;
static constexpr int DOUBLE_FRAC_WIDTH = 52;

// The same layout description for a float viewed as a u32:
// sign (bit 31), exponent (bits 23-30), fraction (bits 0-22) and the all-zero pattern.
static constexpr u32 FLOAT_SIGN = 0x80000000;
static constexpr u32 FLOAT_EXP = 0x7F800000;
static constexpr u32 FLOAT_FRAC = 0x007FFFFF;
static constexpr u32 FLOAT_ZERO = 0x00000000;
// Number of bits in the exponent and fraction fields respectively.
static constexpr int FLOAT_EXP_WIDTH = 8;
static constexpr int FLOAT_FRAC_WIDTH = 23;

inline bool IsQNAN(double d)
{
  const u64 i = std::bit_cast<u64>(d);
  return ((i & DOUBLE_EXP) == DOUBLE_EXP) && ((i & DOUBLE_QBIT) == DOUBLE_QBIT);
}

inline bool IsSNAN(double d)
{
  const u64 i = std::bit_cast<u64>(d);
  return ((i & DOUBLE_EXP) == DOUBLE_EXP) && ((i & DOUBLE_FRAC) != DOUBLE_ZERO) &&
         ((i & DOUBLE_QBIT) == DOUBLE_ZERO);
}

inline float FlushToZero(float f)
{
  u32 i = std::bit_cast<u32>(f);
  if ((i & FLOAT_EXP) == 0)
  {
    // Turn into signed zero
    i &= FLOAT_SIGN;
  }
  return std::bit_cast<float>(i);
}

inline double FlushToZero(double d)
{
  u64 i = std::bit_cast<u64>(d);
  if ((i & DOUBLE_EXP) == 0)
  {
    // Turn into signed zero
    i &= DOUBLE_SIGN;
  }
  return std::bit_cast<double>(i);
}

// Returns `d` with the is-quiet bit forced on. The bit is set unconditionally;
// no check is made that `d` is actually a NaN.
inline double MakeQuiet(double d)
{
  return std::bit_cast<double>(std::bit_cast<u64>(d) | DOUBLE_QBIT);
}

// Floating-point class codes.
// NOTE(review): the values look like the 5-bit result-class encodings of the
// PowerPC FPSCR[FPRF] field, and the names appear to abbreviate:
//   QNAN = quiet NaN, NINF / PINF = negative / positive infinity,
//   NN / PN = negative / positive normalized, ND / PD = negative / positive
//   denormalized, NZ / PZ = negative / positive zero.
// Confirm against the ClassifyDouble / ClassifyFloat implementations.
enum PPCFpClass
{
  PPC_FPCLASS_QNAN = 0x11,
  PPC_FPCLASS_NINF = 0x9,
  PPC_FPCLASS_NN = 0x8,
  PPC_FPCLASS_ND = 0x18,
  PPC_FPCLASS_NZ = 0x12,
  PPC_FPCLASS_PZ = 0x2,
  PPC_FPCLASS_PD = 0x14,
  PPC_FPCLASS_PN = 0x4,
  PPC_FPCLASS_PINF = 0x5,
};

// Classify a double / float value.
// Uses PowerPC conventions for the return value, so it can be easily
// used directly in CPU emulation.
// NOTE(review): the returned u32 is presumably one of the PPCFpClass values;
// the definitions live outside this header - confirm there.
u32 ClassifyDouble(double dvalue);
u32 ClassifyFloat(float fvalue);

// One entry of the estimate lookup tables declared below: a pair of integers.
// NOTE(review): presumably m_base is the starting estimate for a table segment
// and m_dec the amount subtracted per step within it - confirm against the
// code that consumes these tables.
struct BaseAndDec
{
  int m_base;
  int m_dec;
};
// 32-entry lookup tables; only declared here, the data is defined elsewhere.
// NOTE(review): the names suggest they back the frsqrte and fres estimate
// instructions respectively - confirm against the definitions.
extern const std::array<BaseAndDec, 32> frsqrte_expected;
extern const std::array<BaseAndDec, 32> fres_expected;

// PowerPC approximation algorithms
// Declarations only; the implementations are outside this header.
// The reciprocal variants take the FPSCR register state by const reference.
// NOTE(review): ApproximateReciprocalBits presumably is the raw-bit-pattern
// counterpart of ApproximateReciprocal (u64 in, u64 out) - confirm.
double ApproximateReciprocalSquareRoot(double val);
double ApproximateReciprocal(const UReg_FPSCR& fpscr, double val);
u64 ApproximateReciprocalBits(const UReg_FPSCR& fpscr, u64 integral);

// Instructions which move data without performing operations round a bit weirdly.
// Specifically, they round the mantissa to be like that of a 32-bit float,
// going as far as to respect the rounding mode, but never actually care about
// making sure the exponent fits in a 32-bit float.
// Either this, or they'll truncate the mantissa down, which will always happen to
// PS1 OR PS0 in ps_rsqrte.
// Clears the low-order fraction bits of a raw 64-bit double pattern that have
// no counterpart in a single-precision fraction. The sign, the exponent and
// the remaining high fraction bits are returned unchanged.
inline u64 TruncateMantissaBits(u64 bits)
{
  // Truncation can be done by simply cutting off the mantissa bits that don't
  // exist in a single precision float
  constexpr u64 remove_bits = DOUBLE_FRAC_WIDTH - FLOAT_FRAC_WIDTH;
  static_assert(remove_bits < 64, "shift amount must be smaller than the width of u64");

  // Shift a 64-bit one rather than an int literal: the mask is a u64, so the
  // shift should happen at that width instead of being widened afterwards
  // (avoids MSVC warning C4334 and any dependence on the width of int).
  constexpr u64 remove_mask = (u64{1} << remove_bits) - 1;
  return bits & ~remove_mask;
}

// Convenience wrapper: applies TruncateMantissaBits to the bit pattern of
// `value` and returns the result as a double again.
inline double TruncateMantissa(double value)
{
  return std::bit_cast<double>(TruncateMantissaBits(std::bit_cast<u64>(value)));
}

// Rounds the fraction of a raw 64-bit double pattern to single-precision
// width while leaving the exponent in double format. The caller is expected
// to pass a finite value; no check for infinities or NaNs is made here.
inline u64 RoundMantissaBitsFinite(u64 bits)
{
  // Stand-in exponent (biased value 0x400) that keeps the temporary value
  // comfortably inside float range for the narrowing conversion below.
  constexpr u64 placeholder_exp = 0x4000000000000000ull;
  constexpr u64 sign_and_frac_mask = DOUBLE_FRAC | DOUBLE_SIGN;

  // Keep sign and fraction, swap in the stand-in exponent, then let the host
  // do the rounding by converting to float and back. This relies on the host
  // CPU's rounding mode matching the emulated CPU's, so that the fraction is
  // rounded in the correct direction.
  const u64 with_placeholder = (bits & sign_and_frac_mask) | placeholder_exp;
  const float narrowed = static_cast<float>(std::bit_cast<double>(with_placeholder));
  const u64 rounded = std::bit_cast<u64>(static_cast<double>(narrowed));

  const u64 original_exp = bits & DOUBLE_EXP;
  if (original_exp != 0)
  {
    // Rounding may carry out of the fraction into the exponent (raising it by
    // one). Swapping the exponents arithmetically instead of with bit masks
    // carries that increment over into the original exponent.
    return (rounded - placeholder_exp) + original_exp;
  }

  // Double subnormals: the exponent is not incremented, so the exponent field
  // of the rounded value is simply cleared.
  return rounded & ~DOUBLE_EXP;
}

// Reduces the fraction of a raw 64-bit double pattern to single-precision
// width: finite values are rounded, while infinities and NaNs (exponent field
// all ones) simply have the extra fraction bits truncated.
inline u64 RoundMantissaBits(u64 bits)
{
  const bool is_finite = (bits & DOUBLE_EXP) != DOUBLE_EXP;
  return is_finite ? RoundMantissaBitsFinite(bits) : TruncateMantissaBits(bits);
}

// Double-typed wrapper around RoundMantissaBitsFinite.
// Only ps_sum1 ever uses this: for some reason that instruction rounds ps0 as
// though it were finite instead of checking whether it might be infinite.
inline double RoundMantissaFinite(double value)
{
  return std::bit_cast<double>(RoundMantissaBitsFinite(std::bit_cast<u64>(value)));
}

// Double-typed wrapper: reinterprets `value` as raw bits, hands them to the
// more commonly used RoundMantissaBits, and converts the result back.
inline double RoundMantissa(double value)
{
  return std::bit_cast<double>(RoundMantissaBits(std::bit_cast<u64>(value)));
}

}  // namespace Core