dolphin/Source/Core/Core/FloatUtils.h

193 lines
5.7 KiB
C++

// Copyright 2018 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <bit>
#include <limits>
#include "Common/CommonTypes.h"
#include "Core/PowerPC/Gekko.h"
namespace Core
{
template <typename T>
constexpr T SNANConstant()
{
return std::numeric_limits<T>::signaling_NaN();
}
// The most significant bit of the fraction is an is-quiet bit on all architectures we care about.
static constexpr u64 DOUBLE_QBIT = 0x0008000000000000ULL;
static constexpr u64 DOUBLE_SIGN = 0x8000000000000000ULL;
static constexpr u64 DOUBLE_EXP = 0x7FF0000000000000ULL;
static constexpr u64 DOUBLE_FRAC = 0x000FFFFFFFFFFFFFULL;
static constexpr u64 DOUBLE_ZERO = 0x0000000000000000ULL;
static constexpr int DOUBLE_EXP_WIDTH = 11;
static constexpr int DOUBLE_FRAC_WIDTH = 52;
static constexpr u32 FLOAT_SIGN = 0x80000000;
static constexpr u32 FLOAT_EXP = 0x7F800000;
static constexpr u32 FLOAT_FRAC = 0x007FFFFF;
static constexpr u32 FLOAT_ZERO = 0x00000000;
static constexpr int FLOAT_EXP_WIDTH = 8;
static constexpr int FLOAT_FRAC_WIDTH = 23;
inline bool IsQNAN(double d)
{
const u64 i = std::bit_cast<u64>(d);
return ((i & DOUBLE_EXP) == DOUBLE_EXP) && ((i & DOUBLE_QBIT) == DOUBLE_QBIT);
}
inline bool IsSNAN(double d)
{
const u64 i = std::bit_cast<u64>(d);
return ((i & DOUBLE_EXP) == DOUBLE_EXP) && ((i & DOUBLE_FRAC) != DOUBLE_ZERO) &&
((i & DOUBLE_QBIT) == DOUBLE_ZERO);
}
inline float FlushToZero(float f)
{
u32 i = std::bit_cast<u32>(f);
if ((i & FLOAT_EXP) == 0)
{
// Turn into signed zero
i &= FLOAT_SIGN;
}
return std::bit_cast<float>(i);
}
inline double FlushToZero(double d)
{
u64 i = std::bit_cast<u64>(d);
if ((i & DOUBLE_EXP) == 0)
{
// Turn into signed zero
i &= DOUBLE_SIGN;
}
return std::bit_cast<double>(i);
}
inline double MakeQuiet(double d)
{
const u64 integral = std::bit_cast<u64>(d) | DOUBLE_QBIT;
return std::bit_cast<double>(integral);
}
enum PPCFpClass
{
PPC_FPCLASS_QNAN = 0x11,
PPC_FPCLASS_NINF = 0x9,
PPC_FPCLASS_NN = 0x8,
PPC_FPCLASS_ND = 0x18,
PPC_FPCLASS_NZ = 0x12,
PPC_FPCLASS_PZ = 0x2,
PPC_FPCLASS_PD = 0x14,
PPC_FPCLASS_PN = 0x4,
PPC_FPCLASS_PINF = 0x5,
};
// Uses PowerPC conventions for the return value, so it can be easily
// used directly in CPU emulation.
u32 ClassifyDouble(double dvalue);
u32 ClassifyFloat(float fvalue);
struct BaseAndDec
{
int m_base;
int m_dec;
};
extern const std::array<BaseAndDec, 32> frsqrte_expected;
extern const std::array<BaseAndDec, 32> fres_expected;
// PowerPC approximation algorithms
double ApproximateReciprocalSquareRoot(double val);
double ApproximateReciprocal(const UReg_FPSCR& fpscr, double val);
u64 ApproximateReciprocalBits(const UReg_FPSCR& fpscr, u64 integral);
// Instructions which move data without performing operations round a bit weirdly
// Specifically they rounding the mantissa to be like that of a 32-bit float,
// going as far as to focus on the rounding mode, but never actually care about
// making sure the exponent becomes 32-bit
// Either this, or they'll truncate the mantissa down, which will always happen to
// PS1 OR PS0 in ps_rsqrte
inline u64 TruncateMantissaBits(u64 bits)
{
// Truncation can be done by simply cutting off the mantissa bits that don't
// exist in a single precision float
constexpr u64 remove_bits = DOUBLE_FRAC_WIDTH - FLOAT_FRAC_WIDTH;
constexpr u64 remove_mask = (1 << remove_bits) - 1;
return bits & ~remove_mask;
}
inline double TruncateMantissa(double value)
{
u64 bits = std::bit_cast<u64>(value);
u64 trunc_bits = TruncateMantissaBits(bits);
return std::bit_cast<double>(trunc_bits);
}
inline u64 RoundMantissaBitsFinite(u64 bits)
{
const u64 replacement_exp = 0x4000000000000000ull;
// To round only the mantissa, we assume the host CPU properly matches
// the emulated CPU's rounding mode so the rounding of the mantissa will
// go in the correct direction
// The removing and restoring of the exponent is done via subtraction instead of
// bitwise operations due to the possibility that the rounding will cause an overflow
// from the mantissa into the exponent (incrementing it by 1)
u64 resized_bits = (bits & (DOUBLE_FRAC | DOUBLE_SIGN)) | replacement_exp;
float rounded_float = static_cast<float>(std::bit_cast<double>(resized_bits));
double extended_float = static_cast<double>(rounded_float);
u64 rounded_bits = std::bit_cast<u64>(extended_float);
u64 orig_exp_bits = bits & DOUBLE_EXP;
if (orig_exp_bits == 0)
{
// The exponent isn't incremented for double subnormals
return rounded_bits & ~DOUBLE_EXP;
}
// Handle the change accordingly otherwise!
rounded_bits = (rounded_bits - replacement_exp) + orig_exp_bits;
return rounded_bits;
}
inline u64 RoundMantissaBits(u64 bits)
{
// Checking if the value is non-finite
if ((bits & DOUBLE_EXP) == DOUBLE_EXP)
{
// For infinite and NaN values, the mantissa is simply truncated
return TruncateMantissaBits(bits);
}
return RoundMantissaBitsFinite(bits);
}
inline double RoundMantissaFinite(double value)
{
// This function is only ever used by ps_sum1, because
// for some reason it assumes that ps0 should be rounded with
// finite values rather than checking if they might be infinite
u64 bits = std::bit_cast<u64>(value);
u64 rounded_bits = RoundMantissaBitsFinite(bits);
return std::bit_cast<double>(rounded_bits);
}
inline double RoundMantissa(double value)
{
// This function just bitcasts the double value parameter so it
// can be used in the more common function that operates on the raw bits
u64 bits = std::bit_cast<u64>(value);
u64 rounded_bits = RoundMantissaBits(bits);
return std::bit_cast<double>(rounded_bits);
}
} // namespace Core