diff --git a/include/remill/Arch/Runtime/Intrinsics.h b/include/remill/Arch/Runtime/Intrinsics.h index 7d2b9ca0f..c3f788473 100644 --- a/include/remill/Arch/Runtime/Intrinsics.h +++ b/include/remill/Arch/Runtime/Intrinsics.h @@ -57,7 +57,7 @@ __remill_write_memory_64(Memory *, addr_t, uint64_t); [[gnu::used, gnu::const]] extern float64_t __remill_read_memory_f64(Memory *, addr_t); -[[gnu::used]] extern float64_t __remill_read_memory_f80(Memory *, addr_t); +[[gnu::used]] extern float80_t __remill_read_memory_f80(Memory *, addr_t); [[gnu::used]] extern float64_t __remill_read_memory_f128(Memory *, addr_t); @@ -68,7 +68,7 @@ __remill_write_memory_f32(Memory *, addr_t, float32_t); __remill_write_memory_f64(Memory *, addr_t, float64_t); [[gnu::used]] extern Memory *__remill_write_memory_f80(Memory *, addr_t, - float64_t); + float80_t); [[gnu::used]] extern Memory *__remill_write_memory_f128(Memory *, addr_t, float64_t); diff --git a/include/remill/Arch/Runtime/Operators.h b/include/remill/Arch/Runtime/Operators.h index 60ad2686f..b13d192dd 100644 --- a/include/remill/Arch/Runtime/Operators.h +++ b/include/remill/Arch/Runtime/Operators.h @@ -576,6 +576,11 @@ ALWAYS_INLINE uint8_t issignaling(float64_t x) { return x_nan.exponent == 0x7FFU && !x_nan.is_quiet_nan && x_nan.payload; } +ALWAYS_INLINE uint8_t issignaling(float80_t x) { + const nan64_t x_nan = {static_cast(x)}; + return x_nan.exponent == 0x7FFFU && !(x_nan.is_quiet_nan) && x_nan.payload; +} + #endif // !defined(issignaling) template ::UT> @@ -605,6 +610,10 @@ ALWAYS_INLINE static uint8_t IsNegative(T x) { return static_cast(std::signbit(x)); } +ALWAYS_INLINE static uint8_t IsNegative(float80_t x) { + return static_cast(std::signbit(static_cast(x))); +} + ALWAYS_INLINE static uint8_t IsZero(float32_t x) { return static_cast(FP_ZERO == std::fpclassify(x)); } @@ -613,6 +622,10 @@ ALWAYS_INLINE static uint8_t IsZero(float64_t x) { return static_cast(FP_ZERO == std::fpclassify(x)); } +ALWAYS_INLINE static uint8_t IsZero(float80_t x) { + return static_cast(FP_ZERO == std::fpclassify(static_cast(x))); +} + ALWAYS_INLINE static uint8_t IsInfinite(float32_t x) { return static_cast(FP_INFINITE == std::fpclassify(x)); } @@ -621,6 +634,10 @@ ALWAYS_INLINE static uint8_t IsInfinite(float64_t x) { return static_cast(FP_INFINITE == std::fpclassify(x)); } +ALWAYS_INLINE static uint8_t IsInfinite(float80_t x) { + return static_cast(FP_INFINITE == std::fpclassify(static_cast(x))); +} + ALWAYS_INLINE static uint8_t IsNaN(float32_t x) { return static_cast(FP_NAN == std::fpclassify(x)); } @@ -629,6 +646,10 @@ ALWAYS_INLINE static uint8_t IsNaN(float64_t x) { return static_cast(FP_NAN == std::fpclassify(x)); } +ALWAYS_INLINE static uint8_t IsNaN(float80_t x) { + return static_cast(FP_NAN == std::fpclassify(static_cast(x))); +} + ALWAYS_INLINE static uint8_t IsSignalingNaN(float32_t x) { const nan32_t x_nan = {x}; return x_nan.exponent == 0xFFU && !x_nan.is_quiet_nan && x_nan.payload; @@ -639,6 +660,10 @@ ALWAYS_INLINE static uint8_t IsSignalingNaN(float64_t x) { return x_nan.exponent == 0x7FFU && !x_nan.is_quiet_nan && x_nan.payload; } +ALWAYS_INLINE static uint8_t IsSignalingNaN(float80_t x) { + return IsSignalingNaN(static_cast(x)); +} + template ALWAYS_INLINE static uint8_t IsSignalingNaN(T) { return 0; @@ -652,6 +677,10 @@ ALWAYS_INLINE static uint8_t IsDenormal(float64_t x) { return static_cast(FP_SUBNORMAL == std::fpclassify(x)); } +ALWAYS_INLINE static uint8_t IsDenormal(float80_t x) { + return static_cast(FP_SUBNORMAL == 
std::fpclassify(static_cast<float64_t>(x))); +} + template <typename T> ALWAYS_INLINE static uint8_t IsZero(T val) { return static_cast<uint8_t>(!val); @@ -702,6 +731,7 @@ MAKE_CONVERT(uint64_t, UInt64) MAKE_CONVERT(uint128_t, UInt128) MAKE_CONVERT(float32_t, Float32) MAKE_CONVERT(float64_t, Float64) +MAKE_CONVERT(float80_t, Float80) #undef MAKE_CONVERT @@ -922,6 +952,7 @@ ALWAYS_INLINE static auto TruncTo(T val) -> typename IntegerType
::BT { make_float_op(F##name##32, float32_t, float32_t, op) \ make_float_op(F##name, float64_t, float64_t, op) \ make_float_op(F##name##64, float64_t, float64_t, op) \ + make_float_op(F##name, float80_t, float80_t, op) \ make_float_op(F##name##80, float80_t, float80_t, op) MAKE_OPS(Add, +, MAKE_BINOP, MAKE_BINOP) @@ -1120,22 +1151,6 @@ ALWAYS_INLINE static int64_t SAbs(int64_t val) { return val < 0 ? -val : val; } -ALWAYS_INLINE static float32_t FAbs(float32_t val) { - return __builtin_fabsf(val); -} - -ALWAYS_INLINE static float64_t FAbs(float64_t val) { - return __builtin_fabs(val); -} - -ALWAYS_INLINE static float32_t FAbs32(float32_t val) { - return __builtin_fabsf(val); -} - -ALWAYS_INLINE static float64_t FAbs64(float64_t val) { - return __builtin_fabs(val); -} - template ALWAYS_INLINE static auto SAbs(typename IntegerType::ST val) -> typename IntegerType::ST { @@ -1498,6 +1513,45 @@ MAKE_BUILTIN(CountTrailingZeros, 64, 64, __builtin_ctzll, 0) #undef MAKE_BUILTIN + +#define MAKE_BUILTIN_INTRINSIC(name, intrinsic_name, size, type) \ + ALWAYS_INLINE static type name(type val) { \ + return intrinsic_name(val); \ + } \ + ALWAYS_INLINE static type name##size(type val) { \ + return intrinsic_name(val); \ + } + +#if defined(__x86_64__) || defined(__i386__) || defined(_M_X86) +#define MAKE_BUILTIN(name, intrinsic_name) \ + MAKE_BUILTIN_INTRINSIC(name, intrinsic_name##f, 32, float32_t) \ + MAKE_BUILTIN_INTRINSIC(name, intrinsic_name, 64, float64_t) \ + MAKE_BUILTIN_INTRINSIC(name, intrinsic_name##l, 80, float80_t) +#else +#define MAKE_BUILTIN(name, intrinsic_name) \ + MAKE_BUILTIN_INTRINSIC(name, intrinsic_name##f, 32, float32_t) \ + MAKE_BUILTIN_INTRINSIC(name, intrinsic_name, 64, float64_t) \ + MAKE_BUILTIN_INTRINSIC(name, intrinsic_name, 80, float80_t) +#endif + +MAKE_BUILTIN(FAbs, __builtin_fabs); +MAKE_BUILTIN(FCos, __builtin_cos) +MAKE_BUILTIN(FSin, __builtin_sin) +MAKE_BUILTIN(FTan, __builtin_tan) +MAKE_BUILTIN(FAtan,__builtin_atan) +MAKE_BUILTIN(FSqrt,__builtin_sqrt) +MAKE_BUILTIN(FExp2,__builtin_exp2) +MAKE_BUILTIN(FLog2,__builtin_log2) + +MAKE_BUILTIN(FRoundUsingMode, __builtin_nearbyint); +MAKE_BUILTIN(FTruncTowardZero, __builtin_trunc); +MAKE_BUILTIN(FRoundAwayFromZero, __builtin_round); +MAKE_BUILTIN(FRoundToPositiveInfinity, __builtin_ceil); +MAKE_BUILTIN(FRoundToNegativeInfinity, __builtin_floor); + +#undef MAKE_BUILTIN_INTRINSIC +#undef MAKE_BUILTIN + ALWAYS_INLINE static int16_t Float64ToInt16(float64_t val) { auto max_int = Float64(Maximize(Int16(0))); return Select(FCmpLt(max_int, FAbs(val)), Int16(0x8000), Int16(val)); @@ -1509,6 +1563,17 @@ ALWAYS_INLINE static int32_t Float64ToInt32(float64_t val) { Int32(val)); } +ALWAYS_INLINE static int16_t Float80ToInt16(float80_t val) { + auto max_int = Float80(Float64(Maximize(Int16(0)))); + return Select(FCmpLt80(max_int, FAbs80(val)), Int16(0x8000), Int16(val)); +} + +ALWAYS_INLINE static int32_t Float80ToInt32(float80_t val) { + auto max_int = Float80(Float64(Maximize(Int32(0)))); + return Select(FCmpLt80(max_int, FAbs80(val)), Int32(0x80000000), + Int32(val)); +} + ALWAYS_INLINE static int16_t Float32ToInt16(float32_t val) { auto max_int = Float32(Maximize(Int32(0))); return Select(FCmpLt(max_int, FAbs(val)), Int16(0x8000), Int16(val)); @@ -1530,28 +1595,10 @@ ALWAYS_INLINE static int64_t Float64ToInt64(float64_t val) { Int64(0x8000000000000000LL), Int64(val)); } -ALWAYS_INLINE static float32_t FRoundUsingMode32(float32_t val) { - return __builtin_nearbyintf(val); -} - -ALWAYS_INLINE static float64_t FRoundUsingMode64(float64_t 
val) { - return __builtin_nearbyint(val); -} - -ALWAYS_INLINE static float32_t FTruncTowardZero32(float32_t val) { - return __builtin_truncf(val); -} - -ALWAYS_INLINE static float64_t FTruncTowardZero64(float64_t val) { - return __builtin_trunc(val); -} - -ALWAYS_INLINE static float32_t FRoundAwayFromZero32(float32_t val) { - return __builtin_roundf(val); -} - -ALWAYS_INLINE static float64_t FRoundAwayFromZero64(float64_t val) { - return __builtin_round(val); +ALWAYS_INLINE static int64_t Float80ToInt64(float80_t val) { + auto max_int = Float80(Float64(Maximize(Int64(0)))); + return Select(FCmpLt80(max_int, FAbs80(val)), + Int64(0x8000000000000000LL), Int64(val)); } ALWAYS_INLINE static float32_t FRoundToNearestEven32(float32_t val) { @@ -1594,20 +1641,4 @@ ALWAYS_INLINE static float64_t FRoundToNearestEven64(float64_t val) { // } } -ALWAYS_INLINE static float32_t FRoundToPositiveInfinity32(float32_t val) { - return __builtin_ceilf(val); -} - -ALWAYS_INLINE static float64_t FRoundToPositiveInfinity64(float64_t val) { - return __builtin_ceil(val); -} - -ALWAYS_INLINE static float32_t FRoundToNegativeInfinity32(float32_t val) { - return __builtin_floorf(val); -} - -ALWAYS_INLINE static float64_t FRoundToNegativeInfinity64(float64_t val) { - return __builtin_floor(val); -} - } // namespace diff --git a/include/remill/Arch/Runtime/Types.h b/include/remill/Arch/Runtime/Types.h index c1bd1a87e..b30ba4d36 100644 --- a/include/remill/Arch/Runtime/Types.h +++ b/include/remill/Arch/Runtime/Types.h @@ -65,44 +65,86 @@ static_assert(4 == sizeof(float32_t), "Invalid `float32_t` size."); typedef double float64_t; static_assert(8 == sizeof(float64_t), "Invalid `float64_t` size."); +#if defined(__x86_64__) || defined(__i386__) || defined(_M_X86) +typedef long double float128_t; +static_assert(12 == sizeof(float128_t) || 16 == sizeof(float128_t), "Invalid `float128_t` size."); +#else typedef double float128_t; static_assert(8 == sizeof(float128_t), "Invalid `float128_t` size."); +#endif + -// TODO(pag): Assumes little endian. 
struct float80_t final { uint8_t data[10]; + + inline ~float80_t(void) = default; + inline float80_t(void) : data{0,} {} + + float80_t(const float80_t &) = default; + float80_t &operator=(const float80_t &) = default; + #if defined(__x86_64__) || defined(__i386__) || defined(_M_X86) - // convert a long double into an f80 representation on x86/x86-64 - // this assumes long double uses the f80 format internally, but - // is simply aligned to an even boundary (hence size 12 or 16) - float80_t(long double ld) { + inline float80_t(long double ld) { static_assert(12 == sizeof(long double) || 16 == sizeof(long double), "Invalid `long double` size."); - union ld_union { - long double ld_val; - struct { - char pad[sizeof(long double) - sizeof(float80_t)]; - float80_t f80; - } __attribute__((packed)) f80_data; - }; - - ld_union ldu {.ld_val = ld}; - std::memcpy(&data[0], &ldu.f80_data.f80.data[0], sizeof(data)); + + union union_ld { + uint8_t data[sizeof(long double)]; + long double ld; + } __attribute((packed)); + + union_ld f80 = {.ld = ld}; + for (unsigned i = 0; i < sizeof(float80_t); i++) { + this->data[i] = f80.data[i]; + } } - operator long double() const { + operator long double() { static_assert(12 == sizeof(long double) || 16 == sizeof(long double), "Invalid `long double` size."); - union ld_union { - long double ld_val; - struct { - char pad[sizeof(long double) - sizeof(float80_t)]; - float80_t f80; - } __attribute__((packed)) f80_data; - }; - - ld_union ldu {0}; - std::memcpy(&ldu.f80_data.f80.data[0], &data[0], sizeof(data)); - return ldu.ld_val; + + union union_ld { + uint8_t data[sizeof(long double)]; + long double ld; + } __attribute((packed)); + + union_ld f80 = {{0}}; + for (unsigned i = 0; i < sizeof(float80_t); i++) { + f80.data[i] = this->data[i]; + } + + return f80.ld; + } + +#else + inline float80_t(double ld) { + static_assert(8 == sizeof(double), "Invalid `double` size."); + + union union_ld { + uint8_t data[sizeof(double)]; + double ld; + } __attribute((packed)); + + union_ld f80 = {.ld = ld}; + for (unsigned i = 0; i < sizeof(double); i++) { + this->data[i] = f80.data[i]; + } + } + + operator double() { + static_assert(8 == sizeof(double), "Invalid `double` size."); + + union union_ld { + uint8_t data[sizeof(double)]; + double ld; + } __attribute((packed)); + + union_ld f80 = {{0}}; + for (unsigned i = 0; i < sizeof(double); i++) { + f80.data[i] = this->data[i]; + } + + return f80.ld; } + #endif } __attribute__((packed)); @@ -134,6 +176,20 @@ union nan64_t { static_assert(sizeof(float64_t) == sizeof(nan64_t), "Invalid packing of `nan64_t`."); +union nan80_t { + float80_t d; + struct { + uint64_t payload : 62; + uint64_t is_quiet_nan : 1; + uint64_t interger_bit : 1; + uint64_t exponent : 15; + uint64_t is_negative : 1; + } __attribute__((packed)); +} __attribute__((packed)); + +static_assert(sizeof(float80_t) == sizeof(nan80_t), + "Invalid packing of `nan80_t`."); + // Note: We are re-defining the `std::is_signed` type trait because we can't // always explicitly specialize it inside of the `std` namespace. 
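Note (illustration only, not part of the patch): the new `float80_t` is a packed 10-byte buffer whose x86 constructor and conversion operator copy the low 10 bytes of a `long double`, so values should round-trip losslessly on hosts where `long double` is the 80-bit extended format. A minimal sketch of that assumption, using the `float80_t` type added above:

// Sketch only; assumes an x86/x86-64 host where `long double` is the 80-bit
// extended type and the layout is little-endian, as the TODO above notes.
#include <cassert>

static_assert(10 == sizeof(float80_t), "float80_t must stay 10 bytes");

inline void Float80RoundTripSketch(void) {
  long double ld = 3.141592653589793238L;
  float80_t f80 = ld;      // constructor copies the 10 significant bytes
  long double back = f80;  // conversion operator rebuilds the long double
  assert(back == ld);      // no precision is lost in either direction
}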
@@ -476,6 +532,11 @@ struct Rn final { const float64_t val; }; +template <> +struct Rn final { + const float80_t val; +}; + template <> struct RnW final { float32_t *const val_ref; @@ -551,9 +612,6 @@ struct BaseType { typedef T BT; }; -template <> -struct BaseType : public BaseType {}; - template struct BaseType : public BaseType {}; diff --git a/include/remill/Arch/X86/Runtime/State.h b/include/remill/Arch/X86/Runtime/State.h index e47c24616..a5b2f83e0 100644 --- a/include/remill/Arch/X86/Runtime/State.h +++ b/include/remill/Arch/X86/Runtime/State.h @@ -735,15 +735,6 @@ struct alignas(8) GPR final { static_assert(272 == sizeof(GPR), "Invalid structure packing of `GPR`."); -/* -struct alignas(8) X87Stack final { - struct alignas(8) { - uint64_t _0; - float64_t val; - } __attribute__((packed)) elems[8]; -}; -*/ - // Declare val as float80_t struct alignas(16) X87Stack final { struct alignas(16) { diff --git a/lib/Arch/X86/Arch.cpp b/lib/Arch/X86/Arch.cpp index 5eb0a600c..2c0b6d563 100644 --- a/lib/Arch/X86/Arch.cpp +++ b/lib/Arch/X86/Arch.cpp @@ -1221,6 +1221,7 @@ void X86Arch::PopulateBasicBlockFunction(llvm::Module *module, auto u32 = llvm::Type::getInt32Ty(context); auto u64 = llvm::Type::getInt64Ty(context); auto f64 = llvm::Type::getDoubleTy(context); + auto f80 = llvm::Type::getX86_FP80Ty(context); auto v128 = llvm::ArrayType::get(llvm::Type::getInt8Ty(context), 128u / 8u); auto v256 = llvm::ArrayType::get(llvm::Type::getInt8Ty(context), 256u / 8u); auto v512 = llvm::ArrayType::get(llvm::Type::getInt8Ty(context), 512u / 8u); @@ -1492,14 +1493,14 @@ void X86Arch::PopulateBasicBlockFunction(llvm::Module *module, SUB_REG(XMM31, vec[31].xmm, v128, YMM31); } - REG(ST0, st.elems[0].val, f64); - REG(ST1, st.elems[1].val, f64); - REG(ST2, st.elems[2].val, f64); - REG(ST3, st.elems[3].val, f64); - REG(ST4, st.elems[4].val, f64); - REG(ST5, st.elems[5].val, f64); - REG(ST6, st.elems[6].val, f64); - REG(ST7, st.elems[7].val, f64); + REG(ST0, st.elems[0].val, f80); + REG(ST1, st.elems[1].val, f80); + REG(ST2, st.elems[2].val, f80); + REG(ST3, st.elems[3].val, f80); + REG(ST4, st.elems[4].val, f80); + REG(ST5, st.elems[5].val, f80); + REG(ST6, st.elems[6].val, f80); + REG(ST7, st.elems[7].val, f80); #if 0 // TODO(pag): Don't emulate directly for now. if (32 == address_size) { diff --git a/lib/Arch/X86/Semantics/X87.cpp b/lib/Arch/X86/Semantics/X87.cpp index dee967571..086384087 100644 --- a/lib/Arch/X86/Semantics/X87.cpp +++ b/lib/Arch/X86/Semantics/X87.cpp @@ -16,6 +16,7 @@ #pragma once + #define PUSH_X87_STACK(x) \ do { \ auto __x = x; \ @@ -117,10 +118,10 @@ DEF_FPU_SEM(FILD, RF80W, T src1) { template DEF_FPU_SEM(FLD, RF80W, T src1) { SetFPUIpOp(); - auto val = Read(src1); - state.sw.ie |= IsSignalingNaN(val); - state.sw.de = IsDenormal(val); - auto res = Float64(val); + auto value = Read(src1); + state.sw.ie |= IsSignalingNaN(value); + state.sw.de = IsDenormal(value); + auto res = Float64(value); // Quietize if signaling NaN. 
if (state.sw.ie) { @@ -129,15 +130,15 @@ DEF_FPU_SEM(FLD, RF80W, T src1) { res = res_nan.d; } - PUSH_X87_STACK(res); + PUSH_X87_STACK(Float80(res)); return memory; } -DEF_FPU_SEM(FLDfromstack, RF80W, RF80 src1) { +DEF_FPU_SEM(FLDfromstack, RF80W, RF80W src1) { SetFPUIpOp(); state.sw.ie = 0; state.sw.de = 0; - PUSH_X87_STACK(Read(src1)); + PUSH_X87_STACK(Float80(Read(src1))); return memory; } @@ -196,29 +197,27 @@ DEF_FPU_SEM(DoFLDPI) { DEF_FPU_SEM(DoFABS) { SetFPUIpOp(); - float64_t st0 = Read(X87_ST0); - float64_t res = CheckedFloatUnaryOp(state, FAbs64, st0); + float80_t st0 = Read(X87_ST0); + float80_t res = CheckedFloatUnaryOp(state, FAbs80, st0); Write(X87_ST0, res); return memory; } DEF_FPU_SEM(DoFCHS) { SetFPUIpOp(); - float64_t st0 = Read(X87_ST0); - float64_t res = CheckedFloatUnaryOp(state, FNeg64, st0); + float80_t st0 = Read(X87_ST0); + float80_t res = CheckedFloatUnaryOp(state, FNeg80, st0); Write(X87_ST0, res); return memory; } -#define WRAP_BUILTIN(name, type, builtin) \ - ALWAYS_INLINE static type name(type x) { \ - return builtin(x); \ - } - -WRAP_BUILTIN(FCos64, float64_t, __builtin_cos) -WRAP_BUILTIN(FSin64, float64_t, __builtin_sin) -WRAP_BUILTIN(FTan64, float64_t, __builtin_tan) -WRAP_BUILTIN(FSqrt64, float64_t, __builtin_sqrt) +#if defined(__x86_64__) || defined(__i386__) || defined(_M_X86) +#define __builtin_fmod_f80 __builtin_fmodl +#define __builtin_remainder_f80 __builtin_remainderl +#else +#define __builtin_fmod_f80 __builtin_fmod +#define __builtin_remainder_f80 __builtin_remainder +#endif // NOTE(pag): This only sort of, but doesn't really make sense. That is, it's // a reasonable guess-y way to say whether or not a given value can @@ -232,12 +231,16 @@ ALWAYS_INLINE static uint8_t IsImprecise(float64_t x) { return 0 != (reinterpret_cast(x) & 0xFF); } +ALWAYS_INLINE static uint8_t IsImprecise(float80_t x) { + return 0 != (reinterpret_cast(x) & 0x7FFFF); +} + DEF_FPU_SEM(DoFCOS) { SetFPUIpOp(); - float64_t st0 = Read(X87_ST0); + float80_t st0 = Read(X87_ST0); state.sw.ie |= IsSignalingNaN(st0) | IsInfinite(st0); state.sw.de = IsDenormal(st0); - auto res = CheckedFloatUnaryOp(state, FCos64, st0); + float80_t res = CheckedFloatUnaryOp(state, FCos80, st0); if (!IsNaN(res)) { state.sw.pe = IsImprecise(res); } @@ -247,10 +250,10 @@ DEF_FPU_SEM(DoFCOS) { DEF_FPU_SEM(DoFSIN) { SetFPUIpOp(); - float64_t st0 = Read(X87_ST0); + float80_t st0 = Read(X87_ST0); state.sw.ie |= IsSignalingNaN(st0) | IsInfinite(st0); state.sw.de = IsDenormal(st0); - auto res = CheckedFloatUnaryOp(state, FSin64, st0); + float80_t res = CheckedFloatUnaryOp(state, FSin80, st0); if (!IsNaN(res)) { state.sw.pe = IsImprecise(res); } @@ -260,10 +263,10 @@ DEF_FPU_SEM(DoFSIN) { DEF_FPU_SEM(DoFPTAN) { SetFPUIpOp(); - float64_t st0 = Read(X87_ST0); + float80_t st0 = Read(X87_ST0); state.sw.ie |= IsSignalingNaN(st0) | IsInfinite(st0); state.sw.de = IsDenormal(st0); - auto res = CheckedFloatUnaryOp(state, FTan64, st0); + float80_t res = CheckedFloatUnaryOp(state, FTan80, st0); if (!IsNaN(res)) { state.sw.pe = IsImprecise(res); } @@ -275,47 +278,47 @@ DEF_FPU_SEM(DoFPTAN) { DEF_FPU_SEM(DoFPATAN) { SetFPUIpOp(); - float64_t st0 = Read(X87_ST0); - float64_t st1 = Read(X87_ST1); - float64_t res = CheckedFloatBinOp(state, FDiv64, st1, st0); + float80_t st0 = Read(X87_ST0); + float80_t st1 = Read(X87_ST1); + float80_t res = CheckedFloatBinOp(state, FDiv80, st1, st0); if (!state.sw.ie) { state.sw.ie = IsSignalingNaN(res) | IsInfinite(res); state.sw.de = IsDenormal(res); state.sw.pe = IsImprecise(res); } - 
Write(X87_ST1, __builtin_atan(res)); + Write(X87_ST1, FAtan80(res)); (void) POP_X87_STACK(); return memory; } DEF_FPU_SEM(DoFSQRT) { SetFPUIpOp(); - float64_t st0 = Read(X87_ST0); + auto st0 = Float64(Read(X87_ST0)); if (IsZero(st0)) { state.sw.ie = 0; state.sw.de = 0; state.sw.pe = 0; - Write(X87_ST0, st0); + Write(X87_ST0, Float80(st0)); } else { state.sw.ie |= IsSignalingNaN(st0) | IsNegative(st0); state.sw.de = IsDenormal(st0); - float64_t res = CheckedFloatUnaryOp(state, FSqrt64, st0); + auto res = CheckedFloatUnaryOp(state, FSqrt64, st0); if (!IsNaN(res)) { state.sw.pe = IsImprecise(res); } - Write(X87_ST0, res); + Write(X87_ST0, Float80(res)); } return memory; } DEF_FPU_SEM(DoFSINCOS) { SetFPUIpOp(); - auto st0 = Read(X87_ST0); + float80_t st0 = Read(X87_ST0); state.sw.ie |= IsSignalingNaN(st0) | IsInfinite(st0); state.sw.de = IsDenormal(st0); - auto sin_res = CheckedFloatUnaryOp(state, FSin64, st0); - auto cos_res = CheckedFloatUnaryOp(state, FCos64, st0); + float80_t sin_res = CheckedFloatUnaryOp(state, FSin80, st0); + float80_t cos_res = CheckedFloatUnaryOp(state, FCos80, st0); if (!IsNaN(sin_res) && !IsNaN(cos_res)) { state.sw.pe = IsImprecise(sin_res) | IsImprecise(cos_res); } @@ -326,19 +329,19 @@ DEF_FPU_SEM(DoFSINCOS) { DEF_FPU_SEM(DoFSCALE) { SetFPUIpOp(); - auto st1_int = __builtin_trunc(Read(X87_ST1)); // Round toward zero. - auto shift = __builtin_exp2(st1_int); - Write(X87_ST0, FMul(Read(X87_ST0), shift)); + auto st1_int = FTruncTowardZero(Read(X87_ST1)); // Round toward zero. + auto shift = FExp2(st1_int); + Write(X87_ST0, FMul(Read(X87_ST0), Float80(shift))); return memory; } DEF_FPU_SEM(DoF2XM1) { SetFPUIpOp(); - auto st0 = Read(X87_ST0); + float80_t st0 = Read(X87_ST0); state.sw.ie |= IsSignalingNaN(st0) | IsInfinite(st0); state.sw.de = IsDenormal(st0); state.sw.ue = 0; // TODO(pag): Not sure. - auto res = FSub(__builtin_exp2(st0), 1.0); + float80_t res = FSub(Float80(FExp2(st0)), Float80(1.0)); if (!IsNaN(res)) { state.sw.pe = IsImprecise(res); // TODO(pag): Not sure. } @@ -348,12 +351,12 @@ DEF_FPU_SEM(DoF2XM1) { DEF_FPU_SEM(DoFPREM) { SetFPUIpOp(); - float64_t st0 = Read(X87_ST0); - float64_t st1 = Read(X87_ST1); - auto rem = __builtin_fmod(st0, st1); - Write(X87_ST0, rem); + auto st0 = Read(X87_ST0); + auto st1 = Read(X87_ST1); + auto rem = __builtin_fmod_f80(st0, st1); + Write(X87_ST0, Float80(rem)); - auto quot = Int64(FTruncTowardZero64(FDiv(st0, st1))); + auto quot = Int64(FTruncTowardZero(FDiv(st0, st1))); auto quot_lsb = TruncTo(UInt64(SAbs(quot))); state.sw.c0 = UAnd(UShr(quot_lsb, 2_u8), 1_u8); // Q2. state.sw.c2 = 0; // Assumes it's not a partial remainder. @@ -364,11 +367,11 @@ DEF_FPU_SEM(DoFPREM) { DEF_FPU_SEM(DoFPREM1) { SetFPUIpOp(); - float64_t st0 = Read(X87_ST0); - float64_t st1 = Read(X87_ST1); - auto rem = __builtin_remainder(st0, st1); - Write(X87_ST0, rem); - auto quot = Float64ToInt64(FDiv(st0, st1)); + auto st0 = Read(X87_ST0); + auto st1 = Read(X87_ST1); + auto rem = __builtin_remainder_f80(st0, st1); + Write(X87_ST0, Float80(rem)); + auto quot = Float80ToInt64(FDiv80(st0, st1)); auto quot_lsb = TruncTo(UInt64(SAbs(quot))); state.sw.c0 = UAnd(UShr(quot_lsb, 2_u8), 1_u8); // Q2. state.sw.c2 = 0; // Assumes it's not a partial remainder. 
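Aside (not part of the diff): `__builtin_fmod_f80` and `__builtin_remainder_f80` simply alias the `l`-suffixed builtins on x86 hosts and the `double` builtins elsewhere. The following is an assumption-level, standalone sketch of the bookkeeping that is visible in DoFPREM above (remainder via fmod, quotient bit Q2 into C0, C2 cleared); the remaining status bits are omitted because they fall outside the quoted hunk.

#include <cmath>
#include <cstdint>

inline void FpremSketch(long double st0, long double st1,
                        long double &rem, uint8_t &c0, uint8_t &c2) {
  rem = std::fmod(st0, st1);                        // x87-style remainder
  int64_t quot = static_cast<int64_t>(std::trunc(st0 / st1));
  uint64_t q = static_cast<uint64_t>(quot < 0 ? -quot : quot);
  c0 = static_cast<uint8_t>((q >> 2) & 1);          // Q2, as in DoFPREM
  c2 = 0;                                           // not a partial remainder
}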
@@ -432,20 +435,20 @@ DEF_ISEL(FPREM1) = DoFPREM1; namespace { template -DEF_FPU_SEM(FSUB, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FSUB, RF80W dst, RF80W src1, T src2) { SetFPUIpOp(); - Write(dst, CheckedFloatBinOp(state, FSub64, Read(src1), Float64(Read(src2)))); + Write(dst, CheckedFloatBinOp(state, FSub80, Read(src1), Float80(Read(src2)))); return memory; } template -DEF_FPU_SEM(FSUBmem, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FSUBmem, RF80W dst, RF80W src1, T src2) { SetFPUDp(src2); return FSUB(memory, state, dst, src1, src2, pc, fop); } template -DEF_FPU_SEM(FSUBP, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FSUBP, RF80W dst, RF80W src1, T src2) { SetFPUIpOp(); memory = FSUB(memory, state, dst, src1, src2, pc, fop); (void) POP_X87_STACK(); @@ -453,29 +456,29 @@ DEF_FPU_SEM(FSUBP, RF80W dst, RF80 src1, T src2) { } template -DEF_FPU_SEM(FISUB, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FISUB, RF80W dst, RF80W src1, T src2) { SetFPUIpOp(); SetFPUDp(src2); - Write(dst, CheckedFloatBinOp(state, FSub64, Read(src1), - Float64(Signed(Read(src2))))); + Write(dst, CheckedFloatBinOp(state, FSub80, Read(src1), + Float80(Float64(Signed(Read(src2)))))); return memory; } template -DEF_FPU_SEM(FSUBR, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FSUBR, RF80W dst, RF80W src1, T src2) { SetFPUIpOp(); - Write(dst, CheckedFloatBinOp(state, FSub64, Float64(Read(src2)), Read(src1))); + Write(dst, CheckedFloatBinOp(state, FSub80, Float80(Float64(Read(src2))), Read(src1))); return memory; } template -DEF_FPU_SEM(FSUBRmem, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FSUBRmem, RF80W dst, RF80W src1, T src2) { SetFPUDp(src2); return FSUBR(memory, state, dst, src1, src2, pc, fop); } template -DEF_FPU_SEM(FSUBRP, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FSUBRP, RF80W dst, RF80W src1, T src2) { SetFPUIpOp(); memory = FSUBR(memory, state, dst, src1, src2, pc, fop); (void) POP_X87_STACK(); @@ -483,10 +486,10 @@ DEF_FPU_SEM(FSUBRP, RF80W dst, RF80 src1, T src2) { } template -DEF_FPU_SEM(FISUBR, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FISUBR, RF80W dst, RF80W src1, T src2) { SetFPUIpOp(); SetFPUDp(src2); - Write(dst, CheckedFloatBinOp(state, FSub64, Float64(Signed(Read(src2))), + Write(dst, CheckedFloatBinOp(state, FSub80, Float80(Float64(Signed(Read(src2)))), Read(src1))); return memory; } @@ -494,15 +497,15 @@ DEF_FPU_SEM(FISUBR, RF80W dst, RF80 src1, T src2) { DEF_ISEL(FSUB_ST0_MEMmem32real) = FSUBmem; DEF_ISEL(FSUB_ST0_MEMm64real) = FSUBmem; -DEF_ISEL(FSUB_ST0_X87) = FSUB; -DEF_ISEL(FSUB_X87_ST0) = FSUB; -DEF_ISEL(FSUBP_X87_ST0) = FSUBP; +DEF_ISEL(FSUB_ST0_X87) = FSUB; +DEF_ISEL(FSUB_X87_ST0) = FSUB; +DEF_ISEL(FSUBP_X87_ST0) = FSUBP; DEF_ISEL(FSUBR_ST0_MEMmem32real) = FSUBRmem; DEF_ISEL(FSUBR_ST0_MEMm64real) = FSUBRmem; -DEF_ISEL(FSUBR_ST0_X87) = FSUBR; -DEF_ISEL(FSUBR_X87_ST0) = FSUBR; -DEF_ISEL(FSUBRP_X87_ST0) = FSUBRP; +DEF_ISEL(FSUBR_ST0_X87) = FSUBR; +DEF_ISEL(FSUBR_X87_ST0) = FSUBR; +DEF_ISEL(FSUBRP_X87_ST0) = FSUBRP; DEF_ISEL(FISUB_ST0_MEMmem32int) = FISUB; DEF_ISEL(FISUB_ST0_MEMmem16int) = FISUB; @@ -512,9 +515,10 @@ DEF_ISEL(FISUBR_ST0_MEMmem16int) = FISUBR; namespace { template -DEF_FPU_SEM(FADD, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FADD, RF80W dst, RF80W src1, T src2) { SetFPUIpOp(); - Write(dst, CheckedFloatBinOp(state, FAdd64, Read(src1), Float64(Read(src2)))); + float64_t f64 = Float64(Read(src2)); + Write(dst, CheckedFloatBinOp(state, FAdd80, Read(src1), Float80(f64))); // state.sw.c1 = 1; state.sw.c0 = UUndefined8(); @@ -524,135 +528,135 @@ DEF_FPU_SEM(FADD, RF80W dst, 
RF80 src1, T src2) { } template -DEF_FPU_SEM(FADDmem, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FADDmem, RF80W dst, RF80W src1, T src2) { SetFPUDp(src2); return FADD(memory, state, dst, src1, src2, pc, fop); } template -DEF_FPU_SEM(FADDP, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FADDP, RF80W dst, RF80W src1, T src2) { memory = FADD(memory, state, dst, src1, src2, pc, fop); (void) POP_X87_STACK(); return memory; } template -DEF_FPU_SEM(FIADD, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FIADD, RF80W dst, RF80W src1, T src2) { SetFPUIpOp(); SetFPUDp(src2); - Write(dst, CheckedFloatBinOp(state, FAdd64, Read(src1), - Float64(Signed(Read(src2))))); + float64_t res = CheckedFloatBinOp(state, FAdd64, Float64(Read(src1)), Float64(Signed(Read(src2)))); + Write(dst, Float80(res)); return memory; } } // namespace DEF_ISEL(FADD_ST0_MEMmem32real) = FADDmem; -DEF_ISEL(FADD_ST0_X87) = FADD; +DEF_ISEL(FADD_ST0_X87) = FADD; DEF_ISEL(FADD_ST0_MEMm64real) = FADDmem; -DEF_ISEL(FADD_X87_ST0) = FADD; -DEF_ISEL(FADDP_X87_ST0) = FADDP; +DEF_ISEL(FADD_X87_ST0) = FADD; +DEF_ISEL(FADDP_X87_ST0) = FADDP; DEF_ISEL(FIADD_ST0_MEMmem32int) = FIADD; DEF_ISEL(FIADD_ST0_MEMmem16int) = FIADD; namespace { template -DEF_FPU_SEM(FMUL, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FMUL, RF80W dst, RF80W src1, T src2) { SetFPUIpOp(); - Write(dst, CheckedFloatBinOp(state, FMul64, Read(src1), Float64(Read(src2)))); + Write(dst, CheckedFloatBinOp(state, FMul80, Read(src1), Float80(Read(src2)))); return memory; } template -DEF_FPU_SEM(FMULmem, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FMULmem, RF80W dst, RF80W src1, T src2) { SetFPUDp(src2); return FMUL(memory, state, dst, src1, src2, pc, fop); } template -DEF_FPU_SEM(FMULP, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FMULP, RF80W dst, RF80W src1, T src2) { memory = FMUL(memory, state, dst, src1, src2, pc, fop); (void) POP_X87_STACK(); return memory; } template -DEF_FPU_SEM(FIMUL, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FIMUL, RF80W dst, RF80W src1, T src2) { SetFPUIpOp(); SetFPUDp(src2); - Write(dst, CheckedFloatBinOp(state, FMul64, Read(src1), - Float64(Signed(Read(src2))))); + auto res = CheckedFloatBinOp(state, FMul64, Float64(Read(src1)), Float64(Signed(Read(src2)))); + Write(dst, Float80(res)); return memory; } } // namespace DEF_ISEL(FMUL_ST0_MEMmem32real) = FMULmem; -DEF_ISEL(FMUL_ST0_X87) = FMUL; +DEF_ISEL(FMUL_ST0_X87) = FMUL; DEF_ISEL(FMUL_ST0_MEMm64real) = FMULmem; -DEF_ISEL(FMUL_X87_ST0) = FMUL; -DEF_ISEL(FMULP_X87_ST0) = FMULP; +DEF_ISEL(FMUL_X87_ST0) = FMUL; +DEF_ISEL(FMULP_X87_ST0) = FMULP; DEF_ISEL(FIMUL_ST0_MEMmem32int) = FIMUL; DEF_ISEL(FIMUL_ST0_MEMmem16int) = FIMUL; namespace { template -DEF_FPU_SEM(FDIV, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FDIV, RF80W dst, RF80W src1, T src2) { SetFPUIpOp(); - Write(dst, CheckedFloatBinOp(state, FDiv64, Read(src1), Float64(Read(src2)))); + Write(dst, CheckedFloatBinOp(state, FDiv80, Read(src1), Float80(Read(src2)))); return memory; } template -DEF_FPU_SEM(FDIVmem, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FDIVmem, RF80W dst, RF80W src1, T src2) { SetFPUDp(src2); return FDIV(memory, state, dst, src1, src2, pc, fop); } template -DEF_FPU_SEM(FDIVP, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FDIVP, RF80W dst, RF80W src1, T src2) { memory = FDIV(memory, state, dst, src1, src2, pc, fop); (void) POP_X87_STACK(); return memory; } template -DEF_FPU_SEM(FIDIV, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FIDIV, RF80W dst, RF80W src1, T src2) { SetFPUIpOp(); SetFPUDp(src2); - Write(dst, CheckedFloatBinOp(state, 
FDiv64, Read(src1), - Float64(Signed(Read(src2))))); + Write(dst, Float80(CheckedFloatBinOp(state, FDiv64, Float64(Read(src1)), Float64(Signed(Read(src2)))))); return memory; } template -DEF_FPU_SEM(FDIVR, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FDIVR, RF80W dst, RF80W src1, T src2) { SetFPUIpOp(); - Write(dst, CheckedFloatBinOp(state, FDiv64, Float64(Read(src2)), Read(src1))); + auto res = CheckedFloatBinOp(state, FDiv64, Float64(Read(src2)), Float64(Read(src1))); + Write(dst, Float80(res)); return memory; } template -DEF_FPU_SEM(FDIVRmem, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FDIVRmem, RF80W dst, RF80W src1, T src2) { SetFPUDp(src2); return FDIVR(memory, state, dst, src1, src2, pc, fop); } template -DEF_FPU_SEM(FDIVRP, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FDIVRP, RF80W dst, RF80W src1, T src2) { memory = FDIVR(memory, state, dst, src1, src2, pc, fop); (void) POP_X87_STACK(); return memory; } template -DEF_FPU_SEM(FIDIVR, RF80W dst, RF80 src1, T src2) { +DEF_FPU_SEM(FIDIVR, RF80W dst, RF80W src1, T src2) { SetFPUIpOp(); SetFPUDp(src2); - Write(dst, CheckedFloatBinOp(state, FDiv64, Float64(Signed(Read(src2))), - Read(src1))); + auto res = CheckedFloatBinOp(state, FDiv64, Float64(Signed(Read(src2))), Float64(Read(src1))); + Write(dst, Float80(res)); return memory; } @@ -660,15 +664,15 @@ DEF_FPU_SEM(FIDIVR, RF80W dst, RF80 src1, T src2) { DEF_ISEL(FDIV_ST0_MEMmem32real) = FDIVmem; DEF_ISEL(FDIV_ST0_MEMm64real) = FDIVmem; -DEF_ISEL(FDIV_ST0_X87) = FDIV; -DEF_ISEL(FDIV_X87_ST0) = FDIV; -DEF_ISEL(FDIVP_X87_ST0) = FDIVP; +DEF_ISEL(FDIV_ST0_X87) = FDIV; +DEF_ISEL(FDIV_X87_ST0) = FDIV; +DEF_ISEL(FDIVP_X87_ST0) = FDIVP; DEF_ISEL(FDIVR_ST0_MEMmem32real) = FDIVRmem; DEF_ISEL(FDIVR_ST0_MEMm64real) = FDIVRmem; -DEF_ISEL(FDIVR_ST0_X87) = FDIVR; -DEF_ISEL(FDIVR_X87_ST0) = FDIVR; -DEF_ISEL(FDIVRP_X87_ST0) = FDIVRP; +DEF_ISEL(FDIVR_ST0_X87) = FDIVR; +DEF_ISEL(FDIVR_X87_ST0) = FDIVR; +DEF_ISEL(FDIVRP_X87_ST0) = FDIVRP; DEF_ISEL(FIDIV_ST0_MEMmem32int) = FIDIV; DEF_ISEL(FIDIV_ST0_MEMmem16int) = FIDIV; @@ -677,7 +681,7 @@ DEF_ISEL(FIDIVR_ST0_MEMmem16int) = FIDIVR; namespace { -DEF_FPU_SEM(FBSTP, MBCD80W dst, RF80 src) { +DEF_FPU_SEM(FBSTP, MBCD80W dst, RF80W src) { SetFPUIpOp(); bcd80_t out_bcd = {}; @@ -728,30 +732,30 @@ DEF_FPU_SEM(FBSTP, MBCD80W dst, RF80 src) { } template -DEF_FPU_SEM(FST, T dst, RF80 src) { +DEF_FPU_SEM(FST, T dst, RF80W src) { SetFPUIpOp(); typedef typename BaseType::BT BT; auto res = CheckedFloatUnaryOp( - state, [=](float64_t x) { return static_cast(x); }, Read(src)); + state, [=](float64_t x) { return static_cast(x); }, Float64(Read(src))); Write(dst, res); return memory; } template -DEF_FPU_SEM(FSTmem, T dst, RF80 src) { +DEF_FPU_SEM(FSTmem, T dst, RF80W src) { SetFPUDp(dst); return FST(memory, state, dst, src, pc, fop); } template -DEF_FPU_SEM(FSTP, T dst, RF80 src) { +DEF_FPU_SEM(FSTP, T dst, RF80W src) { memory = FST(memory, state, dst, src, pc, fop); (void) POP_X87_STACK(); return memory; } template -DEF_FPU_SEM(FSTPmem, T dst, RF80 src) { +DEF_FPU_SEM(FSTPmem, T dst, RF80W src) { SetFPUDp(dst); return FSTP(memory, state, dst, src, pc, fop); } @@ -782,41 +786,40 @@ DEF_HELPER(ConvertToInt, C1 cast, C2 convert, float64_t input) return converted; } -DEF_FPU_SEM(FISTm16, M16W dst, RF80 src) { +DEF_FPU_SEM(FISTm16, M16W dst, RF80W src) { SetFPUIpOp(); SetFPUDp(dst); auto res = - ConvertToInt(memory, state, Int16, Float64ToInt16, Read(src)); + ConvertToInt(memory, state, Int16, Float80ToInt16, Float64(Read(src))); Write(dst, Unsigned(res)); return memory; } 
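The FIST* semantics above funnel through saturating helpers such as Float80ToInt16 from Operators.h: magnitudes beyond the target range collapse to the x87 "integer indefinite" encoding rather than wrapping. A rough standalone equivalent (an assumption-level sketch, not the lifted code):

#include <cmath>
#include <cstdint>

// Mirrors Float80ToInt16: anything with |val| > INT16_MAX becomes 0x8000.
inline int16_t Float80ToInt16Sketch(long double val) {
  if (std::fabs(val) > 32767.0L) {
    return static_cast<int16_t>(0x8000);  // x87 integer indefinite
  }
  return static_cast<int16_t>(val);
}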
-DEF_FPU_SEM(FISTm32, M32W dst, RF80 src) { +DEF_FPU_SEM(FISTm32, M32W dst, RF80W src) { SetFPUIpOp(); - SetFPUDp(dst); auto res = - ConvertToInt(memory, state, Int32, Float64ToInt32, Read(src)); + ConvertToInt(memory, state, Int32, Float64ToInt32, Float64(Read(src))); Write(dst, Unsigned(res)); return memory; } -DEF_FPU_SEM(FISTPm16, M16W dst, RF80 src) { +DEF_FPU_SEM(FISTPm16, M16W dst, RF80W src) { memory = FISTm16(memory, state, dst, src, pc, fop); (void) POP_X87_STACK(); return memory; } -DEF_FPU_SEM(FISTPm32, M32W dst, RF80 src) { +DEF_FPU_SEM(FISTPm32, M32W dst, RF80W src) { memory = FISTm32(memory, state, dst, src, pc, fop); (void) POP_X87_STACK(); return memory; } -DEF_FPU_SEM(FISTPm64, M64W dst, RF80 src) { +DEF_FPU_SEM(FISTPm64, M64W dst, RF80W src) { SetFPUIpOp(); SetFPUDp(dst); auto res = - ConvertToInt(memory, state, Int64, Float64ToInt64, Read(src)); + ConvertToInt(memory, state, Int64, Float64ToInt64, Float64(Read(src))); Write(dst, Unsigned(res)); (void) POP_X87_STACK(); return memory; @@ -884,31 +887,31 @@ DEF_HELPER(TruncateToInt, C1 cast, C2 convert, float64_t input) } namespace { -DEF_FPU_SEM(FISTTPm16, M16W dst, RF80 src) { +DEF_FPU_SEM(FISTTPm16, M16W dst, RF80W src) { SetFPUIpOp(); SetFPUDp(dst); auto res = - TruncateToInt(memory, state, Int16, Float64ToInt16, Read(src)); + TruncateToInt(memory, state, Int16, Float64ToInt16, Float64(Read(src))); Write(dst, Unsigned(res)); (void) POP_X87_STACK(); return memory; } -DEF_FPU_SEM(FISTTPm32, M32W dst, RF80 src) { +DEF_FPU_SEM(FISTTPm32, M32W dst, RF80W src) { SetFPUIpOp(); SetFPUDp(dst); auto res = - TruncateToInt(memory, state, Int32, Float64ToInt32, Read(src)); + TruncateToInt(memory, state, Int32, Float64ToInt32, Float64(Read(src))); Write(dst, Unsigned(res)); (void) POP_X87_STACK(); return memory; } -DEF_FPU_SEM(FISTTPm64, M64W dst, RF80 src) { +DEF_FPU_SEM(FISTTPm64, M64W dst, RF80W src) { SetFPUIpOp(); SetFPUDp(dst); auto res = - TruncateToInt(memory, state, Int64, Float64ToInt64, Read(src)); + TruncateToInt(memory, state, Int64, Float64ToInt64, Float64(Read(src))); Write(dst, Unsigned(res)); (void) POP_X87_STACK(); return memory; @@ -922,7 +925,7 @@ DEF_ISEL(FISTTP_MEMm64int_ST0) = FISTTPm64; namespace { -DEF_FPU_SEM(FXCH, RF80W dst1, RF80 src1, RF80W dst2, RF80 src2) { +DEF_FPU_SEM(FXCH, RF80W dst1, RF80W src1, RF80W dst2, RF80W src2) { SetFPUIpOp(); auto st0 = Read(src1); auto sti = Read(src2); @@ -941,7 +944,7 @@ namespace { DEF_FPU_SEM(DoFXAM) { SetFPUIpOp(); - auto st0 = Read(X87_ST0); + auto st0 = Float64(Read(X87_ST0)); uint8_t sign = __builtin_signbit(st0) == 0 ? 0_u8 : 1_u8; auto c = __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, @@ -1055,85 +1058,88 @@ DEF_FPU_SEM(DoFTST) { // flags more similarly to an ordered compare. Really, the // difference between ordered/unordered is that unordered compares // are silent on SNaNs, whereas ordered ones aren't. - OrderedCompare(memory, state, st0, 0.0); + OrderedCompare(memory, state, Float64(st0), Float64(0.0)); return memory; } template -DEF_FPU_SEM(FUCOM, RF80 src1, S2 src2) { +DEF_FPU_SEM(FUCOM, RF80W src1, S2 src2) { SetFPUIpOp(); auto st0 = Read(src1); - auto sti = Float64(Read(src2)); + auto sti = Read(src2); // Note: Don't modify c1. The docs only state that c1=0 if there was a // stack underflow. 
- UnorderedCompare(memory, state, st0, sti); + UnorderedCompare(memory, state, Float64(st0), Float64(sti)); return memory; } template -DEF_FPU_SEM(FCOM, RF80 src1, S2 src2) { +DEF_FPU_SEM(FCOM, RF80W src1, S2 src2) { SetFPUIpOp(); auto st0 = Read(src1); - auto sti = Float64(Read(src2)); + auto sti = Read(src2); // Note: Don't modify c1. The docs only state that c1=0 if there was a // stack underflow. - OrderedCompare(memory, state, st0, sti); + OrderedCompare(memory, state, Float64(st0), Float64(sti)); return memory; } template -DEF_FPU_SEM(FUCOMmem, RF80 src1, S2 src2) { +DEF_FPU_SEM(FUCOMmem, RF80W src1, S2 src2) { SetFPUDp(src2); return FUCOM(memory, state, src1, src2, pc, fop); } template -DEF_FPU_SEM(FCOMmem, RF80 src1, S2 src2) { +DEF_FPU_SEM(FCOMmem, RF80W src1, S2 src2) { SetFPUDp(src2); return FCOM(memory, state, src1, src2, pc, fop); } template -DEF_FPU_SEM(FUCOMP, RF80 src1, S2 src2) { +DEF_FPU_SEM(FUCOMP, RF80W src1, S2 src2) { memory = FUCOM(memory, state, src1, src2, pc, fop); (void) POP_X87_STACK(); return memory; } template -DEF_FPU_SEM(FCOMP, RF80 src1, S2 src2) { +DEF_FPU_SEM(FCOMP, RF80W src1, S2 src2) { memory = FCOM(memory, state, src1, src2, pc, fop); (void) POP_X87_STACK(); return memory; } template -DEF_FPU_SEM(FUCOMPmem, RF80 src1, S2 src2) { +DEF_FPU_SEM(FUCOMPmem, RF80W src1, S2 src2) { SetFPUDp(src2); return FUCOMP(memory, state, src1, src2, pc, fop); } template -DEF_FPU_SEM(FCOMPmem, RF80 src1, S2 src2) { +DEF_FPU_SEM(FCOMPmem, RF80W src1, S2 src2) { SetFPUDp(src2); return FCOMP(memory, state, src1, src2, pc, fop); } DEF_FPU_SEM(DoFUCOMPP) { - RF80 st0 = {X87_ST0}; - RF80 st1 = {X87_ST1}; - memory = FUCOM(memory, state, st0, st1, pc, fop); + SetFPUIpOp(); + RF80W st0 = {&X87_ST0}; + RF80W st1 = {&X87_ST1}; + memory = FUCOM(memory, state, st0, st1, pc, fop); + (void) POP_X87_STACK(); (void) POP_X87_STACK(); return memory; } DEF_FPU_SEM(DoFCOMPP) { - RF80 st0 = {X87_ST0}; - RF80 st1 = {X87_ST1}; - memory = FCOM(memory, state, st0, st1, pc, fop); + RF80W st0 = {&X87_ST0}; + RF80W st1 = {&X87_ST1}; + memory = FCOM(memory, state, st0, st1, pc, fop); + (void) POP_X87_STACK(); (void) POP_X87_STACK(); return memory; @@ -1193,7 +1199,7 @@ DEF_HELPER(OrderedCompareEflags, float64_t src1, float64_t src2)->void { } } -DEF_FPU_SEM(FUCOMI, RF80 src1, RF80 src2) { +DEF_FPU_SEM(FUCOMI, RF80W src1, RF80W src2) { SetFPUIpOp(); auto st0 = Read(src1); auto sti = Read(src2); @@ -1201,17 +1207,17 @@ DEF_FPU_SEM(FUCOMI, RF80 src1, RF80 src2) { FLAG_OF = 0; FLAG_SF = 0; FLAG_AF = 0; - UnorderedCompareEflags(memory, state, st0, sti); + UnorderedCompareEflags(memory, state, Float64(st0), Float64(sti)); return memory; } -DEF_FPU_SEM(FUCOMIP, RF80 src1, RF80 src2) { +DEF_FPU_SEM(FUCOMIP, RF80W src1, RF80W src2) { memory = FUCOMI(memory, state, src1, src2, pc, fop); (void) POP_X87_STACK(); return memory; } -DEF_FPU_SEM(FCOMI, RF80 src1, RF80 src2) { +DEF_FPU_SEM(FCOMI, RF80W src1, RF80W src2) { SetFPUIpOp(); auto st0 = Read(src1); auto sti = Read(src2); @@ -1219,11 +1225,11 @@ DEF_FPU_SEM(FCOMI, RF80 src1, RF80 src2) { FLAG_OF = 0; FLAG_SF = 0; FLAG_AF = 0; - OrderedCompareEflags(memory, state, st0, sti); + OrderedCompareEflags(memory, state, Float64(st0), Float64(sti)); return memory; } -DEF_FPU_SEM(FCOMIP, RF80 src1, RF80 src2) { +DEF_FPU_SEM(FCOMIP, RF80W src1, RF80W src2) { memory = FCOMI(memory, state, src1, src2, pc, fop); (void) POP_X87_STACK(); return memory; @@ -1234,8 +1240,8 @@ DEF_FPU_SEM(FCOMIP, RF80 src1, RF80 src2) { DEF_ISEL(FXAM) = DoFXAM; DEF_ISEL(FTST) = DoFTST; 
-DEF_ISEL(FUCOM_ST0_X87) = FUCOM; -DEF_ISEL(FUCOMP_ST0_X87) = FUCOMP; +DEF_ISEL(FUCOM_ST0_X87) = FUCOM; +DEF_ISEL(FUCOMP_ST0_X87) = FUCOMP; DEF_ISEL(FUCOMPP) = DoFUCOMPP; DEF_ISEL(FUCOMI_ST0_X87) = FUCOMI; @@ -1244,16 +1250,16 @@ DEF_ISEL(FUCOMIP_ST0_X87) = FUCOMIP; DEF_ISEL(FCOMI_ST0_X87) = FCOMI; DEF_ISEL(FCOMIP_ST0_X87) = FCOMIP; -DEF_ISEL(FCOM_ST0_X87) = FCOM; -DEF_ISEL(FCOM_ST0_X87_DCD0) = FCOM; +DEF_ISEL(FCOM_ST0_X87) = FCOM; +DEF_ISEL(FCOM_ST0_X87_DCD0) = FCOM; DEF_ISEL(FCOM_ST0_MEMmem32real) = FCOMmem; DEF_ISEL(FCOM_ST0_MEMm64real) = FCOMmem; -DEF_ISEL(FCOMP_ST0_X87) = FCOMP; +DEF_ISEL(FCOMP_ST0_X87) = FCOMP; DEF_ISEL(FCOMP_ST0_MEMmem32real) = FCOMPmem; DEF_ISEL(FCOMP_ST0_MEMm64real) = FCOMPmem; -DEF_ISEL(FCOMP_ST0_X87_DCD1) = FCOMP; -DEF_ISEL(FCOMP_ST0_X87_DED0) = FCOMP; +DEF_ISEL(FCOMP_ST0_X87_DCD1) = FCOMP; +DEF_ISEL(FCOMP_ST0_X87_DED0) = FCOMP; DEF_ISEL(FCOMPP) = DoFCOMPP; namespace { @@ -1320,29 +1326,30 @@ namespace { DEF_FPU_SEM(DoFRNDINT) { SetFPUIpOp(); - auto st0 = Read(X87_ST0); - auto rounded = FRoundUsingMode64(st0); + float80_t st0 = Read(X87_ST0); + float80_t rounded = FRoundUsingMode80(st0); state.sw.ie |= IsSignalingNaN(st0); state.sw.de = IsDenormal(st0); if (!IsNaN(rounded)) { state.sw.pe = st0 != rounded; } - // state.sw.c1 = __builtin_isgreater(FAbs(rounded), FAbs(st0)) ? 1_u8 : 0_u8; + + //state.sw.c1 = __builtin_isgreater(FAbs(rounded), FAbs(st0)) ? 1_u8 : 0_u8; Write(X87_ST0, rounded); return memory; } DEF_FPU_SEM(DoFYL2X) { SetFPUIpOp(); - auto st0 = Read(X87_ST0); - auto st1 = Read(X87_ST1); + auto st0 = Float64(Read(X87_ST0)); + auto st1 = Float64(Read(X87_ST1)); state.sw.ze = IsZero(st0); state.sw.de = IsDenormal(st0) | IsDenormal(st1); state.sw.ie = (IsSignalingNaN(st0) | IsSignalingNaN(st1)) || (IsNegative(st0) && !IsInfinite(st0) && !state.sw.ze); - auto res = FMul64(st1, __builtin_log2(st0)); + auto res = FMul64(st1, FLog2(st0)); state.sw.pe = IsImprecise(res); - Write(X87_ST1, res); + Write(X87_ST1, Float80(res)); (void) POP_X87_STACK(); return memory; } @@ -1354,20 +1361,20 @@ DEF_FPU_SEM(DoFYL2XP1) { state.sw.ze = IsZero(st0); state.sw.de = IsDenormal(st0) | IsDenormal(st1); state.sw.ie = IsSignalingNaN(st0) | IsSignalingNaN(st1); - auto res = FMul(st1, __builtin_log2(FAdd(st0, 1.0))); + auto res = FMul(st1, Float80(FLog2(FAdd(st0, Float80(1.0))))); state.sw.pe = IsImprecise(res); Write(X87_ST1, res); (void) POP_X87_STACK(); return memory; } -DEF_FPU_SEM(FFREE, RF80 src) { +DEF_FPU_SEM(FFREE, RF80W src) { SetFPUIpOp(); (void) src; return memory; } -DEF_FPU_SEM(FFREEP, RF80 src) { +DEF_FPU_SEM(FFREEP, RF80W src) { SetFPUIpOp(); (void) POP_X87_STACK(); (void) src; @@ -1443,14 +1450,14 @@ DEF_FPU_SEM(FCMOVB, D dst, S1 src1) { } // namespace -DEF_ISEL(FCMOVNU_ST0_X87) = FCMOVNP; -DEF_ISEL(FCMOVNB_ST0_X87) = FCMOVNB; -DEF_ISEL(FCMOVNE_ST0_X87) = FCMOVNZ; -DEF_ISEL(FCMOVBE_ST0_X87) = FCMOVBE; -DEF_ISEL(FCMOVNBE_ST0_X87) = FCMOVNBE; -DEF_ISEL(FCMOVU_ST0_X87) = FCMOVP; -DEF_ISEL(FCMOVE_ST0_X87) = FCMOVZ; -DEF_ISEL(FCMOVB_ST0_X87) = FCMOVB; +DEF_ISEL(FCMOVNU_ST0_X87) = FCMOVNP; +DEF_ISEL(FCMOVNB_ST0_X87) = FCMOVNB; +DEF_ISEL(FCMOVNE_ST0_X87) = FCMOVNZ; +DEF_ISEL(FCMOVBE_ST0_X87) = FCMOVBE; +DEF_ISEL(FCMOVNBE_ST0_X87) = FCMOVNBE; +DEF_ISEL(FCMOVU_ST0_X87) = FCMOVP; +DEF_ISEL(FCMOVE_ST0_X87) = FCMOVZ; +DEF_ISEL(FCMOVB_ST0_X87) = FCMOVB; namespace { diff --git a/tests/X86/Run.cpp b/tests/X86/Run.cpp index 6ae40b1fb..05b66d78f 100644 --- a/tests/X86/Run.cpp +++ b/tests/X86/Run.cpp @@ -189,11 +189,11 @@ MAKE_RW_MEMORY(64) MAKE_RW_FP_MEMORY(32) 
MAKE_RW_FP_MEMORY(64) -NEVER_INLINE float64_t __remill_read_memory_f80(Memory *, addr_t addr) { - LongDoubleStorage storage; +NEVER_INLINE float80_t __remill_read_memory_f80(Memory *, addr_t addr) { + LongDoubleStorage storage{0.0, 0}; storage.val = AccessMemory(addr); auto val_long = *reinterpret_cast<long double *>(&storage); - return static_cast<float64_t>(val_long); + return static_cast<float80_t>(val_long); } NEVER_INLINE float64_t __remill_read_memory_f128(Memory *, addr_t) { @@ -202,8 +202,8 @@ NEVER_INLINE float64_t __remill_read_memory_f128(Memory *, addr_t) { } NEVER_INLINE Memory *__remill_write_memory_f80(Memory *memory, addr_t addr, - float64_t val) { - LongDoubleStorage storage; + float80_t val) { + LongDoubleStorage storage{0.0, 0}; auto val_long = static_cast<long double>(val); memcpy(&storage, &val_long, sizeof(val_long)); AccessMemory(addr) = storage.val; @@ -454,7 +454,7 @@ void __remill_mark_as_used(void *mem) { } // extern C -typedef Memory *(LiftedFunc)(X86State &, addr_t, Memory *); +typedef Memory *(LiftedFunc) (X86State &, addr_t, Memory *); // Mapping of test name to translated function. static std::map<std::string, LiftedFunc *> gTranslatedFuncs;
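With the harness intrinsics above now taking and returning `float80_t`, a lifted test can round-trip an 80-bit value through emulated memory. A hypothetical check (the value is made up for illustration; `Memory`, `addr_t`, and the two intrinsics are the ones declared in this patch):

#include <cassert>

inline void F80MemoryRoundTripSketch(Memory *memory, addr_t addr) {
  float80_t in = 2.718281828459045L;  // built via float80_t's long double ctor
  memory = __remill_write_memory_f80(memory, addr, in);
  float80_t out = __remill_read_memory_f80(memory, addr);
  assert(static_cast<long double>(in) == static_cast<long double>(out));
}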