diff --git a/src/rp2_common/pico_float/float_single_hazard3.S b/src/rp2_common/pico_float/float_single_hazard3.S index 1e57f1f25..ee2f1c72b 100644 --- a/src/rp2_common/pico_float/float_single_hazard3.S +++ b/src/rp2_common/pico_float/float_single_hazard3.S @@ -216,13 +216,10 @@ __mulsf3: clz a0, a4 sll a4, a4, a0 sub a2, a2, a0 - // After normalising we can calculate the final exponent, since rounding - // cannot increase the exponent for multiplication (unlike addition) add a2, a2, a3 // Subtract redundant bias term (127), add 1 for normalisation correction addi a2, a2, -126 blez a2, __mulsf_underflow - bge a2, t0, __mulsf_overflow // Gather sticky bits from low fraction: snez a1, a1 @@ -231,6 +228,10 @@ __mulsf3: bexti a1, a4, 8 add a4, a4, a1 addi a4, a4, 127 + // Check carry-out: exponent may increase due to rounding + bgez a4, 2f +1: + bge a2, t0, __mulsf_overflow // Pack it and ship it packh a2, a2, a6 slli a2, a2, 23 @@ -238,6 +239,10 @@ __mulsf3: srli a4, a4, 9 add a0, a4, a2 ret +2: + srli a4, a4, 1 + addi a2, a2, 1 + j 1b __mulsf_underflow: // Signed zero diff --git a/test/pico_float_test/pico_float_test_hazard3.c b/test/pico_float_test/pico_float_test_hazard3.c index 61f2a8e19..47da23b8d 100644 --- a/test/pico_float_test/pico_float_test_hazard3.c +++ b/test/pico_float_test/pico_float_test_hazard3.c @@ -149,6 +149,18 @@ test_t mul_directed_tests[] = { // 1.25 x 2^-63 x 1.25 x 2^-64 = 0 // (normal inputs with subnormal output, and we claim to be FTZ) {0x20200000u, 0x1fa00000u, 0x00000000u}, + // 1.333333 (rounded down) x 1.5 = 2 - 1 ulp + {0x3faaaaaau, 0x3fc00000u, 0x3fffffffu}, + // 1.333333 (rounded down) x (1.5 + 1 ulp) = 2 exactly + {0x3faaaaaau, 0x3fc00001u, 0x40000000u}, + // (1.333333 (rounded down) + 1 ulp) x 1.5 = 2 exactly + {0x3faaaaabu, 0x3fc00000u, 0x40000000u}, + // (1.25 - 1 ulp) x (0.8 + 1 ulp) = 1 exactly (exponent increases after rounding) + {0x3f9fffffu, 0x3f4cccceu, 0x3f800000u}, + // as above, but overflow on exponent increase -> +inf + {0x3f9fffffu, 0x7f4cccceu, 0x7f800000u}, + // subtract 1 ulp from rhs -> largest normal + {0x3f9fffffu, 0x7f4ccccdu, 0x7f7fffffu}, }; #define N_RANDOM_TESTS 1000