Skip to content

Commit

Permalink
Merge pull request #161 from munroesj52/cpsgn-fix1b
Browse files Browse the repository at this point in the history
Fix vec_copysign implementations per issue #158, Part 1B.
  • Loading branch information
munroesj52 authored Feb 2, 2022
2 parents ef3aff3 + 8f14ea4 commit 8e670f4
Show file tree
Hide file tree
Showing 8 changed files with 154 additions and 206 deletions.
33 changes: 22 additions & 11 deletions src/pveclib/vec_f128_ppc.h
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ vec_xsxexpqp (__binary128 f128)
* \endcode
*
* \note Would like to use the intrinsic scalar_extract_exp() here but
* this is not available until GCC 11.
* this is not available until GCC 11 (or later).
* Also GCC defines these scalar built-ins to return integer scalar
* values in GPRs.
* This would defeat the purpose of an all vector implementation.
Expand Down Expand Up @@ -1769,7 +1769,7 @@ static inline vec_xscvsqqp (vi128_t int128)
lo64 = int64[VEC_DW_L];
result = (hi64 * two64) + lo64;
// copy the __int128's sign into the __binary128 result
result = vec_copysignf128 (result, i_sign);
result = vec_copysignf128 (i_sign, result);
#elif defined (_ARCH_PWR8)
...
#endif
Expand Down Expand Up @@ -3806,26 +3806,37 @@ vec_all_iszerof128 (__binary128 f128)
#endif
}

/** \brief Copy the sign bit from f128y and merge with the magnitude
* from f128x. The merged result is returned as a __float128 value.
/** \brief Copy the sign bit from f128x and merge with the magnitude
* from f128y. The merged result is returned as a __float128 value.
*
* \note This operation was patterned after the intrinsic vec_cpsgn
* (altivec.h) introduced for POWER7 and VSX. It turns out the
* original (GCC 4.9) compiler implementation reversed the operands
* and does not match the PowerISA or the Vector Intrinsic Programming
* Reference manuals. Subsequent compilers and PVECLIB
* implementations replicated this (operand order) error.
* This has now been reported as a bug against the compilers, which are
* in the process of applying fixes and distributing updates.
* This version of PVECLIB is updated to match the Vector Intrinsic
* Programming Reference.
*
* |processor|Latency|Throughput|
* |--------:|:-----:|:---------|
* |power8 | 2-11 | 2/cycle |
* |power9 | 2 | 4/cycle |
*
* @param f128x a __float128 value containing the magnitude.
* @param f128y a __float128 value containing the sign bit.
* @return a __float128 value with magnitude from f128x and the
* sign of f128y.
* @param f128x a __float128 value containing the sign bit.
* @param f128y a __float128 value containing the magnitude.
* @return a __float128 value with magnitude from f128y and the
* sign of f128x.
*/
static inline __binary128
vec_copysignf128 (__binary128 f128x, __binary128 f128y)
{
__binary128 result;
#if _ARCH_PWR9
__asm__(
"xscpsgnqp %0,%2,%1;\n"
"xscpsgnqp %0,%1,%2;\n"
: "=v" (result)
: "v" (f128x), "v" (f128y)
:);
Expand All @@ -3835,7 +3846,7 @@ vec_copysignf128 (__binary128 f128x, __binary128 f128y)
tmpx = vec_xfer_bin128_2_vui32t (f128x);
tmpy = vec_xfer_bin128_2_vui32t (f128y);

tmp = vec_sel (tmpx, tmpy, signmask);
tmp = vec_sel (tmpy, tmpx, signmask);
result = vec_xfer_vui32t_2_bin128 (tmp);
#endif
return (result);
Expand Down Expand Up @@ -8006,7 +8017,7 @@ static inline vec_xscvsqqp (vi128_t int128)
lo64 = int64[VEC_DW_L];
result = (hi64 * two64) + lo64;
// Copy the __int128's sign into the __binary128 result
result = vec_copysignf128 (result, i_sign);
result = vec_copysignf128 (i_sign, result);
#elif defined (_ARCH_PWR8)
vui64_t q_exp;
vui128_t q_sig;
Expand Down
37 changes: 29 additions & 8 deletions src/pveclib/vec_f32_ppc.h
Original file line number Diff line number Diff line change
Expand Up @@ -788,31 +788,52 @@ vec_any_iszerof32 (vf32_t vf32)
#endif
}

/** \brief Copy the sign bit from vf32y merged with magnitude from
* vf32x and return the resulting vector float values.
/** \brief Copy the sign bit from vf32x merged with magnitude from
* vf32y and return the resulting vector float values.
*
* \note This operation was patterned after the intrinsic vec_cpsgn
* (altivec.h) introduced for POWER7 and VSX. It turns out the
* original (GCC 4.9) compiler implementation reversed the operands
* and does not match the PowerISA or the Vector Intrinsic Programming
* Reference manuals. Subsequent compilers and PVECLIB
* implementations replicated this (operand order) error.
* This has now been reported as a bug against the compilers, which are
* in the process of applying fixes and distributing updates.
* This version of PVECLIB is updated to match the Vector Intrinsic
* Programming Reference. This implementation is independent of the
* compilers' update status.
*
* |processor|Latency|Throughput|
* |--------:|:-----:|:---------|
* |power8 | 6-7 | 2/cycle |
* |power9 | 2 | 2/cycle |
*
* @param vf32x vector float values containing the magnitudes.
* @param vf32y vector float values containing the sign bits.
* @return vector float values with magnitude from vf32x and the
* sign of vf32y.
* @param vf32x vector float values containing the sign bits.
* @param vf32y vector float values containing the magnitudes.
* @return vector float values with magnitude from vf32y and the
* sign of vf32x.
*/
static inline vf32_t
vec_copysignf32 (vf32_t vf32x, vf32_t vf32y)
{
#if _ARCH_PWR7
/* P9 has a 2 cycle xvcpsgnsp and eliminates a const load. */
#ifdef PVECLIB_CPSGN_FIXED
return (vec_cpsgn (vf32x, vf32y));
#else
vf32_t result;
__asm__(
"xvcpsgnsp %x0,%x1,%x2;\n"
: "=wa" (result)
: "wa" (vf32x), "wa" (vf32y)
:);
return (result);
#endif
#else
const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000,
0x80000000, 0x80000000);
vf32_t result;

result = (vf32_t)vec_sel ((vui32_t)vf32x, (vui32_t)vf32y, signmask);
result = (vf32_t)vec_sel ((vui32_t)vf32y, (vui32_t)vf32x, signmask);
return (result);
#endif
}
Expand Down
46 changes: 34 additions & 12 deletions src/pveclib/vec_f64_ppc.h
Original file line number Diff line number Diff line change
Expand Up @@ -780,31 +780,53 @@ vec_any_iszerof64 (vf64_t vf64)
#endif
}

/** \brief Copy the sign bit from vf64y merged with magnitude from
* vf64x and return the resulting vector double values.
/** \brief Copy the sign bit from vf64x merged with magnitude from
* vf64y and return the resulting vector double values.
*
* \note This operation was patterned after the intrinsic vec_cpsgn
* (altivec.h) introduced for POWER7 and VSX. It turns out the
* original (GCC 4.9) compiler implementation reversed the operands
* and does not match the PowerISA or the Vector Intrinsic Programming
* Reference manuals. Subsequent compilers and PVECLIB
* implementations replicated this (operand order) error.
* This has now been reported as a bug against the compilers, which are
* in the process of applying fixes and distributing updates.
* This version of PVECLIB is updated to match the Vector Intrinsic
* Programming Reference. This implementation is independent of the
* compilers' update status.
*
* |processor|Latency|Throughput|
* |--------:|:-----:|:---------|
* |power8 | 6-7 | 2/cycle |
* |power9 | 2 | 2/cycle |
*
* @param vf64x vector double values containing the magnitudes.
* @param vf64y vector double values containing the sign bits.
* @return vector double values with magnitude from vf64x and the
* sign of vf64y.
* @param vf64x vector double values containing the sign bits.
* @param vf64y vector double values containing the magnitudes.
* @return vector double values with magnitude from vf64y and the
* sign of vf64x.
*/
static inline vf64_t
vec_copysignf64 (vf64_t vf64x , vf64_t vf64y)
vec_copysignf64 (vf64_t vf64x, vf64_t vf64y)
{
#if _ARCH_PWR7
/* P9 has a 2 cycle xvcpsgndp and eliminates a const load. */
return (vec_cpsgn (vf64x, vf64y));
#ifdef PVECLIB_CPSGN_FIXED
return (vec_cpsgn (vf64x, vf64y));
#else
vf64_t result;
__asm__(
"xvcpsgndp %x0,%x1,%x2;\n"
: "=wa" (result)
: "wa" (vf64x), "wa" (vf64y)
:);
return (result);
#endif
#else
const vui32_t signmask = CONST_VINT128_W(0x80000000, 0, 0x80000000, 0);
vf64_t result;
const vui32_t signmask = CONST_VINT128_W(0x80000000, 0, 0x80000000, 0);
vf64_t result;

result = (vf64_t)vec_sel ((vui32_t)vf64x, (vui32_t)vf64y, signmask);
return (result);
result = (vf64_t) vec_sel ((vui32_t) vf64y, (vui32_t) vf64x, signmask);
return (result);
#endif
}

Expand Down
Loading

0 comments on commit 8e670f4

Please sign in to comment.