diff --git a/sse2neon.h b/sse2neon.h index 6a8a3773..50597cd0 100644 --- a/sse2neon.h +++ b/sse2neon.h @@ -2458,7 +2458,7 @@ FORCE_INLINE __m128 _mm_set_ps1(float _w) // the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, // _MM_ROUND_TOWARD_ZERO // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_ROUNDING_MODE -FORCE_INLINE void _MM_SET_ROUNDING_MODE(int rounding) +FORCE_INLINE_OPTNONE void _MM_SET_ROUNDING_MODE(int rounding) { union { fpcr_bitfield field; @@ -2520,7 +2520,7 @@ FORCE_INLINE __m128 _mm_set1_ps(float _w) // integer a. // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setcsr // FIXME: _mm_setcsr() implementation supports changing the rounding mode only. -FORCE_INLINE void _mm_setcsr(unsigned int a) +FORCE_INLINE_OPTNONE void _mm_setcsr(unsigned int a) { _MM_SET_ROUNDING_MODE(a); } @@ -9258,7 +9258,7 @@ FORCE_INLINE int64_t _mm_popcnt_u64(uint64_t a) #endif } -FORCE_INLINE void _sse2neon_mm_set_denormals_zero_mode(unsigned int flag) +FORCE_INLINE_OPTNONE void _sse2neon_mm_set_denormals_zero_mode(unsigned int flag) { // AArch32 Advanced SIMD arithmetic always uses the Flush-to-zero setting, // regardless of the value of the FZ bit. 
diff --git a/tests/common.h b/tests/common.h index 7978d7b3..c1f072fb 100644 --- a/tests/common.h +++ b/tests/common.h @@ -51,6 +51,18 @@ typedef union ALIGN_STRUCT(16) SIMDVec { #endif #endif +// OPTNONE marks a function to be compiled without optimization on GCC and +// Clang; on any other compiler it expands to nothing. +#if defined(__GNUC__) && !defined(__clang__) +#pragma push_macro("OPTNONE") +#define OPTNONE __attribute__((optimize("O0"))) +#elif defined(__clang__) +#pragma push_macro("OPTNONE") +#define OPTNONE __attribute__((optnone)) +#else +#define OPTNONE +#endif + #define ASSERT_RETURN(x) \ if (!(x)) \ return TEST_FAIL; diff --git a/tests/impl.cpp b/tests/impl.cpp index 839a93d3..000508f6 100644 --- a/tests/impl.cpp +++ b/tests/impl.cpp @@ -4459,7 +4459,7 @@ result_t test_mm_cvtepi32_ps(const SSE2NEONTestImpl &impl, uint32_t iter) return validateFloat(ret, trun[0], trun[1], trun[2], trun[3]); } -result_t test_mm_cvtpd_epi32(const SSE2NEONTestImpl &impl, uint32_t iter) +OPTNONE result_t test_mm_cvtpd_epi32(const SSE2NEONTestImpl &impl, uint32_t iter) { const double *_a = (const double *) impl.mTestFloatPointer1; int32_t d[2] = {}; @@ -11837,7 +11837,7 @@ result_t test_mm_popcnt_u64(const SSE2NEONTestImpl &impl, uint32_t iter) return TEST_SUCCESS; } -result_t test_mm_set_denormals_zero_mode(const SSE2NEONTestImpl &impl, +OPTNONE result_t test_mm_set_denormals_zero_mode(const SSE2NEONTestImpl &impl, uint32_t iter) { result_t res_set_denormals_zero_on, res_set_denormals_zero_off;