diff --git a/include/eve/module/core/regular/impl/fam.hpp b/include/eve/module/core/regular/impl/fam.hpp
index 1a3262da95..d48350e32a 100644
--- a/include/eve/module/core/regular/impl/fam.hpp
+++ b/include/eve/module/core/regular/impl/fam.hpp
@@ -105,6 +105,6 @@ namespace eve::detail
     }
     // REGULAR ---------------------
     else
-      return a + b * c;
+      return fma(b, c, a);
   }
 }
diff --git a/include/eve/module/core/regular/impl/simd/x86/fam.hpp b/include/eve/module/core/regular/impl/simd/x86/fam.hpp
index 03c5cc164d..cc2995dec1 100644
--- a/include/eve/module/core/regular/impl/simd/x86/fam.hpp
+++ b/include/eve/module/core/regular/impl/simd/x86/fam.hpp
@@ -8,49 +8,43 @@
 #pragma once

 #include
+#include
 #include
-#include
-
-#include
+#include

 namespace eve::detail
 {
   // -----------------------------------------------------------------------------------------------
   // Masked case
   template<conditional_expr C, typename T, typename N, callable_options O>
-  EVE_FORCEINLINE wide<T, N> fam_(EVE_SUPPORTS(sse2_),
-                                  C const &cx,
-                                  O const &opts,
+  EVE_FORCEINLINE wide<T, N> fam_(EVE_REQUIRES(avx512_),
+                                  C const &mask,
+                                  O const &,
                                   wide<T, N> const &v,
                                   wide<T, N> const &w,
                                   wide<T, N> const &x) noexcept requires x86_abi<abi_t<T, N>>
   {
-    constexpr auto c = categorize<wide<T, N>>();
+    // NOTE: As these masked versions are at the AVX512 level, they always use a masked variant of
+    // the hardware fused multiply-add instructions, which ensures the pedantic behavior by default;
+    // hence we don't care about PEDANTIC. As usual, we don't care about PROMOTE as we only accept
+    // similar types.

-    if constexpr( C::is_complete || abi_t<T, N>::is_wide_logical )
-    {
-      return fam.behavior(cpu_{}, opts, v, w, x);
-    }
-    else
+    if constexpr( C::is_complete )
+      return alternative(mask, v, as(v));
+    else if constexpr( !C::has_alternative )
     {
-      auto m = expand_mask(cx, as<wide<T, N>> {}).storage().value;
+      constexpr auto c = categorize<wide<T, N>>();
+      [[maybe_unused]] auto const m = expand_mask(mask, as(v)).storage().value;

-      if constexpr( !C::has_alternative )
-      {
-        if constexpr( c == category::float32x16 )      return _mm512_mask3_fmadd_ps(w, x, v, m);
-        else if constexpr( c == category::float64x8  ) return _mm512_mask3_fmadd_pd(w, x, v, m);
-        else if constexpr( c == category::float32x8  ) return _mm256_mask3_fmadd_ps(w, x, v, m);
-        else if constexpr( c == category::float64x4  ) return _mm256_mask3_fmadd_pd(w, x, v, m);
-        else if constexpr( c == category::float32x8  ) return _mm128_mask3_fmadd_ps(w, x, v, m);
-        else if constexpr( c == category::float64x4  ) return _mm128_mask3_fmadd_pd(w, x, v, m);
-        else return fam.behavior(cpu_{}, opts, v, w, x);
-      }
-      else
-      {
-        auto src = alternative(cx, v, as<wide<T, N>> {});
-        return fam.behavior(cpu_{}, opts, v, w, x);
-      }
+      if constexpr( c == category::float32x16 )      return _mm512_mask3_fmadd_ps(w, x, v, m);
+      else if constexpr( c == category::float64x8  ) return _mm512_mask3_fmadd_pd(w, x, v, m);
+      else if constexpr( c == category::float32x8  ) return _mm256_mask3_fmadd_ps(w, x, v, m);
+      else if constexpr( c == category::float64x4  ) return _mm256_mask3_fmadd_pd(w, x, v, m);
+      else if constexpr( c == category::float32x4  ) return _mm_mask3_fmadd_ps(w, x, v, m);
+      else if constexpr( c == category::float64x2  ) return _mm_mask3_fmadd_pd(w, x, v, m);
+      // No rounding issue with integers, so we just mask over regular FAM
+      else return if_else(mask, eve::fam(v, w, x), v);
     }
+    else return if_else(mask, eve::fam(v, w, x), alternative(mask, v, as(v)));
   }
 }
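A quick semantic check on the masked fam lowering above: eve::fam(a, b, c) computes a + b * c, and a plain mask keeps the first operand on the lanes where the condition is false. That is exactly the contract of the AVX-512 mask3 forms, whose third source operand is both the addend and the blend source, which is why the intrinsics can be fed w, x, v and the mask directly. A minimal usage-level sketch (illustrative only, not taken from the patch, assuming an EVE build with <eve/module/core.hpp> available):

    #include <eve/module/core.hpp>
    #include <eve/wide.hpp>

    eve::wide<float> masked_fam(eve::logical<eve::wide<float>> m,
                                eve::wide<float> a, eve::wide<float> b, eve::wide<float> c)
    {
      // eve::fam[m](a, b, c) yields a + b * c where m is true and a where m is false,
      // i.e. it behaves like eve::if_else(m, eve::fam(a, b, c), a).
      return eve::fam[m](a, b, c);
    }
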
diff --git a/include/eve/module/core/regular/impl/simd/x86/fanm.hpp b/include/eve/module/core/regular/impl/simd/x86/fanm.hpp
index 3e844ff22c..02977985e8 100644
--- a/include/eve/module/core/regular/impl/simd/x86/fanm.hpp
+++ b/include/eve/module/core/regular/impl/simd/x86/fanm.hpp
@@ -8,48 +8,47 @@
 #pragma once

 #include
+#include
 #include
-#include
-
-#include
+#include

 namespace eve::detail
 {
   // -----------------------------------------------------------------------------------------------
   // Masked case
-template
-EVE_FORCEINLINE wide<T, N>
-                fanm_(EVE_SUPPORTS(sse2_),
-                      C const &cx,
-                      wide<T, N> const &v,
-                      wide<T, N> const &w,
-                      wide<T, N> const &x) noexcept requires x86_abi<abi_t<T, N>>
-{
-  constexpr auto c = categorize<wide<T, N>>();
-
-  if constexpr( C::is_complete || abi_t<T, N>::is_wide_logical )
-  {
-    return fanm_(EVE_RETARGET(cpu_), cx, v, w, x);
-  }
-  else
+  template<conditional_expr C, typename T, typename N>
+  EVE_FORCEINLINE wide<T, N> fanm_(EVE_SUPPORTS(avx512_),
+                                   C const &mask,
+                                   wide<T, N> const &v,
+                                   wide<T, N> const &w,
+                                   wide<T, N> const &x) noexcept
+  requires x86_abi<abi_t<T, N>>
   {
-    auto m = expand_mask(cx, as<wide<T, N>> {}).storage().value;
+    // NOTE: As these masked versions are at the AVX512 level, they always use a masked variant of
+    // the hardware fused multiply-add instructions, which ensures the pedantic behavior by default;
+    // hence we don't care about PEDANTIC. As usual, we don't care about PROMOTE as we only accept
+    // similar types.

-    if constexpr( !C::has_alternative )
+    if constexpr( C::is_complete )
+      return alternative(mask, v, as(v));
+    else if constexpr( !C::has_alternative )
     {
-      if constexpr( c == category::float32x16 )      return _mm512_mask3_fnmadd_ps(w, x, v, m);
-      else if constexpr( c == category::float64x8  ) return _mm512_mask3_fnmadd_pd(w, x, v, m);
-      else if constexpr( c == category::float32x8  ) return _mm256_mask3_fnmadd_ps(w, x, v, m);
-      else if constexpr( c == category::float64x4  ) return _mm256_mask3_fnmadd_pd(w, x, v, m);
-      else if constexpr( c == category::float32x8  ) return _mm128_mask3_fnmadd_ps(w, x, v, m);
-      else if constexpr( c == category::float64x4  ) return _mm128_mask3_fnmadd_pd(w, x, v, m);
-      else return fanm_(EVE_RETARGET(cpu_), cx, v, w, x);
-    }
-    else
-    {
-      auto src = alternative(cx, v, as<wide<T, N>> {});
-      return fanm_(EVE_RETARGET(cpu_), cx, v, w, x);
+      constexpr auto c = categorize<wide<T, N>>();
+      [[maybe_unused]] auto const m = expand_mask(mask, as(v)).storage().value;
+
+      if constexpr( c == category::float32x16 )      return _mm512_mask3_fnmadd_ps(w, x, v, m);
+      else if constexpr( c == category::float64x8  ) return _mm512_mask3_fnmadd_pd(w, x, v, m);
+      else if constexpr( c == category::float32x8  ) return _mm256_mask3_fnmadd_ps(w, x, v, m);
+      else if constexpr( c == category::float64x4  ) return _mm256_mask3_fnmadd_pd(w, x, v, m);
+      else if constexpr( c == category::float32x4  ) return _mm_mask3_fnmadd_ps(w, x, v, m);
+      else if constexpr( c == category::float64x2  ) return _mm_mask3_fnmadd_pd(w, x, v, m);
+      // No rounding issue with integers, so we just mask over regular FANM
+      else return if_else(mask, eve::fanm(v, w, x), v);
     }
+    else return if_else(mask, eve::fanm(v, w, x), alternative(mask, v, as(v)));
   }
 }
-}
diff --git a/include/eve/module/core/regular/impl/simd/x86/fma.hpp b/include/eve/module/core/regular/impl/simd/x86/fma.hpp
index c62415feee..b1af826eb1 100644
--- a/include/eve/module/core/regular/impl/simd/x86/fma.hpp
+++ b/include/eve/module/core/regular/impl/simd/x86/fma.hpp
@@ -15,8 +15,11 @@ namespace eve::detail
 {
   template<typename T, typename N, callable_options O>
-  EVE_FORCEINLINE wide<T, N>
-  fma_(EVE_REQUIRES(sse2_), O const& opts, wide<T, N> const& a, wide<T, N> const& b, wide<T, N> const& c) noexcept
+  EVE_FORCEINLINE wide<T, N> fma_(EVE_REQUIRES(sse2_),
+                                  O const& opts,
+                                  wide<T, N> const& a,
+                                  wide<T, N> const& b,
+                                  wide<T, N> const& c) noexcept
   requires x86_abi<abi_t<T, N>>
   {
     // Integral don't do anything special ----
@@ -47,9 +50,12 @@ namespace eve::detail
   }

   template<conditional_expr C, typename T, typename N, callable_options O>
-  EVE_FORCEINLINE wide<T, N>
-  fma_( EVE_REQUIRES(avx512_), C const& mask, O const&
-      , wide<T, N> const& a, wide<T, N> const& b, wide<T, N> const& c
+  EVE_FORCEINLINE wide<T, N> fma_( EVE_REQUIRES(avx512_),
+                                   C const& mask,
+                                   O const&,
+                                   wide<T, N> const& a,
+                                   wide<T, N> const& b,
+                                   wide<T, N> const& c
       ) noexcept
   requires x86_abi<abi_t<T, N>>
   {
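A note on the PEDANTIC split used by the unmasked overloads (fma above; fms, fnma and fnms below): with FMA3 hardware the pedantic call can simply take the regular path, because the instruction already rounds once, while without it the generic pedantic fallback is needed, since the unfused a * b + c rounds twice and can differ in the last bit. A small scalar illustration of that difference (illustrative only, plain C++, not EVE code):

    #include <cmath>
    #include <cstdio>

    int main()
    {
      // Chosen so that a * b does not fit in a double: the product is 1 + 2^-26 + 2^-54.
      double a = 1.0 + 0x1p-27;
      double b = 1.0 + 0x1p-27;
      double c = -1.0;

      double fused   = std::fma(a, b, c); // one rounding, like VFMADD/VFMSUB
      double unfused = a * b + c;         // rounds the product first, then the sum

      std::printf("fused   = %a\nunfused = %a\n", fused, unfused); // the two differ by 2^-54
    }
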
diff --git a/include/eve/module/core/regular/impl/simd/x86/fms.hpp b/include/eve/module/core/regular/impl/simd/x86/fms.hpp
index 91f88e8dba..cda0ffe7b5 100644
--- a/include/eve/module/core/regular/impl/simd/x86/fms.hpp
+++ b/include/eve/module/core/regular/impl/simd/x86/fms.hpp
@@ -7,75 +7,82 @@
 //==================================================================================================
 #pragma once
+
 #include
+#include
 #include
-#include
+#include
 #include
 #include

 namespace eve::detail
 {
-template
-EVE_FORCEINLINE wide<T, N>
-                fms_(EVE_SUPPORTS(avx2_),
-                     wide<T, N> const &a,
-                     wide<T, N> const &b,
-                     wide<T, N> const &c) noexcept requires x86_abi<abi_t<T, N>>
-{
-  if constexpr( std::is_integral_v<T> ) { return fms_(EVE_RETARGET(cpu_), a, b, c); }
-  else
+  template<typename T, typename N, callable_options O>
+  EVE_FORCEINLINE wide<T, N> fms_(EVE_REQUIRES(avx2_),
+                                  O const &opts,
+                                  wide<T, N> const &a,
+                                  wide<T, N> const &b,
+                                  wide<T, N> const &c) noexcept
+  requires x86_abi<abi_t<T, N>>
   {
-    constexpr auto cat = categorize<wide<T, N>>();
-
-    if constexpr( cat == category::float64x8 ) return _mm512_fmsub_pd(a, b, c);
-    else if constexpr( cat == category::float32x16 ) return _mm512_fmsub_ps(a, b, c);
-    else if constexpr( supports_fma3 )
+    // Integral types don't do anything special ----
+    if constexpr( std::integral<T> ) return fms.behavior(cpu_{}, opts, a, b, c);
+    // PEDANTIC ---
+    else if constexpr( O::contains(pedantic2) )
     {
-      if constexpr( cat == category::float64x4 ) return _mm256_fmsub_pd(a, b, c);
-      else if constexpr( cat == category::float64x2 ) return _mm_fmsub_pd(a, b, c);
-      else if constexpr( cat == category::float32x8 ) return _mm256_fmsub_ps(a, b, c);
-      else if constexpr( cat == category::float32x4 ) return _mm_fmsub_ps(a, b, c);
+      if constexpr( supports_fma3 ) return fms(a, b, c);
+      else return fms.behavior(cpu_{}, opts, a, b, c);
+    }
+    // REGULAR ---
+    // we don't care about PROMOTE as we only accept similar types.
+    else
+    {
+      constexpr auto cat = categorize<wide<T, N>>();
+
+      if constexpr( cat == category::float64x8 ) return _mm512_fmsub_pd(a, b, c);
+      else if constexpr( cat == category::float32x16 ) return _mm512_fmsub_ps(a, b, c);
+      else if constexpr( supports_fma3 )
+      {
+        if constexpr( cat == category::float64x4 ) return _mm256_fmsub_pd(a, b, c);
+        else if constexpr( cat == category::float64x2 ) return _mm_fmsub_pd(a, b, c);
+        else if constexpr( cat == category::float32x8 ) return _mm256_fmsub_ps(a, b, c);
+        else if constexpr( cat == category::float32x4 ) return _mm_fmsub_ps(a, b, c);
+      }
+      else return fma(a, b, -c);
     }
-    else return fma(a, b, -c);
   }
-}

   // -----------------------------------------------------------------------------------------------
   // Masked case
-template
-EVE_FORCEINLINE wide<T, N>
-                fms_(EVE_SUPPORTS(avx512_),
-                     C const &cx,
-                     wide<T, N> const &v,
-                     wide<T, N> const &w,
-                     wide<T, N> const &x) noexcept requires x86_abi<abi_t<T, N>>
-{
-  constexpr auto c = categorize<wide<T, N>>();
-
-  if constexpr( C::is_complete || abi_t<T, N>::is_wide_logical )
-  {
-    return fms_(EVE_RETARGET(cpu_), cx, v, w, x);
-  }
-  else
+  template<conditional_expr C, typename T, typename N, callable_options O>
+  EVE_FORCEINLINE wide<T, N> fms_(EVE_SUPPORTS(avx512_),
+                                  C const &mask,
+                                  O const &,
+                                  wide<T, N> const &v,
+                                  wide<T, N> const &w,
+                                  wide<T, N> const &x) noexcept
+  requires x86_abi<abi_t<T, N>>
   {
-    auto m = expand_mask(cx, as<wide<T, N>> {}).storage().value;
+    // NOTE: As these masked versions are at the AVX512 level, they always use a masked variant of
+    // the hardware fused multiply-add instructions, which ensures the pedantic behavior by default;
+    // hence we don't care about PEDANTIC. As usual, we don't care about PROMOTE as we only accept
+    // similar types.

-    if constexpr( !C::has_alternative )
+    if constexpr( C::is_complete ) return alternative(mask, v, as(v));
+    else if constexpr( !C::has_alternative )
     {
-      if constexpr( c == category::float32x16 )      return _mm512_mask_fmsub_ps(v, m, w, x);
+      constexpr auto c = categorize<wide<T, N>>();
+      [[maybe_unused]] auto const m = expand_mask(mask, as(v)).storage().value;
+
+      if constexpr( c == category::float32x16 )      return _mm512_mask_fmsub_ps(v, m, w, x);
       else if constexpr( c == category::float64x8  ) return _mm512_mask_fmsub_pd(v, m, w, x);
       else if constexpr( c == category::float32x8  ) return _mm256_mask_fmsub_ps(v, m, w, x);
       else if constexpr( c == category::float64x4  ) return _mm256_mask_fmsub_pd(v, m, w, x);
       else if constexpr( c == category::float32x4  ) return _mm_mask_fmsub_ps(v, m, w, x);
       else if constexpr( c == category::float64x2  ) return _mm_mask_fmsub_pd(v, m, w, x);
-      else return fms_(EVE_RETARGET(cpu_), cx, v, w, x);
-    }
-    else
-    {
-      auto src = alternative(cx, v, as<wide<T, N>> {});
-      return fms_(EVE_RETARGET(cpu_), cx, v, w, x);
+      // No rounding issue with integers, so we just mask over regular FMS
+      else return if_else(mask, eve::fms(v, w, x), v);
     }
+    else return if_else(mask, eve::fms(v, w, x), alternative(mask, v, as(v)));
   }
 }
-}
diff --git a/include/eve/module/core/regular/impl/simd/x86/fnma.hpp b/include/eve/module/core/regular/impl/simd/x86/fnma.hpp
index d39456682a..5a445a1ab3 100644
--- a/include/eve/module/core/regular/impl/simd/x86/fnma.hpp
+++ b/include/eve/module/core/regular/impl/simd/x86/fnma.hpp
@@ -19,8 +19,11 @@ namespace eve::detail
 {
   template<typename T, typename N, callable_options O>
-  EVE_FORCEINLINE wide<T, N>
-  fnma_(EVE_REQUIRES(sse2_), O const& opts, wide<T, N> const& a, wide<T, N> const& b, wide<T, N> const& c) noexcept
+  EVE_FORCEINLINE wide<T, N> fnma_(EVE_REQUIRES(sse2_),
+                                   O const& opts,
+                                   wide<T, N> const& a,
+                                   wide<T, N> const& b,
+                                   wide<T, N> const& c) noexcept
   requires x86_abi<abi_t<T, N>>
   {
     // Integral don't do anything special ----
@@ -51,9 +54,12 @@ namespace eve::detail
   }

   template<conditional_expr C, typename T, typename N, callable_options O>
-  EVE_FORCEINLINE wide<T, N>
-  fnma_( EVE_REQUIRES(avx512_), C const& mask, O const&
-       , wide<T, N> const& a, wide<T, N> const& b, wide<T, N> const& c
+  EVE_FORCEINLINE wide<T, N> fnma_( EVE_REQUIRES(avx512_),
+                                    C const& mask,
+                                    O const&,
+                                    wide<T, N> const& a,
+                                    wide<T, N> const& b,
+                                    wide<T, N> const& c
        ) noexcept
   requires x86_abi<abi_t<T, N>>
   {
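The next file, fnms.hpp, replaces the old forwarding `return fms(-a, b, c);` with direct selection of the VFNMSUB intrinsics; both forms compute -(a * b) - c, the intrinsic just does it in one instruction. A small consistency check (illustrative sketch with assumed values, not taken from the patch):

    #include <eve/module/core.hpp>
    #include <eve/wide.hpp>

    int main()
    {
      eve::wide<float> a{2.0f}, b{3.0f}, c{1.0f};

      auto direct  = eve::fnms(a, b, c); // -(a * b) - c  -> -7 on every lane
      auto via_fms = eve::fms(-a, b, c); // (-a) * b - c  -> -7 on every lane

      return eve::all(direct == via_fms) ? 0 : 1;
    }
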
diff --git a/include/eve/module/core/regular/impl/simd/x86/fnms.hpp b/include/eve/module/core/regular/impl/simd/x86/fnms.hpp
index 44581994d7..3e6e31d4dd 100644
--- a/include/eve/module/core/regular/impl/simd/x86/fnms.hpp
+++ b/include/eve/module/core/regular/impl/simd/x86/fnms.hpp
@@ -9,8 +9,7 @@
 #include
 #include
-#include
-#include
+#include
 #include
 #include
@@ -18,51 +17,72 @@ namespace eve::detail
 {
-template
-EVE_FORCEINLINE wide<T, N>
-                fnms_(EVE_SUPPORTS(avx2_),
-                      wide<T, N> const &a,
-                      wide<T, N> const &b,
-                      wide<T, N> const &c) noexcept requires x86_abi<abi_t<T, N>>
-{
-  return fms(-a, b, c);
-}
+  template<typename T, typename N, callable_options O>
+  EVE_FORCEINLINE wide<T, N> fnms_(EVE_REQUIRES(sse2_),
+                                   O const& opts,
+                                   wide<T, N> const& a,
+                                   wide<T, N> const& b,
+                                   wide<T, N> const& c) noexcept
+  requires x86_abi<abi_t<T, N>>
+  {
+    // Integral types don't do anything special ----
+    if constexpr( std::integral<T> ) return fnms.behavior(cpu_{}, opts, a, b, c);
+    // PEDANTIC ---
+    else if constexpr( O::contains(pedantic2) )
+    {
+      if constexpr( supports_fma3 ) return fnms(a, b, c);
+      else return fnms.behavior(cpu_{}, opts, a, b, c);
+    }
+    // REGULAR ---
+    // we don't care about PROMOTE as we only accept similar types.
+    else
+    {
+      constexpr auto cat = categorize<wide<T, N>>();
+
+      if constexpr( cat == category::float64x8 ) return _mm512_fnmsub_pd(a, b, c);
+      else if constexpr( cat == category::float32x16 ) return _mm512_fnmsub_ps(a, b, c);
+      else if constexpr( supports_fma3 )
+      {
+        if constexpr( cat == category::float64x4 ) return _mm256_fnmsub_pd(a, b, c);
+        else if constexpr( cat == category::float64x2 ) return _mm_fnmsub_pd (a, b, c);
+        else if constexpr( cat == category::float32x8 ) return _mm256_fnmsub_ps(a, b, c);
+        else if constexpr( cat == category::float32x4 ) return _mm_fnmsub_ps (a, b, c);
+      }
+      else return fnms.behavior(cpu_{}, opts, a, b, c);
+    }
+  }
+
   // -----------------------------------------------------------------------------------------------
   // Masked case
-template
-EVE_FORCEINLINE wide<T, N>
-                fnms_(EVE_SUPPORTS(sse2_),
-                      C const &cx,
-                      wide<T, N> const &v,
-                      wide<T, N> const &w,
-                      wide<T, N> const &x) noexcept requires x86_abi<abi_t<T, N>>
-{
-  constexpr auto c = categorize<wide<T, N>>();
-
-  if constexpr( C::is_complete || abi_t<T, N>::is_wide_logical )
-  {
-    return fnms_(EVE_RETARGET(cpu_), cx, v, w, x);
-  }
-  else
+  template<conditional_expr C, typename T, typename N, callable_options O>
+  EVE_FORCEINLINE wide<T, N> fnms_(EVE_SUPPORTS(avx512_),
+                                   C const &mask,
+                                   O const &,
+                                   wide<T, N> const &v,
+                                   wide<T, N> const &w,
+                                   wide<T, N> const &x) noexcept
+  requires x86_abi<abi_t<T, N>>
   {
-    auto m = expand_mask(cx, as<wide<T, N>> {}).storage().value;
+    // NOTE: As these masked versions are at the AVX512 level, they always use a masked variant of
+    // the hardware fused multiply-add instructions, which ensures the pedantic behavior by default;
+    // hence we don't care about PEDANTIC. As usual, we don't care about PROMOTE as we only accept
+    // similar types.

-    if constexpr( !C::has_alternative )
+    if constexpr( C::is_complete ) return alternative(mask, v, as(v));
+    else if constexpr( !C::has_alternative )
     {
-      if constexpr( c == category::float32x16 )      return _mm512_mask_fnmsub_ps(v, m, w, x);
+      constexpr auto c = categorize<wide<T, N>>();
+      [[maybe_unused]] auto const m = expand_mask(mask, as(v)).storage().value;
+
+      if constexpr( c == category::float32x16 )      return _mm512_mask_fnmsub_ps(v, m, w, x);
       else if constexpr( c == category::float64x8  ) return _mm512_mask_fnmsub_pd(v, m, w, x);
       else if constexpr( c == category::float32x8  ) return _mm256_mask_fnmsub_ps(v, m, w, x);
       else if constexpr( c == category::float64x4  ) return _mm256_mask_fnmsub_pd(v, m, w, x);
       else if constexpr( c == category::float32x4  ) return _mm_mask_fnmsub_ps(v, m, w, x);
       else if constexpr( c == category::float64x2  ) return _mm_mask_fnmsub_pd(v, m, w, x);
-      else return fnms_(EVE_RETARGET(cpu_), cx, v, w, x);
-    }
-    else
-    {
-      auto src = alternative(cx, v, as<wide<T, N>> {});
-      return fnms_(EVE_RETARGET(cpu_), cx, v, w, x);
+      // No rounding issue with integers, so we just mask over regular FNMS
+      else return if_else(mask, eve::fnms(v, w, x), v);
     }
+    else return if_else(mask, eve::fnms(v, w, x), alternative(mask, v, as(v)));
   }
 }
-}
diff --git a/include/eve/module/core/regular/impl/simd/x86/fsm.hpp b/include/eve/module/core/regular/impl/simd/x86/fsm.hpp
index 07bbc76d2d..d8b43952bc 100644
--- a/include/eve/module/core/regular/impl/simd/x86/fsm.hpp
+++ b/include/eve/module/core/regular/impl/simd/x86/fsm.hpp
@@ -8,48 +8,42 @@
 #pragma once

 #include
+#include
 #include
-#include
-
-#include
+#include

 namespace eve::detail
 {
   // -----------------------------------------------------------------------------------------------
   // Masked case
-template
-EVE_FORCEINLINE wide<T, N>
-                fsm_(EVE_SUPPORTS(sse2_),
-                     C const &cx,
-                     wide<T, N> const &v,
-                     wide<T, N> const &w,
-                     wide<T, N> const &x) noexcept requires x86_abi<abi_t<T, N>>
-{
-  constexpr auto c = categorize<wide<T, N>>();
-
-  if constexpr( C::is_complete || abi_t<T, N>::is_wide_logical )
-  {
-    return fsm_(EVE_RETARGET(cpu_), cx, v, w, x);
-  }
-  else
+  template<conditional_expr C, typename T, typename N, callable_options O>
+  EVE_FORCEINLINE wide<T, N> fsm_(EVE_SUPPORTS(avx512_),
+                                  C const &mask,
+                                  O const &,
+                                  wide<T, N> const &v,
+                                  wide<T, N> const &w,
+                                  wide<T, N> const &x) noexcept
+  requires x86_abi<abi_t<T, N>>
   {
-    auto m = expand_mask(cx, as<wide<T, N>> {}).storage().value;
+    // NOTE: As these masked versions are at the AVX512 level, they always use a masked variant of
+    // the hardware fused multiply-add instructions, which ensures the pedantic behavior by default;
+    // hence we don't care about PEDANTIC. As usual, we don't care about PROMOTE as we only accept
+    // similar types.

-    if constexpr( !C::has_alternative )
+    if constexpr( C::is_complete ) return alternative(mask, v, as(v));
+    else if constexpr( !C::has_alternative )
     {
-      if constexpr( c == category::float32x16 )      return _mm512_mask3_fmsub_ps(w, x, v, m);
+      constexpr auto c = categorize<wide<T, N>>();
+      [[maybe_unused]] auto const m = expand_mask(mask, as(v)).storage().value;
+
+      if constexpr( c == category::float32x16 )      return _mm512_mask3_fmsub_ps(w, x, v, m);
       else if constexpr( c == category::float64x8  ) return _mm512_mask3_fmsub_pd(w, x, v, m);
       else if constexpr( c == category::float32x8  ) return _mm256_mask3_fmsub_ps(w, x, v, m);
       else if constexpr( c == category::float64x4  ) return _mm256_mask3_fmsub_pd(w, x, v, m);
       else if constexpr( c == category::float32x8  ) return _mm128_mask3_fmsub_ps(w, x, v, m);
       else if constexpr( c == category::float64x4  ) return _mm128_mask3_fmsub_pd(w, x, v, m);
-      else return fsm_(EVE_RETARGET(cpu_), cx, v, w, x);
-    }
-    else
-    {
-      auto src = alternative(cx, v, as<wide<T, N>> {});
-      return fsm_(EVE_RETARGET(cpu_), cx, v, w, x);
+      // No rounding issue with integers, so we just mask over regular FSM
+      else return if_else(mask, eve::fsm(v, w, x), v);
     }
+    else return if_else(mask, eve::fsm(v, w, x), alternative(mask, v, as(v)));
   }
 }
-}
diff --git a/include/eve/module/core/regular/impl/simd/x86/fsnm.hpp b/include/eve/module/core/regular/impl/simd/x86/fsnm.hpp
index aa2b882e3f..eeba0d9e73 100644
--- a/include/eve/module/core/regular/impl/simd/x86/fsnm.hpp
+++ b/include/eve/module/core/regular/impl/simd/x86/fsnm.hpp
@@ -8,48 +8,42 @@
 #pragma once

 #include
+#include
 #include
-#include
-
-#include
+#include

 namespace eve::detail
 {
   // -----------------------------------------------------------------------------------------------
   // Masked case
-template
-EVE_FORCEINLINE wide<T, N>
-                fsnm_(EVE_SUPPORTS(sse2_),
-                      C const &cx,
-                      wide<T, N> const &v,
-                      wide<T, N> const &w,
-                      wide<T, N> const &x) noexcept requires x86_abi<abi_t<T, N>>
-{
-  constexpr auto c = categorize<wide<T, N>>();
-
-  if constexpr( C::is_complete || abi_t<T, N>::is_wide_logical )
-  {
-    return fsnm_(EVE_RETARGET(cpu_), cx, v, w, x);
-  }
-  else
+  template<conditional_expr C, typename T, typename N, callable_options O>
+  EVE_FORCEINLINE wide<T, N> fsnm_(EVE_SUPPORTS(avx512_),
+                                   C const &mask,
+                                   O const &,
+                                   wide<T, N> const &v,
+                                   wide<T, N> const &w,
+                                   wide<T, N> const &x) noexcept
+  requires x86_abi<abi_t<T, N>>
   {
-    auto m = expand_mask(cx, as<wide<T, N>> {}).storage().value;
+    // NOTE: As these masked versions are at the AVX512 level, they always use a masked variant of
+    // the hardware fused multiply-add instructions, which ensures the pedantic behavior by default;
+    // hence we don't care about PEDANTIC. As usual, we don't care about PROMOTE as we only accept
+    // similar types.

-    if constexpr( !C::has_alternative )
+    if constexpr( C::is_complete ) return alternative(mask, v, as(v));
+    else if constexpr( !C::has_alternative )
     {
-      if constexpr( c == category::float32x16 )      return _mm512_mask3_fnmsub_ps(w, x, v, m);
+      constexpr auto c = categorize<wide<T, N>>();
+      [[maybe_unused]] auto const m = expand_mask(mask, as(v)).storage().value;
+
+      if constexpr( c == category::float32x16 )      return _mm512_mask3_fnmsub_ps(w, x, v, m);
       else if constexpr( c == category::float64x8  ) return _mm512_mask3_fnmsub_pd(w, x, v, m);
       else if constexpr( c == category::float32x8  ) return _mm256_mask3_fnmsub_ps(w, x, v, m);
       else if constexpr( c == category::float64x4  ) return _mm256_mask3_fnmsub_pd(w, x, v, m);
       else if constexpr( c == category::float32x8  ) return _mm128_mask3_fnmsub_ps(w, x, v, m);
       else if constexpr( c == category::float64x4  ) return _mm128_mask3_fnmsub_pd(w, x, v, m);
-      else return fsnm_(EVE_RETARGET(cpu_), cx, v, w, x);
-    }
-    else
-    {
-      auto src = alternative(cx, v, as<wide<T, N>> {});
-      return fsnm_(EVE_RETARGET(cpu_), cx, v, w, x);
+      // No rounding issue with integers, so we just mask over regular FSNM
+      else return if_else(mask, eve::fsnm(v, w, x), v);
     }
+    else return if_else(mask, eve::fsnm(v, w, x), alternative(mask, v, as(v)));
   }
 }
-}
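Taken together, the masked overloads above all reduce to the same three situations, which is what the C::is_complete / C::has_alternative cascade encodes. A usage-level sketch of those three cases (an assumed example, not taken from the patch; eve::if_(...).else_(...) and eve::ignore_all are the conditional helpers from EVE's public interface):

    #include <eve/module/core.hpp>
    #include <eve/wide.hpp>

    void masked_forms(eve::logical<eve::wide<double>> m,
                      eve::wide<double> a, eve::wide<double> b, eve::wide<double> c)
    {
      // 1. Plain mask: false lanes keep `a`, the path served by the mask/mask3 intrinsics.
      auto r1 = eve::fms[m](a, b, c);

      // 2. Mask with an alternative: false lanes take 0.0, the C::has_alternative branch
      //    lowered through if_else(mask, fms(a, b, c), alternative).
      auto r2 = eve::fms[eve::if_(m).else_(0.0)](a, b, c);

      // 3. Fully disabled condition: no lane computes anything and `a` comes back unchanged,
      //    the short-circuit the C::is_complete check is there for.
      auto r3 = eve::fms[eve::ignore_all](a, b, c);

      (void)r1; (void)r2; (void)r3;
    }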