From bfc495463c5e7d7f52e6acd93c5589a2f65041f3 Mon Sep 17 00:00:00 2001 From: jtlap Date: Sun, 7 Apr 2024 12:31:38 +0200 Subject: [PATCH] New style FMA and fixed horner/reverse_horner/newton --- benchmarks/module/core/fma/pedantic/fma.hpp | 4 +- include/eve/module/core/decorator/regular.hpp | 7 +- include/eve/module/core/numeric/core.hpp | 1 - include/eve/module/core/numeric/fma.hpp | 23 --- include/eve/module/core/numeric/impl/fam.hpp | 4 +- include/eve/module/core/numeric/impl/fma.hpp | 72 -------- include/eve/module/core/numeric/impl/fms.hpp | 4 +- include/eve/module/core/numeric/impl/fnma.hpp | 4 +- include/eve/module/core/numeric/impl/fnms.hpp | 4 +- include/eve/module/core/numeric/impl/lerp.hpp | 4 +- .../core/numeric/impl/simd/arm/neon/fma.hpp | 26 --- .../module/core/numeric/impl/simd/ppc/fma.hpp | 32 ---- .../module/core/numeric/impl/simd/x86/fma.hpp | 32 ---- include/eve/module/core/pedantic/core.hpp | 1 - include/eve/module/core/pedantic/fma.hpp | 23 --- .../core/pedantic/impl/diff_of_prod.hpp | 2 +- include/eve/module/core/pedantic/impl/fam.hpp | 4 +- include/eve/module/core/pedantic/impl/fma.hpp | 72 -------- include/eve/module/core/pedantic/impl/fms.hpp | 4 +- .../eve/module/core/pedantic/impl/fnma.hpp | 4 +- .../eve/module/core/pedantic/impl/fnms.hpp | 4 +- .../eve/module/core/pedantic/impl/lerp.hpp | 4 +- .../core/pedantic/impl/simd/arm/neon/fma.hpp | 27 --- .../core/pedantic/impl/simd/ppc/fma.hpp | 32 ---- .../core/pedantic/impl/simd/x86/fma.hpp | 31 ---- .../module/core/pedantic/impl/sum_of_prod.hpp | 2 +- include/eve/module/core/promote/core.hpp | 2 - include/eve/module/core/promote/fma.hpp | 10 -- include/eve/module/core/promote/impl/fma.hpp | 28 ---- include/eve/module/core/regular/fma.hpp | 48 ++++-- include/eve/module/core/regular/impl/fma.hpp | 116 +++++++++---- .../core/regular/impl/simd/arm/neon/fma.hpp | 49 +++--- .../core/regular/impl/simd/arm/sve/fma.hpp | 33 ++-- .../module/core/regular/impl/simd/ppc/fma.hpp | 32 ++-- 
.../module/core/regular/impl/simd/x86/fma.hpp | 112 ++++++------- .../eve/module/math/detail/horner_impl.hpp | 12 +- include/eve/module/math/detail/newton.hpp | 84 ++++++++++ .../math/detail/reverse_horner_impl.hpp | 6 +- .../detail/tchebeval.inactive} | 0 .../eve/module/math/numeric/impl/horner.hpp | 22 +-- .../math/numeric/impl/reverse_horner.hpp | 24 +-- include/eve/module/math/numeric/math.hpp | 3 - include/eve/module/math/pedantic/math.hpp | 3 - include/eve/module/math/regular/horner.hpp | 67 ++++++-- .../eve/module/math/regular/impl/horner.hpp | 4 +- .../regular/impl/newton.hpp | 0 .../regular/impl/tchebeval.inactive} | 0 include/eve/module/math/regular/math.hpp | 1 + include/eve/module/math/regular/newton.hpp | 157 ++++++++++++++++++ .../module/math/regular/reverse_horner.hpp | 70 ++++++-- .../regular/tchebeval.inactive} | 49 ++++-- .../module/polynomial/detail/newton_impl.hpp | 4 +- ...beval_impl.hpp => tchebeval_impl.inactive} | 4 +- .../module/polynomial/numeric/impl/newton.hpp | 8 +- .../eve/module/polynomial/regular/newton.hpp | 78 --------- .../module/polynomial/regular/polynomial.hpp | 2 - .../eve/traits/overload/default_behaviors.hpp | 62 +++---- include/eve/traits/overload/supports.hpp | 13 +- test/doc/CMakeLists.txt | 1 + test/doc/core/regular/fma.cpp | 9 +- .../tchebeval.cpp => core/tchebeval.inactive} | 0 .../{polynomial/regular => math}/newton.cpp | 0 test/doc/math/regular/newton.cpp | 33 ++++ test/unit/module/core/fam.cpp | 6 +- test/unit/module/core/fma.cpp | 35 ++-- .../tchebeval.cpp => core/tchebeval.inactive} | 0 test/unit/module/math/horner.cpp | 107 +++--------- .../module/{polynomial => math}/newton.cpp | 61 +++---- test/unit/module/math/reverse_horner.cpp | 37 +---- 69 files changed, 831 insertions(+), 988 deletions(-) delete mode 100644 include/eve/module/core/numeric/fma.hpp delete mode 100644 include/eve/module/core/numeric/impl/fma.hpp delete mode 100644 include/eve/module/core/numeric/impl/simd/arm/neon/fma.hpp delete mode 100644 
include/eve/module/core/numeric/impl/simd/ppc/fma.hpp delete mode 100644 include/eve/module/core/numeric/impl/simd/x86/fma.hpp delete mode 100644 include/eve/module/core/pedantic/fma.hpp delete mode 100644 include/eve/module/core/pedantic/impl/fma.hpp delete mode 100644 include/eve/module/core/pedantic/impl/simd/arm/neon/fma.hpp delete mode 100644 include/eve/module/core/pedantic/impl/simd/ppc/fma.hpp delete mode 100644 include/eve/module/core/pedantic/impl/simd/x86/fma.hpp delete mode 100644 include/eve/module/core/promote/fma.hpp delete mode 100644 include/eve/module/core/promote/impl/fma.hpp create mode 100644 include/eve/module/math/detail/newton.hpp rename include/eve/module/{core/detail/tchebeval.hpp => math/detail/tchebeval.inactive} (100%) rename include/eve/module/{polynomial => math}/regular/impl/newton.hpp (100%) rename include/eve/module/{polynomial/regular/impl/tchebeval.hpp => math/regular/impl/tchebeval.inactive} (100%) create mode 100644 include/eve/module/math/regular/newton.hpp rename include/eve/module/{polynomial/regular/tchebeval.hpp => math/regular/tchebeval.inactive} (62%) rename include/eve/module/polynomial/detail/{tchebeval_impl.hpp => tchebeval_impl.inactive} (98%) delete mode 100644 include/eve/module/polynomial/regular/newton.hpp rename test/doc/{polynomial/regular/tchebeval.cpp => core/tchebeval.inactive} (100%) rename test/doc/{polynomial/regular => math}/newton.cpp (100%) create mode 100644 test/doc/math/regular/newton.cpp rename test/unit/module/{polynomial/tchebeval.cpp => core/tchebeval.inactive} (100%) rename test/unit/module/{polynomial => math}/newton.cpp (51%) diff --git a/benchmarks/module/core/fma/pedantic/fma.hpp b/benchmarks/module/core/fma/pedantic/fma.hpp index 574f9fe37e..0192134aec 100644 --- a/benchmarks/module/core/fma/pedantic/fma.hpp +++ b/benchmarks/module/core/fma/pedantic/fma.hpp @@ -21,6 +21,6 @@ int main() eve::bench::experiment xp; run(EVE_NAME(std__fma) , xp, std__fma, arg0, arg1, arg2); - 
run(EVE_NAME(pedantic(eve::fma)) , xp, eve::pedantic(eve::fma), arg0, arg1, arg2); - run (EVE_NAME(pedantic(eve::fma)) , xp, eve::pedantic(eve::fma), arg0, arg1, arg2); + run(EVE_NAME(eve::fma[eve::pedantic]) , xp, eve::fma[eve::pedantic], arg0, arg1, arg2); + run (EVE_NAME(eve::fma[eve::pedantic]) , xp, eve::fma[eve::pedantic], arg0, arg1, arg2); } diff --git a/include/eve/module/core/decorator/regular.hpp b/include/eve/module/core/decorator/regular.hpp index c44fe49721..61a976297a 100644 --- a/include/eve/module/core/decorator/regular.hpp +++ b/include/eve/module/core/decorator/regular.hpp @@ -15,14 +15,17 @@ namespace eve //================================================================================================ //================================================================================================ // Function decorators mark-up used in function overloads -struct regular_type : decorator_ +struct regular_ { template constexpr EVE_FORCEINLINE auto operator()(Function f) const noexcept { - return [f](auto&&...args) { return f(EVE_FWD(args)...); }; + return f; } }; + +using regular_type = decorated; + //================================================================================================ //! @addtogroup core_decorators //! 
@{ diff --git a/include/eve/module/core/numeric/core.hpp b/include/eve/module/core/numeric/core.hpp index da69e54ce3..419b304e48 100644 --- a/include/eve/module/core/numeric/core.hpp +++ b/include/eve/module/core/numeric/core.hpp @@ -9,7 +9,6 @@ #include #include -#include #include #include #include diff --git a/include/eve/module/core/numeric/fma.hpp b/include/eve/module/core/numeric/fma.hpp deleted file mode 100644 index 73db9f2a88..0000000000 --- a/include/eve/module/core/numeric/fma.hpp +++ /dev/null @@ -1,23 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include -#include - -#if defined(EVE_INCLUDE_X86_HEADER) -# include -#endif - -#if defined(EVE_INCLUDE_POWERPC_HEADER) -# include -#endif - -#if defined(EVE_INCLUDE_ARM_HEADER) -# include -#endif diff --git a/include/eve/module/core/numeric/impl/fam.hpp b/include/eve/module/core/numeric/impl/fam.hpp index 968da66b9b..252629eaa6 100644 --- a/include/eve/module/core/numeric/impl/fam.hpp +++ b/include/eve/module/core/numeric/impl/fam.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -33,6 +33,6 @@ EVE_FORCEINLINE T fam_(EVE_SUPPORTS(cpu_), numeric_type const&, T const& a, T const& b, T const& c) noexcept requires has_native_abi_v { - return numeric(fma)(b, c, a); + return fma[pedantic](b, c, a); } } diff --git a/include/eve/module/core/numeric/impl/fma.hpp b/include/eve/module/core/numeric/impl/fma.hpp deleted file mode 100644 index 9157d62afa..0000000000 --- a/include/eve/module/core/numeric/impl/fma.hpp +++ /dev/null @@ -1,72 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - 
Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace eve::detail -{ -template -EVE_FORCEINLINE auto -fma_(EVE_SUPPORTS(cpu_), numeric_type const&, T const& a, U const& b, V const& c) noexcept --> common_value_t -{ - return arithmetic_call(numeric(fma), a, b, c); -} - -template -EVE_FORCEINLINE T -fma_(EVE_SUPPORTS(cpu_), numeric_type const&, T const& a, T const& b, T const& c) noexcept requires - has_native_abi_v -{ - using elt_t = element_type_t; - if constexpr( std::is_same_v ) - { - return float32(float64(a) * float64(b) + float64(c)); - } - else if constexpr( std::is_same_v ) - { - if constexpr(scalar_value) - { - return std::fma(a, b, c); - } - else - { - auto stdfma = [](auto sa, auto sb, auto sc){return std::fma(sa, sb, sc); }; - return map(stdfma, a, b, c); - } - } - else if constexpr( std::is_integral_v ) - { - // correct fma has to ensure "no intermediate overflow". 
- // This is done in the case of signed integers by transtyping to unsigned type - // to perform the computations in a guaranted 2-complement environment - // since signed integer overflows in C++ produce "undefined results" - using u_t = as_integer_t; - return bit_cast(fma(bit_cast(a, as()), bit_cast(b, as()), bit_cast(c, as())), - as()); - } - else - { - return fma(a, b, c); - } -} -} diff --git a/include/eve/module/core/numeric/impl/fms.hpp b/include/eve/module/core/numeric/impl/fms.hpp index f12c6e91e7..f1f5f9c00b 100644 --- a/include/eve/module/core/numeric/impl/fms.hpp +++ b/include/eve/module/core/numeric/impl/fms.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include namespace eve::detail @@ -31,6 +31,6 @@ EVE_FORCEINLINE T fms_(EVE_SUPPORTS(cpu_), numeric_type const&, T const& a, T const& b, T const& c) noexcept requires has_native_abi_v { - return numeric(fma)(a, b, T(-c)); + return fma[pedantic](a, b, T(-c)); } } diff --git a/include/eve/module/core/numeric/impl/fnma.hpp b/include/eve/module/core/numeric/impl/fnma.hpp index 8a66007a03..90ed01f548 100644 --- a/include/eve/module/core/numeric/impl/fnma.hpp +++ b/include/eve/module/core/numeric/impl/fnma.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -33,6 +33,6 @@ EVE_FORCEINLINE T fnma_(EVE_SUPPORTS(cpu_), numeric_type const&, T const& a, T const& b, T const& c) noexcept requires has_native_abi_v { - return numeric(fma)(T(-a), b, c); + return fma[pedantic](T(-a), b, c); } } diff --git a/include/eve/module/core/numeric/impl/fnms.hpp b/include/eve/module/core/numeric/impl/fnms.hpp index 6f43cc1bd7..6166854305 100644 --- a/include/eve/module/core/numeric/impl/fnms.hpp +++ b/include/eve/module/core/numeric/impl/fnms.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -33,6 +33,6 @@ EVE_FORCEINLINE T fnms_(EVE_SUPPORTS(cpu_), numeric_type const&, T const& a, T const& b, T const& c) noexcept requires 
has_native_abi_v { - return -numeric(fma)(a, b, c); + return -fma[pedantic](a, b, c); } } diff --git a/include/eve/module/core/numeric/impl/lerp.hpp b/include/eve/module/core/numeric/impl/lerp.hpp index d1031ae5ea..ebc3ef42a3 100644 --- a/include/eve/module/core/numeric/impl/lerp.hpp +++ b/include/eve/module/core/numeric/impl/lerp.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -33,6 +33,6 @@ template EVE_FORCEINLINE T lerp_(EVE_SUPPORTS(cpu_), numeric_type const&, T const& a, T const& b, T const& t) noexcept { - return numeric(fma)(t, b, numeric(fnma)(t, a, a)); + return fma[pedantic](t, b, numeric(fnma)(t, a, a)); } } diff --git a/include/eve/module/core/numeric/impl/simd/arm/neon/fma.hpp b/include/eve/module/core/numeric/impl/simd/arm/neon/fma.hpp deleted file mode 100644 index 49b0c3b7b9..0000000000 --- a/include/eve/module/core/numeric/impl/simd/arm/neon/fma.hpp +++ /dev/null @@ -1,26 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include -#include -#include - -namespace eve::detail -{ -template -EVE_FORCEINLINE wide - fma_(EVE_SUPPORTS(neon128_), - numeric_type const&, - wide const &v0, - wide const &v1, - wide const &v2) noexcept requires arm_abi> -{ - return fma(v0, v1, v2); -} -} diff --git a/include/eve/module/core/numeric/impl/simd/ppc/fma.hpp b/include/eve/module/core/numeric/impl/simd/ppc/fma.hpp deleted file mode 100644 index 44a63acc14..0000000000 --- a/include/eve/module/core/numeric/impl/simd/ppc/fma.hpp +++ /dev/null @@ -1,32 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - 
SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include -#include -#include -#include -#include - -#include - -namespace eve::detail -{ -template -EVE_FORCEINLINE wide - fma_(EVE_SUPPORTS(vmx_), - numeric_type const&, - wide const &v0, - wide const &v1, - wide const &v2) noexcept requires ppc_abi> -{ - if constexpr( std::is_floating_point_v ) - return vec_madd(v0.storage(), v1.storage(), v2.storage()); - else return v0.storage() * v1.storage() + v2.storage(); -} -} diff --git a/include/eve/module/core/numeric/impl/simd/x86/fma.hpp b/include/eve/module/core/numeric/impl/simd/x86/fma.hpp deleted file mode 100644 index 2690dc8cf8..0000000000 --- a/include/eve/module/core/numeric/impl/simd/x86/fma.hpp +++ /dev/null @@ -1,32 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include -#include -#include -#include -#include - -#include - -namespace eve::detail -{ -template -EVE_FORCEINLINE wide - fma_(EVE_SUPPORTS(avx2_), - numeric_type const&, - wide const &a, - wide const &b, - wide const &c) noexcept requires x86_abi> -{ - if constexpr( supports_fma3 ) { return fma(a, b, c); } - else { return fma_(EVE_RETARGET(cpu_), numeric_type(), a, b, c); } -} - -} diff --git a/include/eve/module/core/pedantic/core.hpp b/include/eve/module/core/pedantic/core.hpp index 160aa142b1..0bcd746c3e 100644 --- a/include/eve/module/core/pedantic/core.hpp +++ b/include/eve/module/core/pedantic/core.hpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/include/eve/module/core/pedantic/fma.hpp b/include/eve/module/core/pedantic/fma.hpp deleted file mode 
100644 index 77a582a3c3..0000000000 --- a/include/eve/module/core/pedantic/fma.hpp +++ /dev/null @@ -1,23 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include -#include - -#if defined(EVE_INCLUDE_X86_HEADER) -# include -#endif - -#if defined(EVE_INCLUDE_POWERPC_HEADER) -# include -#endif - -#if defined(EVE_INCLUDE_ARM_HEADER) -# include -#endif diff --git a/include/eve/module/core/pedantic/impl/diff_of_prod.hpp b/include/eve/module/core/pedantic/impl/diff_of_prod.hpp index 740725e1d2..85ea4137bf 100644 --- a/include/eve/module/core/pedantic/impl/diff_of_prod.hpp +++ b/include/eve/module/core/pedantic/impl/diff_of_prod.hpp @@ -40,7 +40,7 @@ namespace eve::detail else { T mcd = c * d; - T err = pedantic(fma)(-c, d, mcd); + T err = fma[pedantic](-c, d, mcd); T dop = pedantic(fms)(a, b, mcd); return if_else(is_finite(err), dop + err, dop); } diff --git a/include/eve/module/core/pedantic/impl/fam.hpp b/include/eve/module/core/pedantic/impl/fam.hpp index 1e43f7a5eb..721b4406bd 100644 --- a/include/eve/module/core/pedantic/impl/fam.hpp +++ b/include/eve/module/core/pedantic/impl/fam.hpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include @@ -32,6 +32,6 @@ EVE_FORCEINLINE T fam_(EVE_SUPPORTS(cpu_), pedantic_type const&, T const& a, T const& b, T const& c) noexcept requires has_native_abi_v { - return pedantic(fma)(b, c, a); + return fma[pedantic](b, c, a); } } diff --git a/include/eve/module/core/pedantic/impl/fma.hpp b/include/eve/module/core/pedantic/impl/fma.hpp deleted file mode 100644 index 02bd436032..0000000000 --- a/include/eve/module/core/pedantic/impl/fma.hpp +++ /dev/null @@ -1,72 +0,0 @@ 
-//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace eve::detail -{ -template -EVE_FORCEINLINE auto -fma_(EVE_SUPPORTS(cpu_), pedantic_type const&, T const& a, U const& b, V const& c) noexcept --> decltype(fsm(a, b, c)) -{ - return arithmetic_call(pedantic(fma), a, b, c); -} - -template -EVE_FORCEINLINE T -fma_(EVE_SUPPORTS(cpu_), pedantic_type const&, T const& a, T const& b, T const& c) noexcept requires - has_native_abi_v -{ - using elt_t = element_type_t; - if constexpr( std::is_same_v ) - { - return float32(float64(a) * float64(b) + float64(c)); - } - else if constexpr( std::is_same_v ) - { - if constexpr(scalar_value) - { - return std::fma(a, b, c); - } - else - { - auto stdfma = [](auto sa, auto sb, auto sc){return std::fma(sa, sb, sc); }; - return map(stdfma, a, b, c); - } - } - else if constexpr( std::is_integral_v ) - { - // correct fma has to ensure "no intermediate overflow". 
- // This is done in the case of signed integers by transtyping to unsigned type - // to perform the computations in a guaranted 2-complement environment - // since signed integer overflows in C++ produce "undefined results" - using u_t = as_integer_t; - return bit_cast(fma(bit_cast(a, as()), bit_cast(b, as()), bit_cast(c, as())), - as()); - } - else - { - return fma(a, b, c); - } -} -} diff --git a/include/eve/module/core/pedantic/impl/fms.hpp b/include/eve/module/core/pedantic/impl/fms.hpp index 22b6c95767..8277ebfb0e 100644 --- a/include/eve/module/core/pedantic/impl/fms.hpp +++ b/include/eve/module/core/pedantic/impl/fms.hpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include namespace eve::detail @@ -30,6 +30,6 @@ EVE_FORCEINLINE T fms_(EVE_SUPPORTS(cpu_), pedantic_type const&, T const& a, T const& b, T const& c) noexcept requires has_native_abi_v { - return pedantic(fma)(a, b, T(-c)); + return fma[pedantic](a, b, T(-c)); } } diff --git a/include/eve/module/core/pedantic/impl/fnma.hpp b/include/eve/module/core/pedantic/impl/fnma.hpp index 6263c5d06b..a500d26a7d 100644 --- a/include/eve/module/core/pedantic/impl/fnma.hpp +++ b/include/eve/module/core/pedantic/impl/fnma.hpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include @@ -39,6 +39,6 @@ fnma_(EVE_SUPPORTS(cpu_), T const& b, T const& c) noexcept requires has_native_abi_v { - return pedantic(fma)(T(-a), b, c); + return fma[pedantic](T(-a), b, c); } } diff --git a/include/eve/module/core/pedantic/impl/fnms.hpp b/include/eve/module/core/pedantic/impl/fnms.hpp index 729310fae9..0dd46ab6cc 100644 --- a/include/eve/module/core/pedantic/impl/fnms.hpp +++ b/include/eve/module/core/pedantic/impl/fnms.hpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include @@ -39,6 +39,6 @@ fnms_(EVE_SUPPORTS(cpu_), T const& b, T const& c) noexcept requires has_native_abi_v { - return -pedantic(fma)(a, b, c); + return -fma[pedantic](a, b, c); } } diff 
--git a/include/eve/module/core/pedantic/impl/lerp.hpp b/include/eve/module/core/pedantic/impl/lerp.hpp index a5ade6ad78..8630893773 100644 --- a/include/eve/module/core/pedantic/impl/lerp.hpp +++ b/include/eve/module/core/pedantic/impl/lerp.hpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include @@ -36,6 +36,6 @@ template EVE_FORCEINLINE T lerp_(EVE_SUPPORTS(cpu_), pedantic_type const&, T const& a, T const& b, T const& t) noexcept { - return pedantic(fma)(t, b, pedantic(fnma)(t, a, a)); + return fma[pedantic](t, b, pedantic(fnma)(t, a, a)); } } diff --git a/include/eve/module/core/pedantic/impl/simd/arm/neon/fma.hpp b/include/eve/module/core/pedantic/impl/simd/arm/neon/fma.hpp deleted file mode 100644 index 4edf136de1..0000000000 --- a/include/eve/module/core/pedantic/impl/simd/arm/neon/fma.hpp +++ /dev/null @@ -1,27 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include -#include -#include -#include - -namespace eve::detail -{ -template -EVE_FORCEINLINE wide - fma_(EVE_SUPPORTS(neon128_), - pedantic_type const&, - wide const &v0, - wide const &v1, - wide const &v2) noexcept requires arm_abi> -{ - return fma(v0, v1, v2); -} -} diff --git a/include/eve/module/core/pedantic/impl/simd/ppc/fma.hpp b/include/eve/module/core/pedantic/impl/simd/ppc/fma.hpp deleted file mode 100644 index 789994bf7d..0000000000 --- a/include/eve/module/core/pedantic/impl/simd/ppc/fma.hpp +++ /dev/null @@ -1,32 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ 
-//================================================================================================== -#pragma once - -#include -#include -#include -#include -#include - -#include - -namespace eve::detail -{ -template -EVE_FORCEINLINE wide - fma_(EVE_SUPPORTS(vmx_), - pedantic_type const&, - wide const &v0, - wide const &v1, - wide const &v2) noexcept requires ppc_abi> -{ - if constexpr( std::is_floating_point_v ) - return vec_madd(v0.storage(), v1.storage(), v2.storage()); - else return v0.storage() * v1.storage() + v2.storage(); -} -} diff --git a/include/eve/module/core/pedantic/impl/simd/x86/fma.hpp b/include/eve/module/core/pedantic/impl/simd/x86/fma.hpp deleted file mode 100644 index adb68e30af..0000000000 --- a/include/eve/module/core/pedantic/impl/simd/x86/fma.hpp +++ /dev/null @@ -1,31 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include -#include -#include -#include -#include - -#include - -namespace eve::detail -{ -template -EVE_FORCEINLINE wide - fma_(EVE_SUPPORTS(avx2_), - pedantic_type const&, - wide const &a, - wide const &b, - wide const &c) noexcept requires x86_abi> -{ - if constexpr( supports_fma3 ) { return fma(a, b, c); } - else { return fma_(EVE_RETARGET(cpu_), pedantic_type(), a, b, c); } -} -} diff --git a/include/eve/module/core/pedantic/impl/sum_of_prod.hpp b/include/eve/module/core/pedantic/impl/sum_of_prod.hpp index 2b27e19086..bd72deb029 100644 --- a/include/eve/module/core/pedantic/impl/sum_of_prod.hpp +++ b/include/eve/module/core/pedantic/impl/sum_of_prod.hpp @@ -40,7 +40,7 @@ namespace eve::detail else { T mcd = -c * d; - T err = pedantic(fma)(c, d, mcd); + T err = fma[pedantic](c, d, mcd); T dop = pedantic(fms)(a, b, mcd); T r = 
if_else(is_finite(err), dop + err, dop); return r; diff --git a/include/eve/module/core/promote/core.hpp b/include/eve/module/core/promote/core.hpp index 06fc70b6ea..1b31dd1a46 100644 --- a/include/eve/module/core/promote/core.hpp +++ b/include/eve/module/core/promote/core.hpp @@ -7,8 +7,6 @@ //================================================================================================== #pragma once - -#include #include #include #include diff --git a/include/eve/module/core/promote/fma.hpp b/include/eve/module/core/promote/fma.hpp deleted file mode 100644 index af122c5e74..0000000000 --- a/include/eve/module/core/promote/fma.hpp +++ /dev/null @@ -1,10 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include diff --git a/include/eve/module/core/promote/impl/fma.hpp b/include/eve/module/core/promote/impl/fma.hpp deleted file mode 100644 index 6389188baf..0000000000 --- a/include/eve/module/core/promote/impl/fma.hpp +++ /dev/null @@ -1,28 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include -#include -#include -#include - -#include -#include - -namespace eve::detail -{ - template - EVE_FORCEINLINE auto - fma_(EVE_SUPPORTS(cpu_), promote_type const&, T const& a, U const& b, V const& c) noexcept - { - using er_t = common_type_t, element_type_t, element_type_t>; - auto cv_t = as(eve::as()); - return fma(convert(a, cv_t), convert(b, cv_t), convert(c,cv_t)); - } -} diff --git 
a/include/eve/module/core/regular/fma.hpp b/include/eve/module/core/regular/fma.hpp index 893ecf904e..651c1b4a04 100644 --- a/include/eve/module/core/regular/fma.hpp +++ b/include/eve/module/core/regular/fma.hpp @@ -1,18 +1,37 @@ -//================================================================================================== +//====================================================================================================================== /* EVE - Expressive Vector Engine Copyright : EVE Project Contributors SPDX-License-Identifier: BSL-1.0 */ -//================================================================================================== +//====================================================================================================================== #pragma once + +#include "eve/traits/common_type.hpp" +#include "eve/traits/common_value.hpp" #include -#include +#include +#include namespace eve { -//================================================================================================ +template +struct fma_t : elementwise_callable +{ + template + requires(Options::contains(promote2)) + constexpr EVE_FORCEINLINE auto operator()(T a, U b, V c) const noexcept { return EVE_DISPATCH_CALL(a,b,c); } + + template + requires(!Options::contains(promote2)) + constexpr EVE_FORCEINLINE + common_value_t operator()(T a, U b, V c) const noexcept { return EVE_DISPATCH_CALL(a,b,c); } + + EVE_CALLABLE_OBJECT(fma_t, fma_); +}; + +//====================================================================================================================== //! @addtogroup core_fma_family //! @{ //! @var fma @@ -45,7 +64,7 @@ namespace eve //! //! The value of `x*y+z` as if calculated to infinite precision //! and rounded once is returned, but only if the hardware is in capacity -//! to do it at reasonnable cost. +//! to do it at reasonable cost. //! //! @note //! This `fma` implementation provides those properties for all @@ -64,22 +83,17 @@ namespace eve //! 
version of `fma` which is //! equivalent to `if_else(mask, fma(x, ...), x)` //! -//! **Example** -//! -//! @godbolt{doc/core/masked/fma.cpp} -//! -//! * eve::pedantic, eve::numeric +//! **Example** //! -//! * The call `pedantic(fma)(x,y,z)` ensures the one rounding property. -//! This can be very expensive if the system has no hardware capability. +//! @godbolt{doc/core/masked/fma.cpp} //! -//! * The call `numeric(fma)(x,y,z)` ensures the full compliance to fma properties. -//! This can be very expensive if the system has no hardware capability. +//! * eve::pedantic //! -//! * see the above regular example. +//! The call `fma[pedantic](x,y,z)` ensures the full compliance to fma properties. +//! This can be very expensive if the system has no hardware capability. //! @} -//================================================================================================ -EVE_MAKE_CALLABLE(fma_, fma); +//====================================================================================================================== +inline constexpr auto fma = functor; } #include diff --git a/include/eve/module/core/regular/impl/fma.hpp b/include/eve/module/core/regular/impl/fma.hpp index 8c1d2fa868..60307e9365 100644 --- a/include/eve/module/core/regular/impl/fma.hpp +++ b/include/eve/module/core/regular/impl/fma.hpp @@ -7,44 +7,94 @@ //================================================================================================== #pragma once -#include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace eve::detail { -template -EVE_FORCEINLINE auto -fma_(EVE_SUPPORTS(cpu_), - T const& a, - U const& b, - V const& c) noexcept --> common_value_t -{ - using r_t = common_value_t; - return arithmetic_call(fma, r_t(a), r_t(b), r_t(c)); -} + template + EVE_FORCEINLINE constexpr auto fma_(EVE_REQUIRES(cpu_), O const& o, T const& a, U const& b, V const& c) + { + // PROMOTE 
--------------------- + if constexpr(O::contains(promote2)) + { + using er_t = common_type_t, element_type_t, element_type_t>; + constexpr auto tgt = as(eve::as()); + return fma[o.drop(promote2)](convert(a, tgt), convert(b, tgt), convert(c,tgt)); + } + // OTHERS --------------------- + else + { + using r_t = common_value_t; + return fma[o](r_t(a), r_t(b), r_t(c)); + } + } -template // fallback never taken if proper intrinsics are at hand -EVE_FORCEINLINE T -fma_(EVE_SUPPORTS(cpu_), T const& a, T const& b, T const& c) noexcept -{ - return a * b + c; -} + // PROMOTE may mix values of different types, making automatic masking from the default behaviors unwieldy + // We catch the mask + PROMOTE case and handle it properly + // No special provision is made for PEDANTIC as it is handled in the second pass without issues. + template + EVE_FORCEINLINE constexpr auto + fma_(EVE_REQUIRES(cpu_), C const& mask, O const& o, T const& a, U const& b, V const& c) + { + // PROMOTE --------------------- + if constexpr(O::contains(promote2)) + { + using er_t = common_type_t, element_type_t, element_type_t>; + constexpr auto tgt = as(eve::as()); -//================================================================================================ -// Masked case -//================================================================================================ -template -EVE_FORCEINLINE auto -fma_(EVE_SUPPORTS(cpu_), C const& cond, T const& a, U const& b, V const& c) noexcept --> common_value_t -{ - using r_t = common_value_t; - return mask_op(cond, eve::fma, r_t(a), r_t(b), r_t(c)); -} + return fma[o.drop(promote2)](convert(a, tgt), convert(b, tgt), convert(c,tgt)); + } + // OTHERS --------------------- + else + { + using r_t = common_value_t; + // Drop the mask key to prevent circular calls + auto opt = o.drop(condition_key); + return detail::mask_op(mask, detail::return_2nd, r_t(a), fma[opt](r_t(a), r_t(b), r_t(c))); + } + } + template + EVE_FORCEINLINE constexpr auto 
fma_(EVE_REQUIRES(cpu_), O const& o, T const& a, T const& b, T const& c) + { + // PROMOTE --------------------- + // We promote before going pedantic in case it changes the behavior + if constexpr(O::contains(promote2)) return fma[o.drop(promote2)](a,b,c); + // PEDANTIC --------------------- + else if constexpr(O::contains(pedantic2)) + { + if constexpr( std::same_as, float> ) + { + constexpr auto tgt = as{}; + return convert(convert(a,tgt) * convert(b,tgt) + convert(c,tgt), as_element(a)); + } + else if constexpr( std::same_as, double> ) + { + [[maybe_unused]] auto stdfma = [](auto sa, auto sb, auto sc){return std::fma(sa, sb, sc); }; + if constexpr(scalar_value) return std::fma(a, b, c); + else return map(stdfma, a, b, c); + } + else if constexpr( std::is_integral_v> ) + { + // Pedantic fma has to ensure "no intermediate overflow". + // This is done in the case of signed integers by trans-typing to unsigned type + // to perform the computations in a guarantee 2-complement environment + // since signed integer overflows in C++ produce "undefined results" + constexpr auto tgt = as>{}; + return bit_cast(fma(bit_cast(a,tgt), bit_cast(b,tgt), bit_cast(c,tgt)), as()); + } + else return fma(a, b, c); + } + // REGULAR --------------------- + else return a * b + c; + } } diff --git a/include/eve/module/core/regular/impl/simd/arm/neon/fma.hpp b/include/eve/module/core/regular/impl/simd/arm/neon/fma.hpp index 9f059792da..3070b7eb19 100644 --- a/include/eve/module/core/regular/impl/simd/arm/neon/fma.hpp +++ b/include/eve/module/core/regular/impl/simd/arm/neon/fma.hpp @@ -8,40 +8,41 @@ #pragma once #include +#include #include -#include +#include namespace eve::detail { -template +template EVE_FORCEINLINE wide - fma_(EVE_SUPPORTS(neon128_), - wide const &v0, - wide const &v1, - wide const &v2) noexcept requires arm_abi> +fma_(EVE_REQUIRES(neon128_), O const& opts, wide const& a, wide const& b, wide const& c) noexcept +requires arm_abi> { + // We don't care about PROMOTE as we 
takes single type + // We don't care about PEDANTIC as we always end up using a vmla/vfma or we map constexpr auto cat = categorize>(); - if constexpr( cat == category::float32x2 ) return vfma_f32(v2, v1, v0); - else if constexpr( cat == category::float32x4 ) return vfmaq_f32(v2, v1, v0); - else if constexpr( cat == category::int32x4 ) return vmlaq_s32(v2, v1, v0); - else if constexpr( cat == category::int16x8 ) return vmlaq_s16(v2, v1, v0); - else if constexpr( cat == category::int8x16 ) return vmlaq_s8(v2, v1, v0); - else if constexpr( cat == category::int32x2 ) return vmla_s32(v2, v1, v0); - else if constexpr( cat == category::int16x4 ) return vmla_s16(v2, v1, v0); - else if constexpr( cat == category::int8x8 ) return vmla_s8(v2, v1, v0); - else if constexpr( cat == category::uint32x4 ) return vmlaq_u32(v2, v1, v0); - else if constexpr( cat == category::uint16x8 ) return vmlaq_u16(v2, v1, v0); - else if constexpr( cat == category::uint8x16 ) return vmlaq_u8(v2, v1, v0); - else if constexpr( cat == category::uint32x2 ) return vmla_u32(v2, v1, v0); - else if constexpr( cat == category::uint16x4 ) return vmla_u16(v2, v1, v0); - else if constexpr( cat == category::uint8x8 ) return vmla_u8(v2, v1, v0); + if constexpr( cat == category::float32x4 ) return vfmaq_f32(c, b, a); + else if constexpr( cat == category::float32x2 ) return vfma_f32 (c, b, a); + else if constexpr( cat == category::int32x4 ) return vmlaq_s32(c, b, a); + else if constexpr( cat == category::int32x2 ) return vmla_s32 (c, b, a); + else if constexpr( cat == category::int16x8 ) return vmlaq_s16(c, b, a); + else if constexpr( cat == category::int16x4 ) return vmla_s16 (c, b, a); + else if constexpr( cat == category::int8x16 ) return vmlaq_s8 (c, b, a); + else if constexpr( cat == category::int8x8 ) return vmla_s8 (c, b, a); + else if constexpr( cat == category::uint32x4 ) return vmlaq_u32(c, b, a); + else if constexpr( cat == category::uint32x2 ) return vmla_u32 (c, b, a); + else if constexpr( cat == 
category::uint16x8 ) return vmlaq_u16(c, b, a); + else if constexpr( cat == category::uint16x4 ) return vmla_u16 (c, b, a); + else if constexpr( cat == category::uint8x16 ) return vmlaq_u8 (c, b, a); + else if constexpr( cat == category::uint8x8 ) return vmla_u8 (c, b, a); else if constexpr( current_api >= asimd ) { - if constexpr( cat == category::float64x1 ) return vfma_f64(v2, v1, v0); - else if constexpr( cat == category::float64x2 ) return vfmaq_f64(v2, v1, v0); - else return map(fma, v0, v1, v2); + if constexpr( cat == category::float64x2 ) return vfmaq_f64(c, b, a); + else if constexpr( cat == category::float64x1 ) return vfma_f64 (c, b, a); + else return fma_(EVE_TARGETS(cpu_), opts, a, b, c); } - else return map(fma, v0, v1, v2); + else return fma_(EVE_TARGETS(cpu_), opts, a, b, c); } } diff --git a/include/eve/module/core/regular/impl/simd/arm/sve/fma.hpp b/include/eve/module/core/regular/impl/simd/arm/sve/fma.hpp index cdb2fe1a7f..0fa422f447 100644 --- a/include/eve/module/core/regular/impl/simd/arm/sve/fma.hpp +++ b/include/eve/module/core/regular/impl/simd/arm/sve/fma.hpp @@ -1,37 +1,38 @@ -//================================================================================================== +//====================================================================================================================== /* EVE - Expressive Vector Engine Copyright : EVE Project Contributors SPDX-License-Identifier: BSL-1.0 */ -//================================================================================================== +//====================================================================================================================== #pragma once #include -#include -#include +#include +#include +#include namespace eve::detail { -template +template EVE_FORCEINLINE auto -fma_(EVE_SUPPORTS(sve_), wide v0, wide v1, wide v2) noexcept -> wide +fma_(EVE_REQUIRES(sve_), O const&, wide const& a, wide const& b, wide const& c) noexcept -> wide requires sve_abi> { 
- return fma[ignore_none](v0, v1, v2); + // We don't care about PEDANTIC as this is a proper FMA. + // We don't care about PROMOTE as we only accept similar types. + return svmad_x(sve_true(), a, b, c); } -template +template EVE_FORCEINLINE auto -fma_(EVE_SUPPORTS(sve_), C cond, wide v0, wide v1, wide v2) noexcept -> wide +fma_(EVE_SUPPORTS(sve_), C cond, O const&, wide a, wide b, wide c) noexcept -> wide requires sve_abi> { - if constexpr( C::is_complete && C::is_inverted ) return svmad_x(sve_true(), v0, v1, v2); - else - { - auto const alt = alternative(cond, v0, as(v0)); - if constexpr( C::is_complete && !C::is_inverted ) return alt; - else return svmad_m(cond.mask(as{}), alt, v1, v2); - } + // We don't care about PEDANTIC as this is a proper FMA. + // We don't care about PROMOTE as we only accept similar types. + auto const alt = alternative(cond, a, as(a)); + if constexpr( C::is_complete && !C::is_inverted ) return alt; + else return svmad_m(cond.mask(as{}), alt, b, c); } } diff --git a/include/eve/module/core/regular/impl/simd/ppc/fma.hpp b/include/eve/module/core/regular/impl/simd/ppc/fma.hpp index 3d579af04e..eaf027bb90 100644 --- a/include/eve/module/core/regular/impl/simd/ppc/fma.hpp +++ b/include/eve/module/core/regular/impl/simd/ppc/fma.hpp @@ -1,31 +1,29 @@ -//================================================================================================== +//====================================================================================================================== /* EVE - Expressive Vector Engine Copyright : EVE Project Contributors SPDX-License-Identifier: BSL-1.0 */ -//================================================================================================== +//====================================================================================================================== #pragma once #include #include -#include +#include #include -#include - -#include namespace eve::detail { -template -EVE_FORCEINLINE wide - 
fma_(EVE_SUPPORTS(vmx_), - wide const &v0, - wide const &v1, - wide const &v2) noexcept requires ppc_abi> -{ - if constexpr( std::is_floating_point_v ) - return vec_madd(v0.storage(), v1.storage(), v2.storage()); - else return v0.storage() * v1.storage() + v2.storage(); -} + template + EVE_FORCEINLINE wide + fma_(EVE_REQUIRES(vmx_), O const& opts, wide const& a, wide const& b, wide const& c) noexcept + requires ppc_abi> + { + // REGULAR --- + // Only floating point has a special behavior and are PEDANTIC by design + if constexpr( std::floating_point ) return vec_madd(a.storage(), b.storage(), c.storage()); + // PEDANTIC, INTEGRAL, ETC... --- + // We don't care about PROMOTE as we only accept similar types. + else return fma_(EVE_TARGETS(cpu_), opts, a, b, c); + } } diff --git a/include/eve/module/core/regular/impl/simd/x86/fma.hpp b/include/eve/module/core/regular/impl/simd/x86/fma.hpp index 70305dc8f2..9fdb071f17 100644 --- a/include/eve/module/core/regular/impl/simd/x86/fma.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/fma.hpp @@ -1,81 +1,77 @@ -//================================================================================================== +//====================================================================================================================== /* EVE - Expressive Vector Engine Copyright : EVE Project Contributors SPDX-License-Identifier: BSL-1.0 */ -//================================================================================================== +//====================================================================================================================== #pragma once #include +#include #include -#include -#include - -#include +#include namespace eve::detail { -template -EVE_FORCEINLINE wide - fma_(EVE_SUPPORTS(avx2_), - wide const &a, - wide const &b, - wide const &c) noexcept requires x86_abi> -{ - if constexpr( std::is_integral_v ) { return fma_(EVE_RETARGET(cpu_), a, b, c); } - else + template + EVE_FORCEINLINE wide 
+ fma_(EVE_REQUIRES(sse2_), O const& opts, wide const& a, wide const& b, wide const& c) noexcept + requires x86_abi> { - constexpr auto cat = categorize>(); - - if constexpr( cat == category::float64x8 ) return _mm512_fmadd_pd(a, b, c); - else if constexpr( cat == category::float32x16 ) return _mm512_fmadd_ps(a, b, c); - else if constexpr( supports_fma3 ) + // Integral don't do anything special ---- + if constexpr( std::integral ) return fma_(EVE_TARGETS(cpu_), opts, a, b, c); + // PEDANTIC --- + else if constexpr(O::contains(pedantic2) ) { - if constexpr( cat == category::float64x4 ) return _mm256_fmadd_pd(a, b, c); - else if constexpr( cat == category::float64x2 ) return _mm_fmadd_pd(a, b, c); - else if constexpr( cat == category::float32x8 ) return _mm256_fmadd_ps(a, b, c); - else if constexpr( cat == category::float32x4 ) return _mm_fmadd_ps(a, b, c); + if constexpr( supports_fma3 ) return fma(a, b, c); + else return fma_(EVE_TARGETS(cpu_), opts, a, b, c); } - else return fma_(EVE_RETARGET(cpu_), a, b, c); - } -} - -// ----------------------------------------------------------------------------------------------- -// Masked case -template -EVE_FORCEINLINE wide - fma_(EVE_SUPPORTS(avx512_), - C const &cx, - wide const &v, - wide const &w, - wide const &x) noexcept requires x86_abi> -{ - constexpr auto c = categorize>(); + // REGULAR --- + // we don't care about PROMOTE as we only accept similar types. 
+ else + { + constexpr auto cat = categorize>(); - if constexpr( C::is_complete || abi_t::is_wide_logical ) - { - return fma_(EVE_RETARGET(cpu_), cx, v, w, x); + if constexpr( cat == category::float64x8 ) return _mm512_fmadd_pd(a, b, c); + else if constexpr( cat == category::float32x16 ) return _mm512_fmadd_ps(a, b, c); + else if constexpr( supports_fma3) + { + if constexpr( cat == category::float64x4 ) return _mm256_fmadd_pd(a, b, c); + else if constexpr( cat == category::float64x2 ) return _mm_fmadd_pd (a, b, c); + else if constexpr( cat == category::float32x8 ) return _mm256_fmadd_ps(a, b, c); + else if constexpr( cat == category::float32x4 ) return _mm_fmadd_ps (a, b, c); + } + else return fma_(EVE_TARGETS(cpu_), opts, a, b, c); + } } - else + + template + EVE_FORCEINLINE wide + fma_( EVE_REQUIRES(avx512_), C const& mask, O const& + , wide const& a, wide const& b, wide const& c + ) + noexcept requires x86_abi> { - auto m = expand_mask(cx, as> {}).storage().value; + // NOTE: As those masked version are at the AVX512 level, they will always uses a variant of + // hardware VMADD, thus ensuring the pedantic behavior by default, hence why we don't care about + // PEDANTIC. As usual, we don't care about PROMOTE as we only accept similar types. 
- if constexpr( !C::has_alternative ) - { - if constexpr( c == category::float32x16 ) return _mm512_mask_fmadd_ps(v, m, w, x); - else if constexpr( c == category::float64x8 ) return _mm512_mask_fmadd_pd(v, m, w, x); - else if constexpr( c == category::float32x8 ) return _mm256_mask_fmadd_ps(v, m, w, x); - else if constexpr( c == category::float64x4 ) return _mm256_mask_fmadd_pd(v, m, w, x); - else if constexpr( c == category::float32x4 ) return _mm_mask_fmadd_ps(v, m, w, x); - else if constexpr( c == category::float64x2 ) return _mm_mask_fmadd_pd(v, m, w, x); - else return if_else(cx, eve::fma(v, w, x), v); - } - else + if constexpr( C::is_complete ) return alternative(mask, a, as(a)); + else if constexpr( !C::has_alternative ) { - auto src = alternative(cx, v, as> {}); - return fma_(EVE_RETARGET(cpu_), cx, v, w, x); + constexpr auto cx = categorize>(); + [[maybe_unused]] auto const m = expand_mask(mask, as(a)).storage().value; + + if constexpr( cx == category::float32x16 ) return _mm512_mask_fmadd_ps(a, m, b, c); + else if constexpr( cx == category::float64x8 ) return _mm512_mask_fmadd_pd(a, m, b, c); + else if constexpr( cx == category::float32x8 ) return _mm256_mask_fmadd_ps(a, m, b, c); + else if constexpr( cx == category::float64x4 ) return _mm256_mask_fmadd_pd(a, m, b, c); + else if constexpr( cx == category::float32x4 ) return _mm_mask_fmadd_ps (a, m, b, c); + else if constexpr( cx == category::float64x2 ) return _mm_mask_fmadd_pd (a, m, b, c); + // No rounding issue with integers, so we just mask over regular FMA + else return if_else(mask, eve::fma(a, b, c), a); } + else return if_else(mask, eve::fma(a, b, c), alternative(mask, a, as(a))); } } -} diff --git a/include/eve/module/math/detail/horner_impl.hpp b/include/eve/module/math/detail/horner_impl.hpp index e707e8f083..894e3963e3 100644 --- a/include/eve/module/math/detail/horner_impl.hpp +++ b/include/eve/module/math/detail/horner_impl.hpp @@ -10,12 +10,8 @@ #include #include #include - -#include 
#include -#include - namespace eve::detail { //================================================================================================ @@ -38,7 +34,7 @@ namespace eve::detail // else // { // auto x = r_t(xx); -// auto dfma = d(fma); +// auto dfma = fma[d]; // r_t that(zero(as())); // auto next = [&](auto that, auto arg) { return dfma(x, that, arg); }; // ((that = next(that, cs)), ...); @@ -61,9 +57,8 @@ namespace eve::detail else { auto x = r_t(xx); - auto dfma = d(fma); r_t that(zero(as())); - auto next = [&](auto that, auto arg) { return dfma(that, x, arg); }; + auto next = [&](auto that, auto arg) { return fma[d](that, x, arg); }; ((that = next(that, cs)), ...); return that; } @@ -87,9 +82,8 @@ namespace eve::detail else { using std::advance; - auto dfma = d(fma); auto that = r_t(*cur); - auto step = [&](auto that, auto arg) { return dfma(x, that, arg); }; + auto step = [&](auto that, auto arg) { return fma[d](x, that, arg); }; for( advance(cur, 1); cur != last; advance(cur, 1) ) that = step(that, *cur); return that; } diff --git a/include/eve/module/math/detail/newton.hpp b/include/eve/module/math/detail/newton.hpp new file mode 100644 index 0000000000..564ac2b668 --- /dev/null +++ b/include/eve/module/math/detail/newton.hpp @@ -0,0 +1,84 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +#include +#include + +#include + +namespace eve::detail +{ +//================================================================================================ +//== Newton with iterators +//================================================================================================ +template +EVE_FORCEINLINE constexpr auto +newton_impl(D const& d, T0 xx, IT1 
const& firstc, IT1 const& lastc, IT2 const& firstn) noexcept +{ + using r_t = common_compatible_t::value_type, + typename std::iterator_traits::value_type>; + auto x = r_t(xx); + if( firstc == lastc ) return r_t(0); + if( std::distance(firstc, lastc) == 1 ) return r_t(*firstc); + else + { + using std::advance; + auto curc = firstc; + auto curn = firstn; + advance(curc, 1); + advance(curn, 1); + auto dfma = fma[d]; + r_t that(dfma(*firstc, sub(x, *firstn), *curc)); + auto step = [&](auto that, auto argc, auto argn) { return dfma(that, sub(x, argn), argc); }; + for( advance(curc, 1); curc != lastc; advance(curc, 1), advance(curn, 1) ) + that = step(that, *curc, *curn); + return that; + } +} + +//================================================================================================ +//== Newton with ranges +//================================================================================================ +template +EVE_FORCEINLINE constexpr auto +newton_impl(D const& d, T0 xx, R1 const& rc, R2 rn) noexcept +{ + using r_t = decltype(xx+ (typename R1::value_type)(0)+(typename R2::value_type)(0)); + auto x = r_t(xx); + auto firstc = begin(rc); + auto lastc = end(rc); + if( firstc == lastc ) return r_t(0); + else + { + auto siz = std::distance(firstc, lastc); + EVE_ASSERT(siz == inc(std::distance(begin(rn), end(rn))), + "number of nodes must equal to the number of coefficients minus 1"); + if( siz == 1 ) return r_t(*firstc); + else + { + using std::advance; + auto firstn = begin(rn); + auto curn = firstn; + auto curc = firstc; + advance(curc, 1); + advance(curn, 1); + auto dfma = fma[d]; + r_t that(dfma(*firstc, sub(x, *firstn), *curc)); + auto step = [&](auto that, auto argc, auto argn) { return dfma( that, sub(x, argn), argc); }; + for( advance(curc, 1); curc != lastc; advance(curc, 1), advance(curn, 1) ) + that = step(that, *curc, *curn); + return that; + } + } +} +} diff --git a/include/eve/module/math/detail/reverse_horner_impl.hpp 
b/include/eve/module/math/detail/reverse_horner_impl.hpp index a004c54a01..535971fe27 100644 --- a/include/eve/module/math/detail/reverse_horner_impl.hpp +++ b/include/eve/module/math/detail/reverse_horner_impl.hpp @@ -22,7 +22,7 @@ namespace eve::detail { - template + template EVE_FORCEINLINE constexpr auto reverse_horner_impl(D const& d, T0 xx, C0 c0, Cs... cs) noexcept -> decltype(horner(xx, c0, cs...)) @@ -38,7 +38,7 @@ namespace eve::detail { using r_t = common_value_t; auto x = r_t(xx); - using t_t = kumi::result::generate_t; + using t_t = kumi::result::generate_t; t_t c {r_t{c0}, r_t{cs}...}; return d(reverse_horner)(x, c); } @@ -60,7 +60,7 @@ namespace eve::detail else if( std::distance(cur, first) == 1 ) return r_t(*cur); else { - auto dfma = d(fma); + auto dfma = fma[d]; auto that = r_t(0); auto step = [&](auto that, auto arg) { return dfma(x, that, arg); }; for(; cur != first; ++cur ) that = step(that, *cur); diff --git a/include/eve/module/core/detail/tchebeval.hpp b/include/eve/module/math/detail/tchebeval.inactive similarity index 100% rename from include/eve/module/core/detail/tchebeval.hpp rename to include/eve/module/math/detail/tchebeval.inactive diff --git a/include/eve/module/math/numeric/impl/horner.hpp b/include/eve/module/math/numeric/impl/horner.hpp index e84d906c50..a7f560d829 100644 --- a/include/eve/module/math/numeric/impl/horner.hpp +++ b/include/eve/module/math/numeric/impl/horner.hpp @@ -17,9 +17,9 @@ namespace eve::detail template EVE_FORCEINLINE constexpr auto horner_(EVE_SUPPORTS(cpu_), numeric_type const&, T0 xx, IT const& first, IT const& last) noexcept --> decltype(detail::horner_impl(numeric_type(), xx, first, last)) +-> decltype(detail::horner_impl(pedantic, xx, first, last)) { - return detail::horner_impl(numeric_type(), xx, first, last); + return detail::horner_impl(pedantic, xx, first, last); } //================================================================================================ @@ -34,9 +34,9 @@ 
horner_(EVE_SUPPORTS(cpu_), callable_one_ const&, IT const& first, IT const& last) noexcept --> decltype(detail::horner_impl(numeric_type(), xx, one, first, last)) +-> decltype(detail::horner_impl(pedantic, xx, one, first, last)) { - return detail::horner_impl(numeric_type(), xx, one, first, last); + return detail::horner_impl(pedantic, xx, one, first, last); } //================================================================================================ @@ -45,9 +45,9 @@ horner_(EVE_SUPPORTS(cpu_), template EVE_FORCEINLINE constexpr auto horner_(EVE_SUPPORTS(cpu_), numeric_type const&, T0 xx, R const& r) noexcept --> decltype(detail::horner_impl(numeric_type(), xx, r)) +-> decltype(detail::horner_impl(pedantic, xx, r)) { - return detail::horner_impl(numeric_type(), xx, r); + return detail::horner_impl(pedantic, xx, r); } //================================================================================================ @@ -56,9 +56,9 @@ horner_(EVE_SUPPORTS(cpu_), numeric_type const&, T0 xx, R const& r) noexcept template EVE_FORCEINLINE constexpr auto horner_(EVE_SUPPORTS(cpu_), numeric_type const&, T0 xx, callable_one_ const&, R const& r) noexcept --> decltype(detail::horner_impl(numeric_type(), xx, one, r)) +-> decltype(detail::horner_impl(pedantic, xx, one, r)) { - return detail::horner_impl(numeric_type(), xx, one, r); + return detail::horner_impl(pedantic, xx, one, r); } //================================================================================================ @@ -68,7 +68,7 @@ template EVE_FORCEINLINE constexpr auto horner_(EVE_SUPPORTS(cpu_), numeric_type const&, T0 x, Ts... args) noexcept { - return horner_impl(numeric_type(), x, args...); + return horner_impl(pedantic, x, args...); } //================================================================================================ @@ -79,13 +79,13 @@ template EVE_FORCEINLINE constexpr auto horner_(EVE_SUPPORTS(cpu_), numeric_type const&, T0 x, callable_one_ const&, Ts... 
args) noexcept { - return horner_impl(numeric_type(), x, one, args...); + return horner_impl(pedantic, x, one, args...); } template EVE_FORCEINLINE constexpr auto horner_(EVE_SUPPORTS(cpu_), numeric_type const &, T0 x, Ts tup) noexcept { - return kumi::apply( [&](auto... m) { return horner_impl(numeric_type(), x, m...); }, tup); + return kumi::apply( [&](auto... m) { return horner_impl(pedantic, x, m...); }, tup); } } diff --git a/include/eve/module/math/numeric/impl/reverse_horner.hpp b/include/eve/module/math/numeric/impl/reverse_horner.hpp index 9a830bd61b..7989e5af64 100644 --- a/include/eve/module/math/numeric/impl/reverse_horner.hpp +++ b/include/eve/module/math/numeric/impl/reverse_horner.hpp @@ -21,9 +21,9 @@ reverse_horner_(EVE_SUPPORTS(cpu_), T0 xx, IT const& first, IT const& last) noexcept --> decltype(detail::reverse_horner_impl(numeric_type(), xx, first, last)) +-> decltype(detail::reverse_horner_impl(pedantic, xx, first, last)) { - return detail::reverse_horner_impl(numeric_type(), xx, first, last); + return detail::reverse_horner_impl(pedantic, xx, first, last); } //================================================================================================ @@ -38,9 +38,9 @@ reverse_horner_(EVE_SUPPORTS(cpu_), callable_one_ const&, IT const& first, IT const& last) noexcept --> decltype(detail::reverse_horner_impl(numeric_type(), xx, one, first, last)) +-> decltype(detail::reverse_horner_impl(pedantic, xx, one, first, last)) { - return detail::reverse_horner_impl(numeric_type(), xx, one, first, last); + return detail::reverse_horner_impl(pedantic, xx, one, first, last); } //================================================================================================ @@ -49,9 +49,9 @@ reverse_horner_(EVE_SUPPORTS(cpu_), template EVE_FORCEINLINE constexpr auto reverse_horner_(EVE_SUPPORTS(cpu_), numeric_type const&, T0 xx, R const& r) noexcept --> decltype(detail::reverse_horner_impl(numeric_type(), xx, r)) +-> 
decltype(detail::reverse_horner_impl(pedantic, xx, r)) { - return detail::reverse_horner_impl(numeric_type(), xx, r); + return detail::reverse_horner_impl(pedantic, xx, r); } //================================================================================================ @@ -64,9 +64,9 @@ reverse_horner_(EVE_SUPPORTS(cpu_), T0 xx, callable_one_ const&, R const& r) noexcept --> decltype(detail::reverse_horner_impl(numeric_type(), xx, one, r)) +-> decltype(detail::reverse_horner_impl(pedantic, xx, one, r)) { - return detail::reverse_horner_impl(numeric_type(), xx, one, r); + return detail::reverse_horner_impl(pedantic, xx, one, r); } //================================================================================================ @@ -75,9 +75,9 @@ reverse_horner_(EVE_SUPPORTS(cpu_), template EVE_FORCEINLINE constexpr auto reverse_horner_(EVE_SUPPORTS(cpu_), numeric_type const&, T0 x, Ts... args) noexcept --> decltype( reverse_horner_impl(numeric_type(), x, args...)) +-> decltype( reverse_horner_impl(pedantic, x, args...)) { - return reverse_horner_impl(numeric_type(), x, args...); + return reverse_horner_impl(pedantic, x, args...); } //================================================================================================ @@ -91,9 +91,9 @@ reverse_horner_(EVE_SUPPORTS(cpu_), T0 x, callable_one_ const&, Ts... 
args) noexcept --> decltype(reverse_horner_impl(numeric_type(), x, one, args...)) +-> decltype(reverse_horner_impl(pedantic, x, one, args...)) { - return reverse_horner_impl(numeric_type(), x, one, args...); + return reverse_horner_impl(pedantic, x, one, args...); } //================================================================================================ diff --git a/include/eve/module/math/numeric/math.hpp b/include/eve/module/math/numeric/math.hpp index 0a764d2ce4..2d6a16fc03 100644 --- a/include/eve/module/math/numeric/math.hpp +++ b/include/eve/module/math/numeric/math.hpp @@ -6,6 +6,3 @@ */ //================================================================================================== #pragma once - -#include -#include diff --git a/include/eve/module/math/pedantic/math.hpp b/include/eve/module/math/pedantic/math.hpp index 0a764d2ce4..2d6a16fc03 100644 --- a/include/eve/module/math/pedantic/math.hpp +++ b/include/eve/module/math/pedantic/math.hpp @@ -6,6 +6,3 @@ */ //================================================================================================== #pragma once - -#include -#include diff --git a/include/eve/module/math/regular/horner.hpp b/include/eve/module/math/regular/horner.hpp index 813ad2a184..441114d346 100644 --- a/include/eve/module/math/regular/horner.hpp +++ b/include/eve/module/math/regular/horner.hpp @@ -7,10 +7,30 @@ //================================================================================================== #pragma once -#include +#include +#include +#include +#include namespace eve { + template + struct horner_t : callable + { + template + EVE_FORCEINLINE constexpr common_value_t + operator()(X x, T t, Ts...ts) const noexcept + { return EVE_DISPATCH_CALL(x, t, ts...); } + + template + EVE_FORCEINLINE constexpr + eve::common_value_t, X> + operator()(X x, Tup const& t) const noexcept + { return EVE_DISPATCH_CALL(x, t); } + + EVE_CALLABLE_OBJECT(horner_t, horner_); + }; + 
//================================================================================================ //! @addtogroup math //! @{ @@ -66,20 +86,47 @@ namespace eve //! //! @groupheader{Example} //! -//! @godbolt{doc/math/regular/horner.cpp} +//! @godbolt{doc/math/horner.cpp} //! //! @groupheader{Semantic Modifiers} //! -//! * eve::pedantic, eve::numeric +//! * eve::pedantic //! -//! If d denotes one of these modifiers, the expression `d(eve::horner)(...)` -//! computes the result using `d(eve::fma)` instead of `eve::fma` in internal computations. +//! The expression `eve::horner[pedantic](...)` +//! computes the result using `eve::fma[pedantic]` instead of `eve::fma` in internal computations. //! -//! This is intended to insure more accurate computations where needed. This has no cost if -//! the system has hard wired fma but is very expansive if it is not the case. +//! This is intended to ensure more accurate computations where needed. This has no cost (and is +//! automatically done) if the system has hard wired fma but is very expensive if it is not the case. //! @} //================================================================================================ -EVE_MAKE_CALLABLE(horner_, horner); -} + inline constexpr auto horner = functor; + + namespace detail + { + template + EVE_FORCEINLINE constexpr common_value_t + horner_(EVE_REQUIRES(cpu_), O const & o, X xx, C c, Cs... cs) noexcept + { + using r_t = common_value_t; - if constexpr( sizeof...(Cs) == 0 ) return r_t(c); + else + { + auto x = r_t(xx); + r_t that{0}; + + that = fma[o](that, x, c); + ((that = fma[o](that, x, cs)), ...); + + return that; + } + } + + template + EVE_FORCEINLINE constexpr auto + horner_(EVE_REQUIRES(cpu_), O const & o, X x, Tuple const& tup) noexcept + { + return kumi::apply( [&](auto... 
m) { return horner[o](x, m...); }, tup); + } + } +} diff --git a/include/eve/module/math/regular/impl/horner.hpp b/include/eve/module/math/regular/impl/horner.hpp index ab728fedac..79b87953e5 100644 --- a/include/eve/module/math/regular/impl/horner.hpp +++ b/include/eve/module/math/regular/impl/horner.hpp @@ -17,7 +17,7 @@ template EVE_FORCEINLINE constexpr auto horner_(EVE_SUPPORTS(cpu_), T0 x, Ts... args) noexcept { - return horner_impl(regular_type(), x, args...); + return horner_impl(regular, x, args...); } //================================================================================================ @@ -38,7 +38,7 @@ EVE_FORCEINLINE constexpr auto horner_(EVE_SUPPORTS(cpu_), T0 xx, R const& r) noexcept requires(compatible_values && (!simd_value)) { - return detail::horner_impl(regular_type(), xx, r); + return detail::horner_impl(regular, xx, r); } template diff --git a/include/eve/module/polynomial/regular/impl/newton.hpp b/include/eve/module/math/regular/impl/newton.hpp similarity index 100% rename from include/eve/module/polynomial/regular/impl/newton.hpp rename to include/eve/module/math/regular/impl/newton.hpp diff --git a/include/eve/module/polynomial/regular/impl/tchebeval.hpp b/include/eve/module/math/regular/impl/tchebeval.inactive similarity index 100% rename from include/eve/module/polynomial/regular/impl/tchebeval.hpp rename to include/eve/module/math/regular/impl/tchebeval.inactive diff --git a/include/eve/module/math/regular/math.hpp b/include/eve/module/math/regular/math.hpp index a56acd41c1..6b98cc0c32 100644 --- a/include/eve/module/math/regular/math.hpp +++ b/include/eve/module/math/regular/math.hpp @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include diff --git a/include/eve/module/math/regular/newton.hpp b/include/eve/module/math/regular/newton.hpp new file mode 100644 index 0000000000..3dcb39eedc --- /dev/null +++ b/include/eve/module/math/regular/newton.hpp @@ -0,0 +1,157 @@ 
+//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include + +namespace eve +{ + template + struct newton_t : callable + { + template + EVE_FORCEINLINE constexpr common_value_t + operator()(X x, CsNs... csns) const noexcept + { return EVE_DISPATCH_CALL(x, csns...); } + + template + EVE_FORCEINLINE constexpr + eve::common_value_t + operator()(X x, kumi::tuple const & t1, kumi::tuple const & t2) const noexcept + { return EVE_DISPATCH_CALL(x, t1, t2); } + + EVE_CALLABLE_OBJECT(newton_t, newton_); + }; + +//================================================================================================ +//! @addtogroup polynomial +//! @{ +//! @var newton +//! @brief Implement the Newton scheme to evaluate polynomials. +//! +//! If \f$(a_i)_{0\le i\le n-1}\f$ denotes the coefficients of the polynomial by decreasing +//! power order, and \f$(c_i)_{0\le i\le n-2}\f$ the nodes, the Newton scheme evaluates +//! the polynomial \f$p\f$ at \f$x\f$ using the following formula: +//! +//! \f$ \displaystyle p(x) = (((a_0(x-c_0)+a_1)(x-c_1)+ ... )(x-c_{n-2}) + a_{n-1})\f$ +//! +//! **Defined in header** +//! +//! @code +//! #include +//! @endcode +//! +//! @groupheader{Callable Signatures} +//! +//! @code +//! namespace eve +//! { +//! template< eve::floating_ordered_value T, kumi::tuple C, kumi::tuple N> +//! T newton(T x, C c, N n) noexcept; //1 +//! template< eve::floating_ordered_value T, value... CN> +//! T newton(T x, CN... cn) noexcept; //2 +//! +//! } +//! @endcode +//! +//! **Parameters** +//! +//! * `x` : [real floating argument](@ref eve::floating_ordered_value). +//! * `c` : tuple containing the coefficients by decreasing power order. +//! 
* `n` : tuple containing the nodes by decreasing power order. +//! * `cn`: coefficients followed by nodes. Unless empty, the total number of values +//! must be odd. If s is this number, the first (s+1)/2 are taken as the coefficients +//! and the others are the nodes, both in decreasing power order. +//! **Return value** +//! +//! The value of the polynomial at `x` is returned. +//! +//! **Notes** +//! +//! If the coefficients or nodes are simd values of cardinal N, this means you simultaneously +//! compute the values of N polynomials. +//! * If x is scalar, the polynomials are all computed at the same point +//! * If x is simd, the nth polynomial is computed on the nth value of x +//! +//! @groupheader{Example} +//! +//! @godbolt{doc/math/newton.cpp} +//! +//! @groupheader{Semantic Modifiers} +//! +//! * eve::pedantic +//! +//! The expression `eve::newton[pedantic](...)` +//! computes the result using `eve::fma[pedantic]` instead of `eve::fma` in +//! internal computation. +//! +//! This is intended to ensure more accurate computations where needed. This has no cost (and is +//! automatically done) if the system has hard wired fma but is very expensive if it is not the case. +//! @} +//================================================================================================ + inline constexpr auto newton = functor; + + namespace detail + { + template + EVE_FORCEINLINE constexpr auto + newton_(EVE_REQUIRES(cpu_), O const &o, X xx, kumi::tuple const& cs + , kumi::tuple const& ns) + { + EVE_ASSERT((kumi::size_v == 0 && kumi::size_v == 0)|| + (kumi::size_v == kumi::size_v+1), "nodes and coefs have incompatible sizes"); + + using r1_t = common_value_t; + using r2_t = common_value_t; + using r_t = common_value_t; + if constexpr(kumi::size_v == 0) + { + return zero(as()); + } + else if constexpr(kumi::size_v == 1) + { + return r_t(get<0>(cs)); + } + else + { + r_t that = r_t(get<0>(cs)); + auto ans = kumi::zip(kumi::pop_front(cs), ns); + auto compute = [&](auto ... 
args){ + auto doit = [&](auto an){ + auto [a, n] = an; + that = fma[o](that, xx-n, a); + }; + ((doit(args), ...)); + return that; + }; + return kumi::apply(compute, ans); + } + } + + template + EVE_FORCEINLINE constexpr auto + newton_(EVE_REQUIRES(cpu_), O const &o, X xx, CsNs... cns) + { + using r_t = common_value_t; + auto x = r_t(xx); + constexpr auto s = sizeof...(cns); + if constexpr(s == 0) + return zero(as(xx)); + else + { + EVE_ASSERT(s&1, "nodes and coefs have incompatible sizes"); + kumi::result::generate_t tcn{r_t{cns}...}; + auto [tc, tn] = split(tcn, kumi::index<(s+1)/2>); + return newton[o](x,tc,tn); + } + } + } +} diff --git a/include/eve/module/math/regular/reverse_horner.hpp b/include/eve/module/math/regular/reverse_horner.hpp index 830a974ae4..cbe4f23050 100644 --- a/include/eve/module/math/regular/reverse_horner.hpp +++ b/include/eve/module/math/regular/reverse_horner.hpp @@ -7,10 +7,31 @@ //================================================================================================== #pragma once -#include +#include +#include +#include +#include namespace eve { + template + struct reverse_horner_t : callable + { + template + EVE_FORCEINLINE constexpr common_value_t + operator()(X x, T t, Ts...ts) const noexcept + { return EVE_DISPATCH_CALL(x, t, ts...); } + + template + EVE_FORCEINLINE constexpr + eve::common_value_t, X> + operator()(X x, Tup const& t) const noexcept + { return EVE_DISPATCH_CALL(x, t); } + + EVE_CALLABLE_OBJECT(reverse_horner_t, reverse_horner_); + }; + + //================================================================================================ //! @addtogroup math //! @{ @@ -66,21 +87,50 @@ namespace eve //! //! @groupheader{Example} //! -//! @godbolt{doc/math/regular/reverse_horner.cpp} +//! @godbolt{doc/math/reverse_horner.cpp} //! //! @groupheader{Semantic Modifiers} //! -//! * eve::pedantic, eve::numeric +//! * eve::pedantic //! -//! 
If d denotes one of these modifiers, the expression `d(eve::reverse_horner)(...)` -//! computes the result using `d(eve::fma)` instead of `eve::fma` in internal computation. +//! The expression `eve::reverse_horner[pedantic](...)` +//! computes the result using `eve::fma[pedantic]` instead of `eve::fma` in internal computation. //! -//! This is intended to insure more accurate computations where needed. This has no cost if -//! the system has hard wired fma but is very expansive if it is not the case. +//! This is intended to ensure more accurate computations where needed. This has no cost (and is +//! automatically done) if the system has hard wired fma but is very expensive if it is not the case. //! //! @} //================================================================================================ -EVE_MAKE_CALLABLE(reverse_horner_, reverse_horner); -} + inline constexpr auto reverse_horner = functor; + + namespace detail + { + template + EVE_FORCEINLINE constexpr common_value_t + reverse_horner_(EVE_REQUIRES(cpu_), O const & o, X xx, C c0, Cs... cs) noexcept + { + if constexpr((... 
&& scalar_value)) + { + using e_t = element_type_t; + using t_t = kumi::result::generate_t; + t_t c{e_t(c0), e_t(cs)...}; + return reverse_horner[o](xx, c); + } + else + { + using r_t = common_value_t; + auto x = r_t(xx); + using t_t = kumi::result::generate_t; + t_t c {r_t{c0}, r_t{cs}...}; + return reverse_horner[o](x, c); + } + } -#include + template + EVE_FORCEINLINE constexpr common_value_t + reverse_horner_(EVE_REQUIRES(cpu_), O const & o, X x, kumi::tuple tup) noexcept + { + return horner[o](x, kumi::reverse(tup)); + } + } +} diff --git a/include/eve/module/polynomial/regular/tchebeval.hpp b/include/eve/module/math/regular/tchebeval.inactive similarity index 62% rename from include/eve/module/polynomial/regular/tchebeval.hpp rename to include/eve/module/math/regular/tchebeval.inactive index 656a885c3a..b6f629f925 100644 --- a/include/eve/module/polynomial/regular/tchebeval.hpp +++ b/include/eve/module/math/regular/tchebeval.inactive @@ -7,10 +7,30 @@ //================================================================================================== #pragma once -#include +#include +#include +#include +#include namespace eve { + template + struct tchebeval_t : callable + { + template + EVE_FORCEINLINE constexpr common_value_t + operator()(X x, T t, Ts...ts) const noexcept + { return EVE_DISPATCH_CALL(x, t, ts...); } + + template + EVE_FORCEINLINE constexpr + eve::common_value_t, X> + operator()(X x, Tup const& t) const noexcept + { return EVE_DISPATCH_CALL(x, t); } + + EVE_CALLABLE_OBJECT(tchebeval_t, tchebeval_); + }; + //================================================================================================ //! @addtogroup polynomial //! @{ @@ -24,7 +44,7 @@ namespace eve //! **Defined in header** //! //! @code -//! #include +//! #include //! @endcode //! //! @groupheader{Callable Signatures} @@ -32,21 +52,22 @@ namespace eve //! @code //! namespace eve //! { -//! template< eve::floating_ordered_value T, eve::floating_ordered_value Cs ...> +//! 
template< eve::floating_value T, eve::floating_value Cs ...> //! T tchebeval(T x, Cs ... cs) noexcept; // 1 //! -//! template< eve::floating_ordered_value T, eve::range R> -//! T tchebeval(T x, R r) noexcept; // 2 +//! template< eve::floating_value T, kumi::tuple Tup> +//! T tchebeval(T x, Tup t) noexcept; // 2 //! -//! template< eve::floating_ordered_value T, eve::floating_ordered_value U -//! , eve::floating_ordered_value V, eve::range R> -//! T tchebeval(T x, U a, V b, R r) noexcept; // 3 +//! template< eve::floating_value T, +//! eve::floating_value U, +//! eve::floating_value V, kumi::tuple Tup> +//! T tchebeval(T x, U a, V b, Tup t) noexcept; // 3 //! } //! @endcode //! //! 1. Evaluates the Tchebytchev polynomial using a variadic list of coefficients. -//! 2. Evaluates the Tchebytchev polynomial using a range of coefficients. -//! 3. Evaluates the Tchebytchev polynomial using a range of coefficients in interval `[a,b]`. +//! 2. Evaluates the Tchebytchev polynomial using a tuple of coefficients. +//! 3. Evaluates the Tchebytchev polynomial using a tuple of coefficients in interval `[a,b]`. //! //! **Parameters** //! @@ -71,10 +92,10 @@ namespace eve //! //! @groupheader{Example} //! -//! @godbolt{doc/polynomial/regular/tchebeval.cpp} +//! @godbolt{doc/math/tchebeval.cpp} //! 
@} //================================================================================================ -EVE_MAKE_CALLABLE(tchebeval_, tchebeval); -} + inline constexpr auto tchebeval = functor; -#include + namespace detail + { diff --git a/include/eve/module/polynomial/detail/newton_impl.hpp b/include/eve/module/polynomial/detail/newton_impl.hpp index b3b0e00d14..564ac2b668 100644 --- a/include/eve/module/polynomial/detail/newton_impl.hpp +++ b/include/eve/module/polynomial/detail/newton_impl.hpp @@ -37,7 +37,7 @@ newton_impl(D const& d, T0 xx, IT1 const& firstc, IT1 const& lastc, IT2 const& f auto curn = firstn; advance(curc, 1); advance(curn, 1); - auto dfma = d(fma); + auto dfma = fma[d]; r_t that(dfma(*firstc, sub(x, *firstn), *curc)); auto step = [&](auto that, auto argc, auto argn) { return dfma(that, sub(x, argn), argc); }; for( advance(curc, 1); curc != lastc; advance(curc, 1), advance(curn, 1) ) @@ -72,7 +72,7 @@ newton_impl(D const& d, T0 xx, R1 const& rc, R2 rn) noexcept auto curc = firstc; advance(curc, 1); advance(curn, 1); - auto dfma = d(fma); + auto dfma = fma[d]; r_t that(dfma(*firstc, sub(x, *firstn), *curc)); auto step = [&](auto that, auto argc, auto argn) { return dfma( that, sub(x, argn), argc); }; for( advance(curc, 1); curc != lastc; advance(curc, 1), advance(curn, 1) ) diff --git a/include/eve/module/polynomial/detail/tchebeval_impl.hpp b/include/eve/module/polynomial/detail/tchebeval_impl.inactive similarity index 98% rename from include/eve/module/polynomial/detail/tchebeval_impl.hpp rename to include/eve/module/polynomial/detail/tchebeval_impl.inactive index ba032444a3..4d7edfc21c 100644 --- a/include/eve/module/polynomial/detail/tchebeval_impl.hpp +++ b/include/eve/module/polynomial/detail/tchebeval_impl.inactive @@ -33,7 +33,7 @@ tchebeval_impl(D const& d, T0 xx, R const& r) noexcept else { --cur; - auto dfma = d(fma); + auto dfma = fma[d]; r_t b2 = zero(as()); r_t b1 = r_t(*cur--); for( ; cur != first; --cur ) @@ -59,7 +59,7 @@ 
tchebeval_impl(D const& d, T0 xx, T1 a, T2 b, R const& r) noexcept if( std::distance(first, cur) == 1 ) return r_t((*first) / 2); else { - auto dfma = d(fma); + auto dfma = fma[d]; auto up = [&dfma, &r, first](auto, auto t) { auto cur = std::end(r); diff --git a/include/eve/module/polynomial/numeric/impl/newton.hpp b/include/eve/module/polynomial/numeric/impl/newton.hpp index 4416141fbd..045dd1e729 100644 --- a/include/eve/module/polynomial/numeric/impl/newton.hpp +++ b/include/eve/module/polynomial/numeric/impl/newton.hpp @@ -23,9 +23,9 @@ newton_(EVE_SUPPORTS(cpu_), IT0 const& firstc, IT0 const& lastc, IT1 const& firstn) noexcept --> decltype( detail::newton_impl(numeric_type(), xx, firstc, lastc, firstn)) +-> decltype( detail::newton_impl(pedantic, xx, firstc, lastc, firstn)) { - return detail::newton_impl(numeric_type(), xx, firstc, lastc, firstn); + return detail::newton_impl(pedantic, xx, firstc, lastc, firstn); } //================================================================================================ @@ -34,8 +34,8 @@ newton_(EVE_SUPPORTS(cpu_), template EVE_FORCEINLINE constexpr auto newton_(EVE_SUPPORTS(cpu_), numeric_type const&, T0 xx, R1 const& rc, R2 const& rn) noexcept --> decltype(detail::newton_impl(numeric_type(), xx, rc, rn)) +-> decltype(detail::newton_impl(pedantic, xx, rc, rn)) { - return detail::newton_impl(numeric_type(), xx, rc, rn); + return detail::newton_impl(pedantic, xx, rc, rn); } } diff --git a/include/eve/module/polynomial/regular/newton.hpp b/include/eve/module/polynomial/regular/newton.hpp deleted file mode 100644 index 4dd45cbce9..0000000000 --- a/include/eve/module/polynomial/regular/newton.hpp +++ /dev/null @@ -1,78 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ 
-//================================================================================================== -#pragma once - -#include - -namespace eve -{ -//================================================================================================ -//! @addtogroup polynomial -//! @{ -//! @var newton -//! @brief Implement the Newton scheme to evaluate polynomials. -//! -//! If \f$(a_i)_{0\le i\le n-1}\f$ denotes the coefficients of the polynomial by decreasing -//! power order, and \f$(c_i)_{0\le i\le n-2}\f$ the nodes, the Newton scheme evaluates -//! the polynom \f$p\f$ at \f$x\f$ using the following formula : -//! -//! \f$ \displaystyle p(x) = (((a_0(x-c_0)+a_1)(x-c_1)+ ... )(x-c_{n-2}) + a_{n-1})\f$ -//! -//! **Defined in header** -//! -//! @code -//! #include -//! @endcode -//! -//! @groupheader{Callable Signatures} -//! -//! @code -//! namespace eve -//! { -//! template< eve::floating_ordered_value T, eve::Range C, eve::Range N> -//! T newton(T x, C c, N n) noexcept; -//! } -//! @endcode -//! -//! **Parameters** -//! -//! * `x` : [real floating argument](@ref eve::floating_ordered_value). -//! * `r` : Range containing The coefficients by decreasing power order. -//! * `n` : Range containing The nodes by decreasing power order. -//! -//! **Return value** -//! -//! The value of the polynom at `x` is returned. -//! -//! **Notes** -//! -//! If the coefficients or nodes are simd values of cardinal N, this means you simultaneously -//! compute the values of N polynomials. -//! * If x is scalar, the polynomials are all computed at the same point -//! * If x is simd, the nth polynomial is computed on the nth value of x -//! -//! @groupheader{Example} -//! -//! @godbolt{doc/polynomial/regular/newton.cpp} -//! -//! @groupheader{Semantic Modifiers} -//! -//! * eve::pedantic, eve::numeric -//! -//! If d denotes one of these modifiers, the expression `d(eve::newton)(...)` -//! computes the result using `d(eve::fma)` instead of `eve::fma` in -//! internal computation. -//! 
-//! This is intended to insure more accurate computations where needed. This has no cost if -//! the system has hard wired fma but is very expansive if it is not the case. -//! @} -//================================================================================================ -EVE_MAKE_CALLABLE(newton_, newton); -} - -#include diff --git a/include/eve/module/polynomial/regular/polynomial.hpp b/include/eve/module/polynomial/regular/polynomial.hpp index 83806a4472..4498fa77ba 100644 --- a/include/eve/module/polynomial/regular/polynomial.hpp +++ b/include/eve/module/polynomial/regular/polynomial.hpp @@ -12,6 +12,4 @@ #include #include #include -#include -#include #include diff --git a/include/eve/traits/overload/default_behaviors.hpp b/include/eve/traits/overload/default_behaviors.hpp index 12155a86f6..ae3f0e9e93 100644 --- a/include/eve/traits/overload/default_behaviors.hpp +++ b/include/eve/traits/overload/default_behaviors.hpp @@ -8,7 +8,7 @@ #pragma once #include -#include // TEMPORARY +#include #include // TEMPORARY #include @@ -35,7 +35,8 @@ namespace eve return Func{opts}; } - template EVE_FORCEINLINE constexpr auto operator[](T t) const requires( requires(base const& b) { b[t];} ) + template + EVE_FORCEINLINE constexpr auto operator[](T t) const requires( requires(base const& b) { b[t];} ) { auto new_traits = base::operator[](t); return Func{new_traits}; @@ -47,7 +48,7 @@ namespace eve return (*this)[as_option(t)]; } - template void operator[](T t) const + template EVE_FORCEINLINE void operator[](T const& t) const // This requires is also TEMPORARY requires( !callable_options && !requires(base const& b) { b[t];} && !decorator) =delete; @@ -58,7 +59,8 @@ namespace eve } protected: - constexpr Func const& derived() const { return static_castconst&>(*this); } + EVE_FORCEINLINE constexpr + Func const& derived() const { return static_castconst&>(*this); } }; 
//==================================================================================================================== @@ -99,37 +101,35 @@ namespace eve EVE_FORCEINLINE constexpr auto behavior(auto arch, O const& opts, T x0, Ts const&... xs) const requires(!match_option) { - // Grab the condition and drop it from the callable - auto cond = opts[condition_key]; - auto drop = rbr::drop(condition_key, opts); - auto rmv_cond = options{drop}; - - [[maybe_unused]] Func const f{rmv_cond}; - - // Check that the mask and the value are of same kind if simd - constexpr bool compatible_mask = !( simd_value - && scalar_value - ); - static_assert(compatible_mask, "[EVE] - Scalar values can't be masked by SIMD logicals."); - - // Shush any other cascading errors - if constexpr(!compatible_mask) return ignore{}; - // Or proceed to find the proper way to handle this masked call + // Grab the condition and drop it from the callable + auto[cond, rmv_cond] = opts.extract(condition_key); + using cond_t = decltype(cond); + [[maybe_unused]] Func const f{rmv_cond}; + + // Check that the mask and the value are of same kind if simd + constexpr bool compatible_mask = !( simd_value + && scalar_value + ); + static_assert(compatible_mask, "[EVE] - Scalar values can't be masked by SIMD logicals."); + + // Shush any other cascading errors + if constexpr(!compatible_mask) return ignore{}; + // Or proceed to find the proper way to handle this masked call + else + { + // Check if func_(arch, cond, opts, ...) exists + constexpr bool supports_mask = requires(cond_t c){ func_t::deferred_call(arch, c, opts, x0, xs...); }; + + // If the conditional call is supported, call it + // Note that as we pruned out ignore_none earlier, the only special cases inside this call is ignore_all + if constexpr( supports_mask ) return func_t::deferred_call(arch, cond, opts, x0, xs...); else { - // Check if func_(arch, cond, opts, ...) 
exists - constexpr bool supports_mask = requires{ func_t::deferred_call(arch, cond, opts, x0, xs...); }; - - // If the conditional call is supported, call it - // Note that as we pruned out ignore_none earlier, the only special cases inside this call is ignore_all - if constexpr( supports_mask ) return func_t::deferred_call(arch, cond, opts, x0, xs...); - else - { - // if not, call the non-masked version then mask piecewise - return detail::mask_op(cond, detail::return_2nd, x0, f(x0,xs...)); - } + // if not, call the non-masked version then mask piecewise + return detail::mask_op(cond, detail::return_2nd, x0, f(x0,xs...)); } } + } }; diff --git a/include/eve/traits/overload/supports.hpp b/include/eve/traits/overload/supports.hpp index 97a17b5b4a..fb98789a52 100644 --- a/include/eve/traits/overload/supports.hpp +++ b/include/eve/traits/overload/supports.hpp @@ -7,6 +7,7 @@ //====================================================================================================================== #pragma once +#include "eve/detail/raberu.hpp" #include #include #include @@ -55,6 +56,14 @@ namespace eve auto dropped = rbr::drop(k0, *this); return options{dropped}.drop(ks...); } + + template + constexpr EVE_FORCEINLINE auto extract(Ks const& kws) const noexcept + { + auto value = (*this)[kws]; + auto dropped = rbr::drop(kws, *this); + return kumi::tuple{value, options{dropped}}; + } }; template @@ -133,14 +142,14 @@ namespace eve //! @return A new @callable with the options `o` set. 
//================================================================================================================== template - constexpr auto operator[](O o) const + EVE_FORCEINLINE constexpr auto operator[](O o) const requires( requires(OptionsValues const& ov) { this->process(ov,o);} ) { return process(static_cast(*this), o); } /// Retrieves the current options' state, including processed default - constexpr auto options() const + EVE_FORCEINLINE constexpr auto options() const { return kumi::fold_left( [&](auto acc, auto const& m) { return m.default_to(acc); } , kumi::tuple{} diff --git a/test/doc/CMakeLists.txt b/test/doc/CMakeLists.txt index 62e00b25f8..bf1109d4a7 100644 --- a/test/doc/CMakeLists.txt +++ b/test/doc/CMakeLists.txt @@ -58,6 +58,7 @@ glob_unit("doc" ${doc_root} "core/raw/*.cpp" ) glob_unit("doc" ${doc_root} "core/regular/*.cpp" ) glob_unit("doc" ${doc_root} "core/roundings/*.cpp" ) glob_unit("doc" ${doc_root} "core/saturated/*.cpp" ) +glob_unit("doc" ${doc_root} "core/*.cpp" ) ##================================================================================================== ## GLOB and process elliptic doc tests diff --git a/test/doc/core/regular/fma.cpp b/test/doc/core/regular/fma.cpp index b3d07549f6..704b2c578f 100644 --- a/test/doc/core/regular/fma.cpp +++ b/test/doc/core/regular/fma.cpp @@ -20,8 +20,7 @@ int main() << " <- pf = " << pf << '\n' << " <- qf = " << qf << '\n' << " -> of*pf+qf = " << of*pf+qf << '\n' - << " -> pedantic(fma)(of, pf, qf) = " << eve::pedantic(eve::fma)(of, pf, qf) << '\n' - << " -> numeric(fma)(of, pf, qf) = " << eve::numeric(eve::fma)(of, pf, qf) << '\n' + << " -> fma[pedantic](of, pf, qf) = " << eve::fma[eve::pedantic](of, pf, qf) << '\n' << " -> fma(of, pf, qf) = " << eve::fma(of, pf, qf) << '\n' << "\n if the last fma result ends by '0, inf}', it is because\n" << " the system has no simd fma family intrinsics\n" @@ -29,13 +28,11 @@ int main() std::cout << "---- scalar" << std::setprecision(10) << '\n' << " <- 
vm = " << vm << '\n' - << " -> pedantic(fma)(vm, 2.0f, -vm) = " << eve::pedantic(eve::fma)(vm, 2.0f, -vm) << '\n' - << " -> numeric(fma)(vm, 2.0f, -vm) = " << eve::numeric(eve::fma)(vm, 2.0f, -vm) << '\n' + << " -> fma[pedantic](vm, 2.0f, -vm) = " << eve::fma[eve::pedantic](vm, 2.0f, -vm) << '\n' << " -> fma(vm, 2.0f, -vm) = " << eve::fma(vm, 2.0f, -vm) << '\n' << " <- esm1 = " << esm1 << '\n' << " <- esp1 = " << esp1 << '\n' - << " -> pedantic(fma)(esp1, esm1, 1.0f) = " << eve::pedantic(eve::fma)(esp1, esm1, 1.0f) << '\n' - << " -> numeric(fma)(esp1, esm1, 1.0f) = " << eve::numeric(eve::fma)(esp1, esm1, 1.0f) << '\n' + << " -> fma[pedantic](esp1, esm1, 1.0f) = " << eve::fma[eve::pedantic](esp1, esm1, 1.0f) << '\n' << " -> fma(esp1, esm1, -1.0f) = " << eve::fma(esp1, esm1, 1.0f) << '\n'; return 0; } diff --git a/test/doc/polynomial/regular/tchebeval.cpp b/test/doc/core/tchebeval.inactive similarity index 100% rename from test/doc/polynomial/regular/tchebeval.cpp rename to test/doc/core/tchebeval.inactive diff --git a/test/doc/polynomial/regular/newton.cpp b/test/doc/math/newton.cpp similarity index 100% rename from test/doc/polynomial/regular/newton.cpp rename to test/doc/math/newton.cpp diff --git a/test/doc/math/regular/newton.cpp b/test/doc/math/regular/newton.cpp new file mode 100644 index 0000000000..44a6250a56 --- /dev/null +++ b/test/doc/math/regular/newton.cpp @@ -0,0 +1,33 @@ +#include +#include +#include + + +using w_t = eve::wide>; + +int main() +{ + + w_t xd = {-0.3, 0.5, 0.0, 2.0}; + w_t x(0.2); + kumi::tuple cs {1, 2, 3}; + kumi::tuple ns {6, 7}; + + std::cout << "---- simd" << '\n' + << "<- xd = " << xd << '\n' + << "<- x = " << x << '\n' + << "-> newton(x, 1, 2, 3, 6, 7) = " << eve::newton(x, 1, 2, 3, 6, 7) << '\n' + << "-> newton(xd, 1, 2, 3, 6, 7) = " << eve::newton(xd, 1, 2, 3, 6, 7) << '\n' + << "-> newton(x, cs, ns) = " << eve::newton(x, cs, ns) << '\n' + << "-> newton(xd, cs, ns) = " << eve::newton(xd, cs, ns) << '\n' + ; + + double xs = 0.2; 
+ + std::cout << "---- scalar" << '\n' + << "<- xs = " << xs << '\n' + << "-> newton(xs, 1.0, 2.0, 3.0, 6.0, 7.0) = " << eve::newton(xs, 1.0, 2.0, 3.0, 6.0, 7.0) << '\n'; + + + return 0; +} diff --git a/test/unit/module/core/fam.cpp b/test/unit/module/core/fam.cpp index faf8e8e665..abaf75415c 100644 --- a/test/unit/module/core/fam.cpp +++ b/test/unit/module/core/fam.cpp @@ -59,7 +59,7 @@ TTS_CASE_WITH("Check precision behavior of fam on real types", using eve::fma; using eve::detail::map; TTS_IEEE_EQUAL(eve::pedantic(fam)(-eve::one(eve::as()), a0, a1), - eve::pedantic(fma)(a0, a1, -eve::one(eve::as()))); + fma[eve::pedantic](a0, a1, -eve::one(eve::as()))); }; //================================================================================================== @@ -117,8 +117,8 @@ TTS_CASE_WITH("Check behavior of fam on all types full range", using eve::detail::map; TTS_ULP_EQUAL(fam(a0, a1, a2), fma(a1, a2, a0), 10.5); - TTS_IEEE_EQUAL(eve::pedantic(fam)(a0, a1, a2), eve::pedantic(fma)(a1, a2, a0)); - TTS_IEEE_EQUAL(eve::numeric(fam)(a0, a1, a2), eve::pedantic(fma)(a1, a2, a0)); + TTS_IEEE_EQUAL(eve::pedantic(fam)(a0, a1, a2), fma[eve::pedantic](a1, a2, a0)); + TTS_IEEE_EQUAL(eve::numeric(fam)(a0, a1, a2), fma[eve::pedantic](a1, a2, a0)); }; //================================================================================================== diff --git a/test/unit/module/core/fma.cpp b/test/unit/module/core/fma.cpp index d13c2991da..f08a399038 100644 --- a/test/unit/module/core/fma.cpp +++ b/test/unit/module/core/fma.cpp @@ -5,11 +5,8 @@ **/ //================================================================================================== #include "test.hpp" - #include -#include - //================================================================================================== // Types tests //================================================================================================== @@ -58,8 +55,8 @@ TTS_CASE_WITH("Check precision behavior of fma on real 
types", using eve::detail::map; using v_t = eve::element_type_t; TTS_ULP_EQUAL( - eve::pedantic(fma)(a0, a1, -eve::one(eve::as())), - map([&](auto e, auto f) -> v_t { return eve::pedantic(fma)(e, f, v_t(-1)); }, a0, a1), + eve::fma[eve::pedantic](a0, a1, -eve::one(eve::as())), + map([&](auto e, auto f) -> v_t { return eve::fma[eve::pedantic](e, f, v_t(-1)); }, a0, a1), 2); }; @@ -77,15 +74,15 @@ TTS_CASE_WITH("Check precision behavior of fma on real types", using eve::detail::map; using v_t = eve::element_type_t; TTS_ULP_EQUAL( - eve::pedantic(fma)(a0, a1, -eve::one(eve::as())), - map([&](auto e, auto f) -> v_t { return eve::pedantic(fma)(e, f, v_t(-1)); }, a0, a1), + eve::fma[eve::pedantic](a0, a1, -eve::one(eve::as())), + map([&](auto e, auto f) -> v_t { return eve::fma[eve::pedantic](e, f, v_t(-1)); }, a0, a1), 2); }; //================================================================================================== // fma promote tests //================================================================================================== -TTS_CASE_WITH("Check behavior of promote(fma) on all types", +TTS_CASE_WITH("Check behavior of fma[promote] on all types", eve::test::simd::all_types, tts::generate(tts::randoms(eve::valmin, eve::valmax), tts::randoms(eve::valmin, eve::valmax))) @@ -98,24 +95,24 @@ TTS_CASE_WITH("Check behavior of promote(fma) on all types", constexpr int N = eve::cardinal_v; eve::wide> fa([](auto i, auto){return float(i)/2; }); - auto r1 = promote(fma)(a0, a1, fa); + auto r1 = fma[promote](a0, a1, fa); using er1_t = eve::element_type_t; auto refr1 = eve::fma(eve::convert(a0, eve::as()), eve::convert(a1, eve::as()), eve::convert(fa, eve::as())); TTS_ULP_EQUAL(r1, refr1, 2.0); eve::wide> da([](auto i, auto){return double(i)/3; }); - auto r2 = promote(fma)(a0, da, a1); + auto r2 = fma[promote](a0, da, a1); using er2_t = eve::element_type_t; auto refr2 = eve::fma(eve::convert(a0, eve::as()), eve::convert(da, eve::as()), eve::convert(a1, 
eve::as())); TTS_ULP_EQUAL(r2, refr2, 0.5); eve::wide> ia([](auto i, auto){return int(i); }); - auto r3 = promote(fma)(ia, a0, a1); + auto r3 = fma[promote](ia, a0, a1); using er3_t = eve::element_type_t; auto refr3 = eve::fma(eve::convert(ia, eve::as()), eve::convert(a0, eve::as()), eve::convert(a1, eve::as())); TTS_ULP_EQUAL(r3, refr3, 0.5); - auto r4 = promote(fma)(ia, da, a1); + auto r4 = fma[promote](ia, da, a1); using er4_t = eve::element_type_t; auto refr4= eve::fma(eve::convert(ia, eve::as()), eve::convert(da, eve::as()), eve::convert(a1, eve::as())); TTS_ULP_EQUAL(r4, refr4, 0.5); @@ -124,16 +121,18 @@ TTS_CASE_WITH("Check behavior of promote(fma) on all types", //================================================================================================== // fma masked //================================================================================================== -TTS_CASE_WITH("Check behavior of fma on all types full range", +TTS_CASE_WITH("Check behavior of masked fma on all types", eve::test::simd::all_types, - tts::generate(tts::randoms(eve::valmin, eve::valmax), - tts::randoms(eve::valmin, eve::valmax), - tts::randoms(eve::valmin, eve::valmax), + tts::generate(tts::randoms(1, 5), + tts::randoms(1, 5), + tts::randoms(1, 5), tts::logicals(0, 3))) (T const& a0, T const& a1, T const& a2, M const& t) { - using eve::as; using eve::fma; + using eve::if_; - TTS_IEEE_EQUAL(fma[t](a0, a1, a2), eve::if_else(t, fma[t](a0, a1, a2), a0)); + TTS_IEEE_EQUAL(fma[t](a0, a1, a2), eve::if_else(t, fma(a0, a1, a2), a0)); + TTS_IEEE_EQUAL(fma[if_(t).else_(100)](a0, a1, a2), eve::if_else(t, fma(a0, a1, a2), 100)); + TTS_IEEE_EQUAL(fma[eve::ignore_all](a0, a1, a2), a0); }; diff --git a/test/unit/module/polynomial/tchebeval.cpp b/test/unit/module/core/tchebeval.inactive similarity index 100% rename from test/unit/module/polynomial/tchebeval.cpp rename to test/unit/module/core/tchebeval.inactive diff --git a/test/unit/module/math/horner.cpp 
b/test/unit/module/math/horner.cpp index c4de580d40..f1039ccef9 100644 --- a/test/unit/module/math/horner.cpp +++ b/test/unit/module/math/horner.cpp @@ -8,18 +8,12 @@ #include "test.hpp" #include -#include -#include -#include -#include -#include +#include //================================================================================================== //== Types tests //================================================================================================== -TTS_CASE_TPL("Check return types of horner on wide", eve::test::simd::all_types - -) +TTS_CASE_TPL("Check return types of horner on wide", eve::test::simd::ieee_reals) (tts::type) { using v_t = eve::element_type_t; @@ -38,97 +32,36 @@ TTS_CASE_TPL("Check return types of horner on wide", eve::test::simd::all_types //================================================================================================== TTS_CASE_WITH("Check behavior of horner on wide", eve::test::simd::ieee_reals, - tts::generate(tts::randoms(-1.0, 1.0)) - )//generate(tts::ramp(0))) + tts::generate(tts::randoms(-10.0, 10.0)) + ) (T const& a0) { using eve::fma; using eve::horner; - using eve::numeric; - using eve::one; using eve::pedantic; - using eve::compensated; - //============================================================================ - //== variadic - //============================================================================ TTS_EQUAL(horner(a0, 0), T(0)); TTS_EQUAL(horner(a0, 1), T(1)); TTS_EQUAL(horner(a0, 1, 2), fma(a0, 1, 2)); TTS_EQUAL(horner(a0, 1, 2, 3), fma(a0, fma(a0, 1, 2), 3)); - TTS_EQUAL(pedantic(horner)(a0, 0), T(0)); - TTS_EQUAL(pedantic(horner)(a0, 1), T(1)); - TTS_EQUAL(pedantic(horner)(a0, 1, 2), pedantic(fma)(a0, 1, 2)); - TTS_EQUAL(pedantic(horner)(a0, 1, 2, 3), pedantic(fma)(a0, pedantic(fma)(a0, 1, 2), 3)); - - TTS_EQUAL(numeric(horner)(a0, 0), T(0)); - TTS_EQUAL(numeric(horner)(a0, 1), T(1)); - TTS_EQUAL(numeric(horner)(a0, 1, 2), numeric(fma)(a0, 1, 2)); - TTS_EQUAL(numeric(horner)(a0, 
1, 2, 3), numeric(fma)(a0, numeric(fma)(a0, 1, 2), 3)); - - - { - //============================================================================ - //== tuples - //============================================================================ - auto tab0 = kumi::tuple{}; - auto tab1 = kumi::tuple{1}; - auto tab2 = kumi::tuple{1, 2}; - auto tab3 = kumi::tuple{1, 2, 3}; - - TTS_EQUAL((horner)(a0, tab0), T(0)); - TTS_EQUAL((horner)(a0, tab1), T(1)); - TTS_EQUAL((horner)(a0, tab2), (fma)(a0, 1, 2)); - TTS_EQUAL((horner)(a0, tab3), (fma)(a0, (fma)(a0, 1, 2), 3)); - - TTS_EQUAL(pedantic(horner)(a0, tab0), T(0)); - TTS_EQUAL(pedantic(horner)(a0, tab1), T(1)); - TTS_EQUAL(pedantic(horner)(a0, tab2), (fma)(a0, 1, 2)); - TTS_EQUAL(pedantic(horner)(a0, tab3), (fma)(a0, (fma)(a0, 1, 2), 3)); - - TTS_EQUAL(numeric(horner)(a0, tab0), T(0)); - TTS_EQUAL(numeric(horner)(a0, tab1), T(1)); - TTS_EQUAL(numeric(horner)(a0, tab2), (fma)(a0, 1, 2)); - TTS_EQUAL(numeric(horner)(a0, tab3), (fma)(a0, (fma)(a0, 1, 2), 3)); + TTS_EQUAL(horner[pedantic](a0, 0), T(0)); + TTS_EQUAL(horner[pedantic](a0, 1), T(1)); + TTS_EQUAL(horner[pedantic](a0, 1, 2), fma[pedantic](a0, 1, 2)); + TTS_EQUAL(horner[pedantic](a0, 1, 2, 3), fma[pedantic](a0, fma[pedantic](a0, 1, 2), 3)); - }; - - { - //============================================================================ - //== ranges - //============================================================================ - using v_t = eve::element_type_t; - - std::vector tab0; // std does not want array of size 0 - std::array tab1 = {1}; - std::array tab2 = {1, 2}; - std::array tab3 = {1, 2, 3}; - - TTS_EQUAL((horner)(a0, tab0), T(0)); - TTS_EQUAL((horner)(a0, tab1), T(1)); - TTS_EQUAL((horner)(a0, tab2), (fma)(a0, 1, 2)); - TTS_EQUAL((horner)(a0, tab3), (fma)(a0, (fma)(a0, 1, 2), 3)); - - TTS_EQUAL(pedantic(horner)(a0, tab0), T(0)); - TTS_EQUAL(pedantic(horner)(a0, tab1), T(1)); - TTS_EQUAL(pedantic(horner)(a0, tab2), (fma)(a0, 1, 2)); - 
TTS_EQUAL(pedantic(horner)(a0, tab3), (fma)(a0, (fma)(a0, 1, 2), 3)); - - TTS_EQUAL(numeric(horner)(a0, tab0), T(0)); - TTS_EQUAL(numeric(horner)(a0, tab1), T(1)); - TTS_EQUAL(numeric(horner)(a0, tab2), (fma)(a0, 1, 2)); - TTS_EQUAL(numeric(horner)(a0, tab3), (fma)(a0, (fma)(a0, 1, 2), 3)); - - TTS_EQUAL(compensated(horner)(a0, tab0), T(0)); - TTS_EQUAL(compensated(horner)(a0, tab1), T(1)); - TTS_EQUAL(compensated(horner)(a0, tab2), (fma)(a0, 1, 2)); - TTS_EQUAL(compensated(horner)(a0, tab3), (fma)(a0, (fma)(a0, 1, 2), 3)); + //============================================================================ + //== tuples + //============================================================================ + auto tab1 = kumi::tuple{1}; + auto tab2 = kumi::tuple{1, 2}; + auto tab3 = kumi::tuple{1, 2, 3}; - TTS_EQUAL((horner)(a0, tab0), eve::detail::poleval(a0, tab0)); - TTS_EQUAL((horner)(a0, tab1), eve::detail::poleval(a0, tab1)); - TTS_EQUAL((horner)(a0, tab2), eve::detail::poleval(a0, tab2)); - TTS_EQUAL((horner)(a0, tab3), eve::detail::poleval(a0, tab3)); - }; + TTS_EQUAL(horner(a0, tab1), T(1)); + TTS_EQUAL(horner(a0, tab2), fma(a0, 1, 2)); + TTS_EQUAL(horner(a0, tab3), fma(a0, fma(a0, 1, 2), 3)); + TTS_EQUAL(horner[pedantic](a0, tab1), T(1)); + TTS_EQUAL(horner[pedantic](a0, tab2), fma[pedantic](a0, 1, 2)); + TTS_EQUAL(horner[pedantic](a0, tab3), fma[pedantic](a0, fma[pedantic](a0, 1, 2), 3)); }; diff --git a/test/unit/module/polynomial/newton.cpp b/test/unit/module/math/newton.cpp similarity index 51% rename from test/unit/module/polynomial/newton.cpp rename to test/unit/module/math/newton.cpp index 00df39de45..c3e638024d 100644 --- a/test/unit/module/polynomial/newton.cpp +++ b/test/unit/module/math/newton.cpp @@ -16,18 +16,18 @@ //================================================================================================== //== Types tests //================================================================================================== -TTS_CASE_TPL("Check 
return types of newton on wide", eve::test::simd::all_types +// TTS_CASE_TPL("Check return types of newton on wide", eve::test::simd::all_types -) -(tts::type) -{ - using v_t = eve::element_type_t; - using rv_t = std::vector; - using rl_t = std::list; - TTS_EXPR_IS(eve::newton(T(), rv_t(), rv_t()), T); - TTS_EXPR_IS(eve::newton(T(), rv_t(), rl_t()), T); - TTS_EXPR_IS(eve::newton(T(), rl_t(), rl_t()), T); -}; +// ) +// (tts::type) +// { +// using v_t = eve::element_type_t; +// using rv_t = std::vector; +// using rl_t = std::list; +// TTS_EXPR_IS(eve::newton(T(), rv_t(), rv_t()), T); +// TTS_EXPR_IS(eve::newton(T(), rv_t(), rl_t()), T); +// TTS_EXPR_IS(eve::newton(T(), rl_t(), rl_t()), T); +// }; //================================================================================================== //== newton tests @@ -42,29 +42,15 @@ TTS_CASE_WITH("Check behavior of newton on wide", using eve::numeric; using eve::one; using eve::pedantic; - using v_t = eve::element_type_t; + //============================================================================ - //== ranges + //== variadic //============================================================================ - std::vector tab0; - std::vector tab1{1}; - std::vector tab2{1, 2}; - std::vector tab3{1, 2, 3}; - TTS_EQUAL((newton)(a0, tab0, tab0), T(0)); - TTS_EQUAL((newton)(a0, tab1, tab0), T(1)); - TTS_EQUAL((newton)(a0, tab2, tab1), (fma)(a0 - 1, 1, 2)); - TTS_EQUAL((newton)(a0, tab3, tab2), (fma)(a0 - 2, (fma)(a0 - 1, 1, 2), 3)); - - TTS_EQUAL(pedantic(newton)(a0, tab0, tab0), T(0)); - TTS_EQUAL(pedantic(newton)(a0, tab1, tab0), T(1)); - TTS_EQUAL(pedantic(newton)(a0, tab2, tab1), (fma)(a0 - 1, 1, 2)); - TTS_EQUAL(pedantic(newton)(a0, tab3, tab2), (fma)(a0 - 2, (fma)(a0 - 1, 1, 2), 3)); - - TTS_EQUAL(numeric(newton)(a0, tab0, tab0), T(0)); - TTS_EQUAL(numeric(newton)(a0, tab1, tab0), T(1)); - TTS_EQUAL(numeric(newton)(a0, tab2, tab1), (fma)(a0 - 1, 1, 2)); - TTS_EQUAL(numeric(newton)(a0, tab3, tab2), (fma)(a0 - 2, 
(fma)(a0 - 1, 1, 2), 3)); + TTS_EQUAL((newton)(a0), T(0)); + TTS_EQUAL((newton)(a0, 1), T(1)); + TTS_EQUAL((newton)(a0, 1, 2, 1), (fma)(a0 - 1, 1, 2)); + TTS_EQUAL((newton)(a0, 1, 2, 3, 1, 2), (fma)(a0 - 2, (fma)(a0 - 1, 1, 2), 3)); //============================================================================ @@ -80,13 +66,8 @@ TTS_CASE_WITH("Check behavior of newton on wide", TTS_EQUAL((newton)(a0, tup2, tup1), (fma)(a0 - 1, 1, 2)); TTS_EQUAL((newton)(a0, tup3, tup2), (fma)(a0 - 2, (fma)(a0 - 1, 1, 2), 3)); - TTS_EQUAL(pedantic(newton)(a0, tup0, tup0), T(0)); - TTS_EQUAL(pedantic(newton)(a0, tup1, tup0), T(1)); - TTS_EQUAL(pedantic(newton)(a0, tup2, tup1), (fma)(a0 - 1, 1, 2)); - TTS_EQUAL(pedantic(newton)(a0, tup3, tup2), (fma)(a0 - 2, (fma)(a0 - 1, 1, 2), 3)); - - TTS_EQUAL(numeric(newton)(a0, tup0, tup0), T(0)); - TTS_EQUAL(numeric(newton)(a0, tup1, tup0), T(1)); - TTS_EQUAL(numeric(newton)(a0, tup2, tup1), (fma)(a0 - 1, 1, 2)); - TTS_EQUAL(numeric(newton)(a0, tup3, tup2), (fma)(a0 - 2, (fma)(a0 - 1, 1, 2), 3)); + TTS_EQUAL(newton[pedantic](a0, tup0, tup0), T(0)); + TTS_EQUAL(newton[pedantic](a0, tup1, tup0), T(1)); + TTS_EQUAL(newton[pedantic](a0, tup2, tup1), (fma)(a0 - 1, 1, 2)); + TTS_EQUAL(newton[pedantic](a0, tup3, tup2), (fma)(a0 - 2, (fma)(a0 - 1, 1, 2), 3)); }; diff --git a/test/unit/module/math/reverse_horner.cpp b/test/unit/module/math/reverse_horner.cpp index ddb1611960..1e6653e70b 100644 --- a/test/unit/module/math/reverse_horner.cpp +++ b/test/unit/module/math/reverse_horner.cpp @@ -53,15 +53,10 @@ TTS_CASE_WITH("Check behavior of reverse_horner on wide", TTS_EQUAL(reverse_horner(a0, 2, 1), fma(a0, 1, 2)); TTS_EQUAL(reverse_horner(a0, 3, 2, 1), fma(a0, fma(a0, 1, 2), 3)); - TTS_EQUAL(pedantic(reverse_horner)(a0, 0), T(0)); - TTS_EQUAL(pedantic(reverse_horner)(a0, 1), T(1)); - TTS_EQUAL(pedantic(reverse_horner)(a0, 2, 1), pedantic(fma)(a0, 1, 2)); - TTS_EQUAL(pedantic(reverse_horner)(a0, 3, 2, 1), pedantic(fma)(a0, pedantic(fma)(a0, 1, 2), 3)); - 
-// TTS_EQUAL(numeric(reverse_horner)(a0, 0), T(0)); -// TTS_EQUAL(numeric(reverse_horner)(a0, 1), T(1)); -// TTS_EQUAL(numeric(reverse_horner)(a0, 2, 1), numeric(fma)(a0, 1, 2)); -// TTS_EQUAL(numeric(reverse_horner)(a0, 3, 2, 1), numeric(fma)(a0, numeric(fma)(a0, 1, 2), 3)); + TTS_EQUAL(reverse_horner[pedantic](a0, 0), T(0)); + TTS_EQUAL(reverse_horner[pedantic](a0, 1), T(1)); + TTS_EQUAL(reverse_horner[pedantic](a0, 2, 1), fma[pedantic](a0, 1, 2)); + TTS_EQUAL(reverse_horner[pedantic](a0, 3, 2, 1), fma[pedantic](a0, fma[pedantic](a0, 1, 2), 3)); // { @@ -88,30 +83,6 @@ TTS_CASE_WITH("Check behavior of reverse_horner on wide", // TTS_EQUAL(numeric(reverse_horner)(a0, tab2), (fma)(a0, 1, 2)); // TTS_EQUAL(numeric(reverse_horner)(a0, tab3), (fma)(a0, (fma)(a0, 1, 2), 3)); // } -// { -// //============================================================================ -// //== ranges -// //============================================================================ -// using v_t = eve::element_type_t; -// std::vector tab0; // std does not want array of size 0 -// std::array tab1 = {1}; -// std::array tab2 = {2, 1}; -// std::array tab3 = {3, 2, 1}; - -// TTS_EQUAL((reverse_horner)(a0, tab0), T(0)); -// TTS_EQUAL((reverse_horner)(a0, tab1), T(1)); -// TTS_EQUAL((reverse_horner)(a0, tab2), (fma)(a0, 1, 2)); -// TTS_EQUAL((reverse_horner)(a0, tab3), (fma)(a0, (fma)(a0, 1, 2), 3)); -// TTS_EQUAL(pedantic(reverse_horner)(a0, tab0), T(0)); -// TTS_EQUAL(pedantic(reverse_horner)(a0, tab1), T(1)); -// TTS_EQUAL(pedantic(reverse_horner)(a0, tab2), (fma)(a0, 1, 2)); -// TTS_EQUAL(pedantic(reverse_horner)(a0, tab3), (fma)(a0, (fma)(a0, 1, 2), 3)); - -// TTS_EQUAL(numeric(reverse_horner)(a0, tab0), T(0)); -// TTS_EQUAL(numeric(reverse_horner)(a0, tab1), T(1)); -// TTS_EQUAL(numeric(reverse_horner)(a0, tab2), (fma)(a0, 1, 2)); -// TTS_EQUAL(numeric(reverse_horner)(a0, tab3), (fma)(a0, (fma)(a0, 1, 2), 3)); -// } };