From c8b49ddb70777c94dc5c62cd329ea75c805a6c67 Mon Sep 17 00:00:00 2001 From: jtlap Date: Fri, 12 Jan 2024 18:16:40 +0100 Subject: [PATCH] Move special to new functor style --- .../pedantic/impl/lrising_factorial.hpp | 96 ----- .../pedantic/impl/rising_factorial.hpp | 2 +- .../special/pedantic/lrising_factorial.hpp | 10 - .../eve/module/special/pedantic/special.hpp | 1 - include/eve/module/special/regular/beta.hpp | 16 +- .../eve/module/special/regular/betainc.hpp | 25 +- .../module/special/regular/betainc_inv.hpp | 17 +- include/eve/module/special/regular/dawson.hpp | 15 +- .../eve/module/special/regular/digamma.hpp | 16 +- .../special/regular/double_factorial.hpp | 17 +- include/eve/module/special/regular/erf.hpp | 15 +- .../eve/module/special/regular/erf_inv.hpp | 15 +- include/eve/module/special/regular/erfc.hpp | 15 +- .../eve/module/special/regular/erfc_inv.hpp | 15 +- include/eve/module/special/regular/erfcx.hpp | 15 +- .../eve/module/special/regular/exp_int.hpp | 24 +- .../eve/module/special/regular/factorial.hpp | 31 +- .../eve/module/special/regular/gamma_p.hpp | 20 +- .../module/special/regular/gamma_p_inv.hpp | 15 +- .../eve/module/special/regular/impl/beta.hpp | 47 +- .../module/special/regular/impl/betainc.hpp | 116 +++-- .../special/regular/impl/betainc_inv.hpp | 145 +++---- .../module/special/regular/impl/dawson.hpp | 239 +++++------ .../module/special/regular/impl/digamma.hpp | 297 ++++++------- .../special/regular/impl/double_factorial.hpp | 43 +- .../eve/module/special/regular/impl/erf.hpp | 199 ++++----- .../module/special/regular/impl/erf_inv.hpp | 13 +- .../eve/module/special/regular/impl/erfc.hpp | 202 +++++---- .../module/special/regular/impl/erfc_inv.hpp | 21 +- .../eve/module/special/regular/impl/erfcx.hpp | 23 +- .../module/special/regular/impl/exp_int.hpp | 237 +++++----- .../module/special/regular/impl/factorial.hpp | 405 +++++++++--------- .../module/special/regular/impl/gamma_p.hpp | 171 ++++---- .../special/regular/impl/gamma_p_inv.hpp | 133 +++--- .../module/special/regular/impl/lambert.hpp | 150 +++---- .../eve/module/special/regular/impl/lbeta.hpp | 46 +- .../special/regular/impl/lfactorial.hpp | 6 +- .../special/regular/impl/log_abs_gamma.hpp | 22 +- .../module/special/regular/impl/log_gamma.hpp | 34 +- .../regular/impl/lrising_factorial.hpp | 239 ++++++++--- .../eve/module/special/regular/impl/omega.hpp | 82 ++-- .../special/regular/impl/rising_factorial.hpp | 115 ++--- .../module/special/regular/impl/signgam.hpp | 42 +- .../module/special/regular/impl/stirling.hpp | 30 +- .../module/special/regular/impl/tgamma.hpp | 294 ++++++------- .../eve/module/special/regular/impl/zeta.hpp | 115 +++-- .../eve/module/special/regular/lambert.hpp | 17 +- include/eve/module/special/regular/lbeta.hpp | 16 +- .../eve/module/special/regular/lfactorial.hpp | 28 +- .../module/special/regular/log_abs_gamma.hpp | 17 +- .../eve/module/special/regular/log_gamma.hpp | 15 +- .../special/regular/lrising_factorial.hpp | 16 +- include/eve/module/special/regular/omega.hpp | 16 +- .../special/regular/rising_factorial.hpp | 16 +- .../eve/module/special/regular/signgam.hpp | 14 +- .../eve/module/special/regular/stirling.hpp | 15 +- include/eve/module/special/regular/tgamma.hpp | 16 +- include/eve/module/special/regular/zeta.hpp | 15 +- .../eve/traits/overload/default_behaviors.hpp | 2 +- .../special/pedantic/lrising_factorial.cpp | 2 +- .../doc/special/pedantic/rising_factorial.cpp | 2 +- test/doc/special/raw/lrising_factorial.cpp | 2 +- test/doc/special/raw/rising_factorial.cpp | 2 +- test/unit/module/special/lbeta.cpp | 81 ++++ .../unit/module/special/lrising_factorial.cpp | 44 +- test/unit/module/special/rising_factorial.cpp | 44 +- 66 files changed, 2127 insertions(+), 2099 deletions(-) delete mode 100644 include/eve/module/special/pedantic/impl/lrising_factorial.hpp delete mode 100644 include/eve/module/special/pedantic/lrising_factorial.hpp create mode 100644 test/unit/module/special/lbeta.cpp diff --git a/include/eve/module/special/pedantic/impl/lrising_factorial.hpp b/include/eve/module/special/pedantic/impl/lrising_factorial.hpp deleted file mode 100644 index 0dcfda9759..0000000000 --- a/include/eve/module/special/pedantic/impl/lrising_factorial.hpp +++ /dev/null @@ -1,96 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace eve::detail -{ -// pedantic computes also for negative values and even negative integer values -template -EVE_FORCEINLINE auto -lrising_factorial_(EVE_SUPPORTS(cpu_), pedantic_type const&, T a, T x) noexcept -{ - if constexpr( has_native_abi_v ) - { - auto r = nan(as(a)); - auto notdone = is_not_nan(a) && is_not_nan(x); - - auto lr0 = []() { return zero(as(T())); }; - - auto lrpos = [](auto a, auto x) { return inner_lrising_factorial(a, x); }; - - auto lrnegint = [](auto a, auto x) - // a and a+x are negative integers uses reflection. - { - auto r = inner_lrising_factorial(-a, -x); - return -eve::log1p(x / a) - r; - }; - - auto lraeqmx = [](auto a, auto) - // a+x = 0 - { return log_abs_gamma(inc(a)); }; - - auto lraneqmx = [](auto a, auto) - // a < 0.0 && a+x < 0.0 - { return minf(as(a)); }; - - auto lrneg = [](auto a, auto x) - { - // Reduce to positive case using reflection. - auto oma = oneminus(a); - auto spioma = sinpi(oma); - auto spiomamx = sinpi(oma - x); - auto r = inner_lrising_factorial(oma, -x); - auto lnterm = eve::log(eve::abs(spioma / spiomamx)); - return if_else(is_nez(spiomamx * spioma), lnterm - r, allbits); - }; - - auto lrlast = [](auto a, auto x) { return eve::log_abs_gamma(a + x) - eve::log_abs_gamma(a); }; - - if( eve::any(notdone) ) - { - notdone = next_interval(lr0, notdone, is_eqz(x), r); - if( eve::any(notdone) ) - { - auto testpos = is_nlez(a) && is_nlez(a + x); - notdone = next_interval(lrpos, notdone, testpos, r, a, x); - if( eve::any(notdone) ) - { - // from here a+x <= 0 || x <= 0 - auto aneg = is_ltz(a); - auto aflint = is_flint(a); - auto testnegint = aflint && is_flint(a + x) && aneg && is_ltz(a + x); - notdone = next_interval(lrnegint, notdone, testnegint, r, a, x); - if( eve::any(notdone) ) - { - notdone = next_interval(lraeqmx, notdone, is_eqz(a + x), r, a, x); - if( eve::any(notdone) ) - { - notdone = next_interval(lraneqmx, notdone, aflint && !is_eqz(a + x), r, a, x); - if( eve::any(notdone) ) - { - notdone = next_interval(lrneg, notdone, aneg && is_ltz(a + x), r, a, x); - if( eve::any(notdone) ) { notdone = last_interval(lrlast, notdone, r, a, x); } - } - } - } - } - } - } - return r; - } - else return apply_over(pedantic(lrising_factorial), a, x); -} -} diff --git a/include/eve/module/special/pedantic/impl/rising_factorial.hpp b/include/eve/module/special/pedantic/impl/rising_factorial.hpp index baf3ea769a..544b32540f 100644 --- a/include/eve/module/special/pedantic/impl/rising_factorial.hpp +++ b/include/eve/module/special/pedantic/impl/rising_factorial.hpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include namespace eve::detail diff --git a/include/eve/module/special/pedantic/lrising_factorial.hpp b/include/eve/module/special/pedantic/lrising_factorial.hpp deleted file mode 100644 index a558e9c10c..0000000000 --- a/include/eve/module/special/pedantic/lrising_factorial.hpp +++ /dev/null @@ -1,10 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include diff --git a/include/eve/module/special/pedantic/special.hpp b/include/eve/module/special/pedantic/special.hpp index 03817cbb2d..15f12d634c 100644 --- a/include/eve/module/special/pedantic/special.hpp +++ b/include/eve/module/special/pedantic/special.hpp @@ -7,5 +7,4 @@ //================================================================================================== #pragma once -#include #include diff --git a/include/eve/module/special/regular/beta.hpp b/include/eve/module/special/regular/beta.hpp index 98be54386a..ed54b58cbe 100644 --- a/include/eve/module/special/regular/beta.hpp +++ b/include/eve/module/special/regular/beta.hpp @@ -6,10 +6,22 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct beta_t : elementwise_callable + { + template + EVE_FORCEINLINE + eve::common_value_t operator()(T0 a, T1 b) const noexcept { return EVE_DISPATCH_CALL(a, b); } + + EVE_CALLABLE_OBJECT(beta_t, beta_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -45,7 +57,7 @@ namespace eve //! //! @} //================================================================================================ -EVE_MAKE_CALLABLE(beta_, beta); + inline constexpr auto beta = functor; } #include diff --git a/include/eve/module/special/regular/betainc.hpp b/include/eve/module/special/regular/betainc.hpp index f4abc5fac3..5c49a15c56 100644 --- a/include/eve/module/special/regular/betainc.hpp +++ b/include/eve/module/special/regular/betainc.hpp @@ -7,15 +7,28 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { +template +struct betainc_t : elementwise_callable +{ + template + EVE_FORCEINLINE + eve::common_value_t + operator()(T0 a, T1 b, T2 c) const noexcept { return EVE_DISPATCH_CALL(a, b, c); } + + EVE_CALLABLE_OBJECT(betainc_t, betainc_); +}; + //================================================================================================ //! @addtogroup special //! @{ -//! @var betainc -//! @brief Computes the beta incomplete function. \f$\displaystyle \mbox{I}_s(x,y) = +//! @var betaincinc +//! @brief Computes the betainc incomplete function. \f$\displaystyle \mbox{I}_s(x,y) = //! \frac{1}{\mbox{B}(x,y)}\int_0^s t^{x-1}(1-t)^{y-1}\mbox{d}t\f$ //! //! **Defined in header** @@ -44,14 +57,14 @@ namespace eve //! //! **Return value** //! -//! The value of the incomplete beta function is returned. +//! The value of the incomplete betainc function is returned. //! //! @groupheader{Example} //! -//! @godbolt{doc/special/regular/betainc.cpp} +//! @godbolt{doc/special/regular/betaincinc.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(betainc_, betainc); + inline constexpr auto betainc = functor; } #include diff --git a/include/eve/module/special/regular/betainc_inv.hpp b/include/eve/module/special/regular/betainc_inv.hpp index d4ec13370c..3c183269ce 100644 --- a/include/eve/module/special/regular/betainc_inv.hpp +++ b/include/eve/module/special/regular/betainc_inv.hpp @@ -7,10 +7,22 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct betainc_inv_t : elementwise_callable + { + template + EVE_FORCEINLINE + eve::common_value_t operator()(T0 a, T1 b, T2 c) const noexcept { return EVE_DISPATCH_CALL(a, b, c); } + + EVE_CALLABLE_OBJECT(betainc_inv_t, betainc_inv_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -51,7 +63,8 @@ namespace eve //! @godbolt{doc/special/regular/betainc_inv.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(betainc_inv_, betainc_inv); + inline constexpr auto betainc_inv = functor; } + #include diff --git a/include/eve/module/special/regular/dawson.hpp b/include/eve/module/special/regular/dawson.hpp index 4188767717..fff1dafac4 100644 --- a/include/eve/module/special/regular/dawson.hpp +++ b/include/eve/module/special/regular/dawson.hpp @@ -7,10 +7,21 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct dawson_t : elementwise_callable + { + template + EVE_FORCEINLINE T operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(dawson_t, dawson_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -47,7 +58,7 @@ namespace eve //! @godbolt{doc/special/regular/dawson.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(dawson_, dawson); +inline constexpr auto dawson = functor; } #include diff --git a/include/eve/module/special/regular/digamma.hpp b/include/eve/module/special/regular/digamma.hpp index bfc68d4fbd..ce5a77d011 100644 --- a/include/eve/module/special/regular/digamma.hpp +++ b/include/eve/module/special/regular/digamma.hpp @@ -7,10 +7,21 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { +template +struct digamma_t : elementwise_callable +{ + template + EVE_FORCEINLINE T operator()(T v) const { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(digamma_t, digamma_); +}; + //================================================================================================ //! @addtogroup special //! @{ @@ -47,7 +58,8 @@ namespace eve //! //! @} //================================================================================================ -EVE_MAKE_CALLABLE(digamma_, digamma); +inline constexpr auto digamma = functor; } + #include diff --git a/include/eve/module/special/regular/double_factorial.hpp b/include/eve/module/special/regular/double_factorial.hpp index 4c9fbf9e70..77590b8a91 100644 --- a/include/eve/module/special/regular/double_factorial.hpp +++ b/include/eve/module/special/regular/double_factorial.hpp @@ -7,10 +7,23 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct double_factorial_t : elementwise_callable + { + template + EVE_FORCEINLINE + as_wide_as_t + operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(double_factorial_t, double_factorial_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -49,7 +62,7 @@ namespace eve //! @godbolt{doc/special/regular/double_factorial.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(double_factorial_, double_factorial); +inline constexpr auto double_factorial = functor; } #include diff --git a/include/eve/module/special/regular/erf.hpp b/include/eve/module/special/regular/erf.hpp index f7d3ab5735..18b1944f0d 100644 --- a/include/eve/module/special/regular/erf.hpp +++ b/include/eve/module/special/regular/erf.hpp @@ -7,10 +7,21 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct erf_t : elementwise_callable + { + template + EVE_FORCEINLINE T operator()(T v) const { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(erf_t, erf_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -52,7 +63,7 @@ namespace eve //! //! @} //================================================================================================ -EVE_MAKE_CALLABLE(erf_, erf); +inline constexpr auto erf = functor; } #include diff --git a/include/eve/module/special/regular/erf_inv.hpp b/include/eve/module/special/regular/erf_inv.hpp index 544f0a4e02..bbdf651d36 100644 --- a/include/eve/module/special/regular/erf_inv.hpp +++ b/include/eve/module/special/regular/erf_inv.hpp @@ -7,10 +7,21 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct erf_inv_t : elementwise_callable + { + template + EVE_FORCEINLINE T operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(erf_inv_t, erf_inv_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -47,7 +58,7 @@ namespace eve //! @godbolt{doc/special/regular/erf_inv.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(erf_inv_, erf_inv); +inline constexpr auto erf_inv = functor; } #include diff --git a/include/eve/module/special/regular/erfc.hpp b/include/eve/module/special/regular/erfc.hpp index 24ae87e1f5..e6627345ec 100644 --- a/include/eve/module/special/regular/erfc.hpp +++ b/include/eve/module/special/regular/erfc.hpp @@ -7,10 +7,21 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct erfc_t : elementwise_callable + { + template + EVE_FORCEINLINE T operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(erfc_t, erfc_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -52,7 +63,7 @@ namespace eve //! @godbolt{doc/special/regular/erf.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(erfc_, erfc); +inline constexpr auto erfc = functor; } #include diff --git a/include/eve/module/special/regular/erfc_inv.hpp b/include/eve/module/special/regular/erfc_inv.hpp index bed42a2c95..327aa4ed36 100644 --- a/include/eve/module/special/regular/erfc_inv.hpp +++ b/include/eve/module/special/regular/erfc_inv.hpp @@ -7,10 +7,21 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct erfc_inv_t : elementwise_callable + { + template + EVE_FORCEINLINE T operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(erfc_inv_t, erfc_inv_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -53,7 +64,7 @@ namespace eve //! @godbolt{doc/special/regular/erfc_inv.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(erfc_inv_, erfc_inv); +inline constexpr auto erfc_inv = functor; } #include diff --git a/include/eve/module/special/regular/erfcx.hpp b/include/eve/module/special/regular/erfcx.hpp index 58240b46aa..82f8e16a11 100644 --- a/include/eve/module/special/regular/erfcx.hpp +++ b/include/eve/module/special/regular/erfcx.hpp @@ -7,10 +7,21 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct erfcx_t : elementwise_callable + { + template + EVE_FORCEINLINE T operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(erfcx_t, erfcx_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -47,7 +58,7 @@ namespace eve //! @godbolt{doc/special/regular/erfcx.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(erfcx_, erfcx); +inline constexpr auto erfcx = functor; } #include diff --git a/include/eve/module/special/regular/exp_int.hpp b/include/eve/module/special/regular/exp_int.hpp index cc1de1114d..707197804c 100644 --- a/include/eve/module/special/regular/exp_int.hpp +++ b/include/eve/module/special/regular/exp_int.hpp @@ -7,10 +7,24 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { +template +struct exp_int_t : elementwise_callable +{ + template + EVE_FORCEINLINE eve::as_wide_as_t operator()(I n, T v) const noexcept { return EVE_DISPATCH_CALL(n, v); } + + template + EVE_FORCEINLINE T operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(exp_int_t, exp_int_); +}; + //================================================================================================ //! @addtogroup special //! @{ @@ -29,6 +43,9 @@ namespace eve //! @code //! namespace eve //! { +//! template< eve::floating_ordered_value T > +//! T exp_int(T x) noexcept; +//! //! template< eve::unsigned_value N, eve::floating_ordered_value T > //! T exp_int(N n, T x) noexcept; //! } @@ -36,7 +53,7 @@ namespace eve //! //! **Parameters** //! -//! * `n` : [unsigned argument](@ref eve::unsigned_value). +//! * `n` : [unsigned argument](@ref eve::unsigned_value). If not present taken to be 1. //! //! * `x` : [real floating argument](@ref eve::floating_ordered_value). //! @@ -49,7 +66,8 @@ namespace eve //! @godbolt{doc/special/regular/exp_int.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(exp_int_, exp_int); +inline constexpr auto exp_int = functor; } + #include diff --git a/include/eve/module/special/regular/factorial.hpp b/include/eve/module/special/regular/factorial.hpp index e1a669d9f0..79c208f851 100644 --- a/include/eve/module/special/regular/factorial.hpp +++ b/include/eve/module/special/regular/factorial.hpp @@ -6,11 +6,27 @@ */ //================================================================================================== #pragma once - -#include +#include +#include +#include namespace eve { + template + struct factorial_t : elementwise_callable + { + template + EVE_FORCEINLINE + as_wide_as_t + operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + template + EVE_FORCEINLINE + T operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(factorial_t, factorial_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -28,14 +44,17 @@ namespace eve //! @code //! namespace eve //! { -//! template< eve::value N > -//! eve::as_floating_point_value factorial(N x) noexcept; +//! template< eve::integral_value N > +//! as_wide_as factorial(N x) noexcept; +//! +//! template< eve::floating_ordered_value T > +//! T factorial(N x) noexcept; //! } //! @endcode //! //! **Parameters** //! -//! * `n` : [unsigned argument](@ref eve::unsigned_value). +//! * `n` : must be of integral type or flint. //! //! **Return value** //! @@ -50,7 +69,7 @@ namespace eve //! @godbolt{doc/special/regular/factorial.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(factorial_, factorial); +inline constexpr auto factorial = functor; } #include diff --git a/include/eve/module/special/regular/gamma_p.hpp b/include/eve/module/special/regular/gamma_p.hpp index 740c6b7183..b69554a583 100644 --- a/include/eve/module/special/regular/gamma_p.hpp +++ b/include/eve/module/special/regular/gamma_p.hpp @@ -7,10 +7,22 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { +template +struct gamma_p_t : elementwise_callable +{ + template + EVE_FORCEINLINE + eve::common_value_t operator()(T0 a, T1 b) const noexcept { return EVE_DISPATCH_CALL(a, b); } + + EVE_CALLABLE_OBJECT(gamma_p_t, gamma_p_); +}; + //================================================================================================ //! @addtogroup special //! @{ @@ -28,8 +40,8 @@ namespace eve //! @code //! namespace eve //! { -//! template< eve::floating_ordered_value T > -//! T gamma_p(T x) noexcept; +//! template< eve::floating_ordered_value T0, floating_ordered_value T1> +//! T gamma_p(T1 x, T2 y) noexcept; //! } //! @endcode //! @@ -47,7 +59,7 @@ namespace eve //! @godbolt{doc/special/regular/gamma_p.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(gamma_p_, gamma_p); + inline constexpr auto gamma_p = functor; } #include diff --git a/include/eve/module/special/regular/gamma_p_inv.hpp b/include/eve/module/special/regular/gamma_p_inv.hpp index 6a62735863..6b5cb469ec 100644 --- a/include/eve/module/special/regular/gamma_p_inv.hpp +++ b/include/eve/module/special/regular/gamma_p_inv.hpp @@ -8,10 +8,21 @@ #pragma once #include -#include +#include +#include namespace eve { +template +struct gamma_p_inv_t : elementwise_callable +{ + template + EVE_FORCEINLINE + eve::common_value_t operator()(T0 a, T1 b) const noexcept { return EVE_DISPATCH_CALL(a, b); } + + EVE_CALLABLE_OBJECT(gamma_p_inv_t, gamma_p_inv_); +}; + //================================================================================================ //! @addtogroup special //! @{ @@ -49,7 +60,7 @@ namespace eve //! @godbolt{doc/special/regular/gamma_p.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(gamma_p_inv_, gamma_p_inv); + inline constexpr auto gamma_p_inv = functor; } #include diff --git a/include/eve/module/special/regular/impl/beta.hpp b/include/eve/module/special/regular/impl/beta.hpp index 173fee5550..cb33b05c40 100644 --- a/include/eve/module/special/regular/impl/beta.hpp +++ b/include/eve/module/special/regular/impl/beta.hpp @@ -14,36 +14,19 @@ namespace eve::detail { -template -EVE_FORCEINLINE auto -beta_(EVE_SUPPORTS(cpu_), T a0, U a1) noexcept --> common_value_t -{ - return arithmetic_call(beta, a0, a1); -} - -template -EVE_FORCEINLINE T -beta_(EVE_SUPPORTS(cpu_), T a0, T a1) noexcept -{ - auto y = a0 + a1; - auto sign = eve::signgam(a0) * eve::signgam(a1) * eve::signgam(y); - return sign * exp(log_abs_gamma(a0) + log_abs_gamma(a1) - log_abs_gamma(y)); -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -beta_(EVE_SUPPORTS(cpu_), C const& cond, T0 t0, Ts ... ts) noexcept --> decltype(if_else(cond, beta(t0, ts...), t0)){ - return mask_op(cond, eve::beta, t0, ts ...); -} - -template -EVE_FORCEINLINE auto -beta_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, T0 t0, Ts ... ts) noexcept --> decltype(if_else(cond, beta(t0, ts...), t0)){ - return mask_op(cond, d(eve::beta), t0, ts ...); -} + template< typename T0, typename T1, callable_options O> + EVE_FORCEINLINE + eve::common_value_t beta_(EVE_REQUIRES(cpu_), O const&, T0 const& a0, T1 const & a1) + { + if constexpr(std::same_as) + { + auto y = a0 + a1; + auto sign = eve::signgam(a0) * eve::signgam(a1) * eve::signgam(y); + return sign * exp(log_abs_gamma(a0) + log_abs_gamma(a1) - log_abs_gamma(y)); + } + else + { + return arithmetic_call(beta, a0, a1); + } + } } diff --git a/include/eve/module/special/regular/impl/betainc.hpp b/include/eve/module/special/regular/impl/betainc.hpp index 1980376563..18259c2ff7 100644 --- a/include/eve/module/special/regular/impl/betainc.hpp +++ b/include/eve/module/special/regular/impl/betainc.hpp @@ -15,74 +15,56 @@ namespace eve::detail { -template -EVE_FORCEINLINE auto -betainc_(EVE_SUPPORTS(cpu_), T x, U a, V b) noexcept --> common_value_t -{ - return arithmetic_call(betainc, x, a, b); -} - -template -EVE_FORCEINLINE T -betainc_(EVE_SUPPORTS(cpu_), T x, T a, T b) noexcept requires(has_native_abi_v) -{ - auto betacf = [](auto x, auto a, auto b) + template< typename T0, typename T1, typename T2, callable_options O> + EVE_FORCEINLINE + eve::common_value_t betainc_(EVE_REQUIRES(cpu_), O const& + , T0 const& x, T1 const & a, T2 const & b) { - // continued fraction for incomplete Beta function, used by betainc - constexpr std::size_t itmax = 100; - auto const o = one(as(x)); - auto epsi = 10 * eps(as(x)); - auto fpmin = sqr(eps(as(x))); - auto qab = a + b; - auto qap = inc(a); - auto qam = dec(a); - auto c = o; - auto d = rec(maxmag(oneminus(qab * x / qap), fpmin)); - auto h = d; - for( std::size_t m = 1; m <= itmax; ++m ) + if constexpr(std::same_as && std::same_as) { - T vm(m); - auto vm2 = vm + vm; - auto aa = vm * (b - vm) * x / ((qam + vm2) * (a + vm2)); - d = rec(maxmag(fma(aa, d, o), fpmin)); - c = maxmag(fma(aa, rec(c), o), fpmin); - h *= d * c; - aa = -(a + vm) * (qab + vm) * x / ((a + vm2) * (qap + vm2)); - d = rec(maxmag(fma(aa, d, o), fpmin)); - c = maxmag(fma(aa, rec(c), o), fpmin); - auto del = d * c; - h *= del; - if( eve::all(eve::abs(oneminus(del)) < epsi) ) return h; // Are we done? + auto betacf = [](auto x, auto a, auto b) + { + // continued fraction for incomplete Beta function, used by betainc + constexpr std::size_t itmax = 100; + auto const o = one(as(x)); + auto epsi = 10 * eps(as(x)); + auto fpmin = sqr(eps(as(x))); + auto qab = a + b; + auto qap = inc(a); + auto qam = dec(a); + auto c = o; + auto d = rec(maxmag(oneminus(qab * x / qap), fpmin)); + auto h = d; + for( std::size_t m = 1; m <= itmax; ++m ) + { + T0 vm(m); + auto vm2 = vm + vm; + auto aa = vm * (b - vm) * x / ((qam + vm2) * (a + vm2)); + d = rec(maxmag(fma(aa, d, o), fpmin)); + c = maxmag(fma(aa, rec(c), o), fpmin); + h *= d * c; + aa = -(a + vm) * (qab + vm) * x / ((a + vm2) * (qap + vm2)); + d = rec(maxmag(fma(aa, d, o), fpmin)); + c = maxmag(fma(aa, rec(c), o), fpmin); + auto del = d * c; + h *= del; + if( eve::all(eve::abs(oneminus(del)) < epsi) ) return h; // Are we done? + } + return h; + }; + auto bt = exp(fma(a, log(x), b * log1p(-x)) - lbeta(a, b)); + auto test = (x > inc(a) / (a + b + T0(2))); + auto xx = oneminus[test](x); + auto aa = if_else(test, b, a); + auto bb = if_else(test, a, b); + auto res = bt * betacf(xx, aa, bb) / aa; + return if_else(is_ltz(xx) || xx > one(as(x)), + allbits, + if_else(is_eqz(xx), zero, if_else(x == one(as(x)), one, oneminus[test](res)))); } - return h; - }; - auto bt = exp(fma(a, log(x), b * log1p(-x)) - lbeta(a, b)); - auto test = (x > inc(a) / (a + b + T(2))); - auto xx = oneminus[test](x); - auto aa = if_else(test, b, a); - auto bb = if_else(test, a, b); - auto res = bt * betacf(xx, aa, bb) / aa; - return if_else(is_ltz(xx) || xx > one(as(x)), - allbits, - if_else(is_eqz(xx), zero, if_else(x == one(as(x)), one, oneminus[test](res)))); -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -betainc_(EVE_SUPPORTS(cpu_), C const& cond, T0 t0, Ts ... ts) noexcept --> decltype(if_else(cond, betainc(t0, ts...), t0)) -{ - return mask_op(cond, eve::betainc, t0, ts ...); -} - -template -EVE_FORCEINLINE auto -betainc_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, T0 t0, Ts ... ts) noexcept --> decltype(if_else(cond, betainc(t0, ts...), t0)) -{ - return mask_op(cond, d(eve::betainc), t0, ts ...); -} + else + { + return arithmetic_call(betainc, x, a, b); + } + } } diff --git a/include/eve/module/special/regular/impl/betainc_inv.hpp b/include/eve/module/special/regular/impl/betainc_inv.hpp index 4a8f0e8788..3b268613a8 100644 --- a/include/eve/module/special/regular/impl/betainc_inv.hpp +++ b/include/eve/module/special/regular/impl/betainc_inv.hpp @@ -17,90 +17,71 @@ namespace eve::detail { -template -EVE_FORCEINLINE auto -betainc_inv_(EVE_SUPPORTS(cpu_), T x, U a, V b) noexcept --> common_value_t -{ - return arithmetic_call(betainc_inv, x, a, b); -} + template + common_value_t + betainc_inv_(EVE_REQUIRES(cpu_), O const&, T p, U a, V b) noexcept -template -EVE_FORCEINLINE T -betainc_inv_(EVE_SUPPORTS(cpu_), T p, T a, T b) noexcept -requires(has_native_abi_v) -{ - auto large = [](auto p, auto a, auto b) { - // Set initial guess. - auto const FiveoSix = T(5. / 6.); - auto a1 = dec(a); - auto b1 = dec(b); - auto pp = oneminus[p >= T(0.5)](p); - auto t = sqrt(-2 * log(pp)); - auto x = - fma(t, T(0.27061), T(2.30753)) / fma(t, fma(t, T(0.04481), T(0.99229)), one(as(p))) - t; - x = minus[p < T(0.5)](x); - auto al = (sqr(x) - T(3)) * T(1.0 / 6.0); - auto r2a1 = rec(a + a1); - auto r2b1 = rec(b + b1); - auto h = rec(average(r2a1, r2b1)); - auto w = (x * sqrt(al + h) / h) - (r2b1 - r2a1) * (al + FiveoSix - T(2.0 / 3.0) / h); - return a / fma(exp(w + w), b, a); - }; + if constexpr(std::same_as && std::same_as && has_native_abi_v) + { + auto large = [](auto p, auto a, auto b) + { + // Set initial guess. + auto const FiveoSix = T(5. / 6.); + auto a1 = dec(a); + auto b1 = dec(b); + auto pp = oneminus[p >= T(0.5)](p); + auto t = sqrt(-2 * log(pp)); + auto x = + fma(t, T(0.27061), T(2.30753)) / fma(t, fma(t, T(0.04481), T(0.99229)), one(as(p))) - t; + x = minus[p < T(0.5)](x); + auto al = (sqr(x) - T(3)) * T(1.0 / 6.0); + auto r2a1 = rec(a + a1); + auto r2b1 = rec(b + b1); + auto h = rec(average(r2a1, r2b1)); + auto w = (x * sqrt(al + h) / h) - (r2b1 - r2a1) * (al + FiveoSix - T(2.0 / 3.0) / h); + return a / fma(exp(w + w), b, a); + }; - auto small = [](auto p, auto a, auto b) - { - auto lna = log(a / (a + b)); - auto lnb = log(b / (a + b)); - auto t = exp(a * lna) / a; - auto u = exp(b * lnb) / b; - auto w = t + u; - auto test = p < t / w; - auto z = if_else(test, a * w * p, b * w * oneminus(p)); - auto po = pow(z, rec(if_else(test, a, b))); - return if_else(test, po, oneminus(po)); - }; - auto a1 = dec(a); - auto b1 = dec(b); - auto const o = one(as(p)); - const auto epsi = 10 * eps(as(p)); - auto test = (a > o) && (b > o); - auto x = nan(as(p)); - auto notdone = is_not_nan(p); - notdone = next_interval(large, notdone, test, x, p, a, b); - if( eve::any(notdone) ) { last_interval(small, notdone, x, p, a, b); } - auto afac = -lbeta(a, b); - ; - for( std::size_t j = 0; j < 10; ++j ) - { - auto err = betainc(x, a, b) - p; - auto t = exp(fma(a1, log(x), b1 * log1p(-x)) + afac); - auto u = err / t; // Halley: - auto m = eve::min(o, u * (a1 / x - b1 / (oneminus(x)))); - x -= u / inc(-0.5 * m); - auto tt = inc[x >= o](t); - x = if_else(is_lez(x) && (x >= o), average(x, tt), x); - if( eve::all(eve::abs(t) <= epsi * x) && j ) break; + auto small = [](auto p, auto a, auto b) + { + auto lna = log(a / (a + b)); + auto lnb = log(b / (a + b)); + auto t = exp(a * lna) / a; + auto u = exp(b * lnb) / b; + auto w = t + u; + auto test = p < t / w; + auto z = if_else(test, a * w * p, b * w * oneminus(p)); + auto po = pow(z, rec(if_else(test, a, b))); + return if_else(test, po, oneminus(po)); + }; + auto a1 = dec(a); + auto b1 = dec(b); + auto const o = one(as(p)); + const auto epsi = 10 * eps(as(p)); + auto test = (a > o) && (b > o); + auto x = nan(as(p)); + auto notdone = is_not_nan(p); + notdone = next_interval(large, notdone, test, x, p, a, b); + if( eve::any(notdone) ) { last_interval(small, notdone, x, p, a, b); } + auto afac = -lbeta(a, b); + ; + for( std::size_t j = 0; j < 10; ++j ) + { + auto err = betainc(x, a, b) - p; + auto t = exp(fma(a1, log(x), b1 * log1p(-x)) + afac); + auto u = err / t; // Halley: + auto m = eve::min(o, u * (a1 / x - b1 / (oneminus(x)))); + x -= u / inc(-0.5 * m); + auto tt = inc[x >= o](t); + x = if_else(is_lez(x) && (x >= o), average(x, tt), x); + if( eve::all(eve::abs(t) <= epsi * x) && j ) break; + } + return if_else(is_lez(p), zero, if_else(p >= o, one, x)); + } + else + { + return arithmetic_call(betainc_inv, p, a, b); + } } - return if_else(is_lez(p), zero, if_else(p >= o, one, x)); -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -betainc_inv_(EVE_SUPPORTS(cpu_), C const& cond, T0 t0, Ts ... ts) noexcept --> decltype(if_else(cond, betainc_inv(t0, ts...), t0)) -{ - return mask_op(cond, eve::betainc_inv, t0, ts ...); -} - -template -EVE_FORCEINLINE auto -betainc_inv_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, T0 t0, Ts ... ts) noexcept --> decltype(if_else(cond, betainc_inv(t0, ts...), t0)) -{ - return mask_op(cond, d(eve::betainc_inv), t0, ts ...); -} } diff --git a/include/eve/module/special/regular/impl/dawson.hpp b/include/eve/module/special/regular/impl/dawson.hpp index baecdb2b59..8c8cb7f345 100644 --- a/include/eve/module/special/regular/impl/dawson.hpp +++ b/include/eve/module/special/regular/impl/dawson.hpp @@ -9,151 +9,122 @@ #include #include -#include #include #include namespace eve::detail { -template -EVE_FORCEINLINE constexpr T -dawson_(EVE_SUPPORTS(cpu_), T a0) noexcept -{ - if constexpr( has_native_abi_v ) + template + T dawson_(EVE_REQUIRES(cpu_), O const&, T const& a0) { - using elt_t = element_type_t; - auto x = eve::abs(a0); - auto rx = x; - auto xx = sqr(x); - auto dawson1 = [](auto xx, auto x) - { - if constexpr( std::is_same_v ) - { - return x - * - eve::reverse_horner(xx, T(0x1.000000p+0f), T(-0x1.77cccap-4f), T(0x1.5a3578p-5f) - , T(-0x1.bdb92ep-11f), T(0x1.71a316p-12f), T(0x1.9d2900p-19f) - , T(0x1.ffb82ep-21f), T(0x1.4e0912p-26f), T(0x1.d2e30ap-31f) - , T(0x1.8ffb30p-37f)) - - / - eve::reverse_horner(xx, T(0x1.000000p+0f), T(0x1.265bbcp-1f), T(0x1.455fdcp-3f) - , T(0x1.c9d936p-6f), T(0x1.c92fd0p-9f), T(0x1.555662p-12f) - , T(0x1.860dcep-16f), T(0x1.559b5cp-20f), T(0x1.bfc372p-25f) - , T(0x1.993234p-30f), T(0x1.a6ddf6p-36f)) - ; - } - else - { - return x - * - eve::reverse_horner(xx, T(0x1.0000000000000p+0), T(-0x1.77ccca3261da7p-4), T(0x1.5a357714ced03p-5) - , T(-0x1.bdb92e4e9a921p-11), T(0x1.71a3163a27174p-12), T(0x1.9d29007c3bd6ap-19) - , T(0x1.ffb82d857b833p-21), T(0x1.4e09113ca0ba0p-26), T(0x1.d2e309db8f5fbp-31) - , T(0x1.8ffb30f7d51f1p-37)) - - / - eve::reverse_horner(xx, T(0x1.0000000000000p+0), T(0x1.265bbc0f09197p-1), T(0x1.455fdbcb2a008p-3) - , T(0x1.c9d9358f736f7p-6), T(0x1.c92fcf5507532p-9), T(0x1.555661ec43b62p-12) - , T(0x1.860dcd4d604a3p-16), T(0x1.559b5c414f013p-20), T(0x1.bfc372910659cp-25) - , T(0x1.993234f10c1b4p-30), T(0x1.a6ddf536ed65ap-36)) - ; - } - }; - auto dawson2 = [](auto xx, auto rx, auto x) - { - if constexpr( std::is_same_v ) - { - auto num = - eve::reverse_horner(xx, T(0x1.3bfc2ap-35f), T(-0x1.4a3b14p-29f), T(0x1.86d8bep-24f), T(-0x1.2015dep-19f) - , T(0x1.2db062p-15f), T(-0x1.bbc454p-12f), T(0x1.dffee2p-9f), T(-0x1.665626p-6f) - , T(0x1.81a4bap-4f), T(-0x1.f541cep-3f), T(0x1.0495c6p-1f)) - ; - auto denom = - eve::reverse_horner(xx, T(0x1.3bfc20p-34f), T(-0x1.51a2c0p-28f), T(0x1.961706p-23f), T(-0x1.31eec2p-18f) - , T(0x1.4795fcp-14f), T(-0x1.f105f8p-11f), T(0x1.15e2e6p-7f), T(-0x1.b3a7e0p-5f) - , T(0x1.e4c688p-3f), T(-0x1.438084p-1f), T(0x1.0p0)) - ; - return average(rx, xx * num / (denom * x)); - } - else - { - auto num = - eve::reverse_horner(xx, T(0x1.3bfc2ac32b39ep-35), T(-0x1.4a3b14d9709f0p-29), T(0x1.86d8be5016991p-24) - , T(-0x1.2015dd001fa5bp-19), T(0x1.2db061d28d773p-15), T(-0x1.bbc454e5479acp-12) - , T(0x1.dffee25eba9bdp-9), T(-0x1.66562633da983p-6), T(0x1.81a4b94e413c5p-4) - , T(-0x1.f541cdebcb905p-3), T(0x1.0495c52fe411ep-1)) - ; - auto denom = - eve::reverse_horner(xx, T(0x1.3bfc202a6b560p-34), T(-0x1.51a2c0f7cf15cp-28), T(0x1.961705729c1cdp-23) - , T(-0x1.31eec145c9b53p-18), T(0x1.4795fc069cc34p-14), T(-0x1.f105f8f05c7d8p-11) - , T(0x1.15e2e53c1fb60p-7), T(-0x1.b3a7e0ed1122bp-5), T(0x1.e4c6875173c3ep-3) - , T(-0x1.438083f2d47c7p-1), T(0x1.0p0)) - ; - return average(rx, xx * num / (denom * x)); - } - }; - auto dawson3 = [](auto xx, auto rx, auto x) + if constexpr( has_native_abi_v ) { - if constexpr( std::is_same_v ) - { - auto num = - eve::reverse_horner(xx, T(-0x1.fe79cap-12f), T(0x1.0e11acp-6f), T(-0x1.6203e2p-3f) - , T(0x1.422b20p-1f), T(-0x1.2e6230p-1f)) - ; - auto denom = - eve::reverse_horner(xx, T(-0x1.fe79cap-11f), T(0x1.1a0886p-5f), T(-0x1.932858p-2f) - , T(0x1.bb92c0p+0f), T(-0x1.595ea2p+1f), T(0x1.0p0)) - ; - return average(rx, xx * num / (denom * x)); - } - else - { - auto num = - eve::reverse_horner(xx, T(-0x1.fe79cad3d09fbp-12), T(0x1.0e11ab3d4d36bp-6), T(-0x1.6203e2f0a174ep-3) - , T(0x1.422b1f29fbcb6p-1), T(-0x1.2e622ffa7ef20p-1)) - ; - auto denom = - eve::reverse_horner(xx, T(-0x1.fe79cad3d0a8dp-11), T(0x1.1a0885fe44f2dp-5), T(-0x1.932857b438c94p-2) - , T(0x1.bb92c0388a954p+0), T(-0x1.595ea2e7576e2p+1), T(0x1.0p0)) - ; - return average(rx, xx * num / (denom * x)); - } - }; - auto dawson4 = [](auto rx) { return rx * T(0.5); }; + using elt_t = element_type_t; + auto x = eve::abs(a0); + auto rx = x; + auto xx = sqr(x); + auto dawson1 = [](auto xx, auto x) + { + if constexpr( std::is_same_v ) + { + return x * + eve::reverse_horner(xx, T(0x1.000000p+0f), T(-0x1.77cccap-4f), T(0x1.5a3578p-5f) + , T(-0x1.bdb92ep-11f), T(0x1.71a316p-12f), T(0x1.9d2900p-19f) + , T(0x1.ffb82ep-21f), T(0x1.4e0912p-26f), T(0x1.d2e30ap-31f) + , T(0x1.8ffb30p-37f)) / + eve::reverse_horner(xx, T(0x1.000000p+0f), T(0x1.265bbcp-1f), T(0x1.455fdcp-3f) + , T(0x1.c9d936p-6f), T(0x1.c92fd0p-9f), T(0x1.555662p-12f) + , T(0x1.860dcep-16f), T(0x1.559b5cp-20f), T(0x1.bfc372p-25f) + , T(0x1.993234p-30f), T(0x1.a6ddf6p-36f)) ; + } + else + { + return x * + eve::reverse_horner(xx, T(0x1.0000000000000p+0), T(-0x1.77ccca3261da7p-4), T(0x1.5a357714ced03p-5) + , T(-0x1.bdb92e4e9a921p-11), T(0x1.71a3163a27174p-12), T(0x1.9d29007c3bd6ap-19) + , T(0x1.ffb82d857b833p-21), T(0x1.4e09113ca0ba0p-26), T(0x1.d2e309db8f5fbp-31) + , T(0x1.8ffb30f7d51f1p-37)) / + eve::reverse_horner(xx, T(0x1.0000000000000p+0), T(0x1.265bbc0f09197p-1), T(0x1.455fdbcb2a008p-3) + , T(0x1.c9d9358f736f7p-6), T(0x1.c92fcf5507532p-9), T(0x1.555661ec43b62p-12) + , T(0x1.860dcd4d604a3p-16), T(0x1.559b5c414f013p-20), T(0x1.bfc372910659cp-25) + , T(0x1.993234f10c1b4p-30), T(0x1.a6ddf536ed65ap-36)); + } + }; + auto dawson2 = [](auto xx, auto rx, auto x) + { + if constexpr( std::is_same_v ) + { + auto num = + eve::reverse_horner(xx, T(0x1.3bfc2ap-35f), T(-0x1.4a3b14p-29f), T(0x1.86d8bep-24f), T(-0x1.2015dep-19f) + , T(0x1.2db062p-15f), T(-0x1.bbc454p-12f), T(0x1.dffee2p-9f), T(-0x1.665626p-6f) + , T(0x1.81a4bap-4f), T(-0x1.f541cep-3f), T(0x1.0495c6p-1f)) + ; + auto denom = + eve::reverse_horner(xx, T(0x1.3bfc20p-34f), T(-0x1.51a2c0p-28f), T(0x1.961706p-23f), T(-0x1.31eec2p-18f) + , T(0x1.4795fcp-14f), T(-0x1.f105f8p-11f), T(0x1.15e2e6p-7f), T(-0x1.b3a7e0p-5f) + , T(0x1.e4c688p-3f), T(-0x1.438084p-1f), T(0x1.0p0)) + ; + return average(rx, xx * num / (denom * x)); + } + else + { + auto num = + eve::reverse_horner(xx, T(0x1.3bfc2ac32b39ep-35), T(-0x1.4a3b14d9709f0p-29), T(0x1.86d8be5016991p-24) + , T(-0x1.2015dd001fa5bp-19), T(0x1.2db061d28d773p-15), T(-0x1.bbc454e5479acp-12) + , T(0x1.dffee25eba9bdp-9), T(-0x1.66562633da983p-6), T(0x1.81a4b94e413c5p-4) + , T(-0x1.f541cdebcb905p-3), T(0x1.0495c52fe411ep-1)) + ; + auto denom = + eve::reverse_horner(xx, T(0x1.3bfc202a6b560p-34), T(-0x1.51a2c0f7cf15cp-28), T(0x1.961705729c1cdp-23) + , T(-0x1.31eec145c9b53p-18), T(0x1.4795fc069cc34p-14), T(-0x1.f105f8f05c7d8p-11) + , T(0x1.15e2e53c1fb60p-7), T(-0x1.b3a7e0ed1122bp-5), T(0x1.e4c6875173c3ep-3) + , T(-0x1.438083f2d47c7p-1), T(0x1.0p0)) + ; + return average(rx, xx * num / (denom * x)); + } + }; + auto dawson3 = [](auto xx, auto rx, auto x) + { + if constexpr( std::is_same_v ) + { + auto num = + eve::reverse_horner(xx, T(-0x1.fe79cap-12f), T(0x1.0e11acp-6f), T(-0x1.6203e2p-3f) + , T(0x1.422b20p-1f), T(-0x1.2e6230p-1f)) + ; + auto denom = + eve::reverse_horner(xx, T(-0x1.fe79cap-11f), T(0x1.1a0886p-5f), T(-0x1.932858p-2f) + , T(0x1.bb92c0p+0f), T(-0x1.595ea2p+1f), T(0x1.0p0)); + return average(rx, xx * num / (denom * x)); + } + else + { + auto num = + eve::reverse_horner(xx, T(-0x1.fe79cad3d09fbp-12), T(0x1.0e11ab3d4d36bp-6), T(-0x1.6203e2f0a174ep-3) + , T(0x1.422b1f29fbcb6p-1), T(-0x1.2e622ffa7ef20p-1)); + auto denom = + eve::reverse_horner(xx, T(-0x1.fe79cad3d0a8dp-11), T(0x1.1a0885fe44f2dp-5), T(-0x1.932857b438c94p-2) + , T(0x1.bb92c0388a954p+0), T(-0x1.595ea2e7576e2p+1), T(0x1.0p0)); + return average(rx, xx * num / (denom * x)); + } + }; + auto dawson4 = [](auto rx) { return rx * T(0.5); }; - auto r = nan(as()); - auto notdone = is_not_nan(x); - notdone = next_interval(dawson1, notdone, x < elt_t(3.25), r, xx, x); - rx = rec(x); - xx = sqr(rx); - if( eve::any(notdone) ) - { - notdone = next_interval(dawson2, notdone, x < elt_t(6.25), r, xx, rx, x); + auto r = nan(as()); + auto notdone = is_not_nan(x); + notdone = next_interval(dawson1, notdone, x < elt_t(3.25), r, xx, x); + rx = rec(x); + xx = sqr(rx); if( eve::any(notdone) ) { - notdone = next_interval(dawson3, notdone, x < elt_t(1.0e9), r, xx, rx, x); - if( eve::any(notdone) ) { last_interval(dawson4, x >= elt_t(1.0e9), r, rx); } + notdone = next_interval(dawson2, notdone, x < elt_t(6.25), r, xx, rx, x); + if( eve::any(notdone) ) + { + notdone = next_interval(dawson3, notdone, x < elt_t(1.0e9), r, xx, rx, x); + if( eve::any(notdone) ) { last_interval(dawson4, x >= elt_t(1.0e9), r, rx); } + } } + return eve::copysign(r, a0); } - return eve::copysign(r, a0); + else return apply_over(dawson, a0); } - else return apply_over(dawson, a0); -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -dawson_(EVE_SUPPORTS(cpu_), C const& cond, Ts ... ts) noexcept -{ - return mask_op(cond, eve::dawson, ts ...); -} - -template -EVE_FORCEINLINE auto -dawson_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, Ts ... ts) noexcept -{ - return mask_op(cond, d(eve::dawson), ts ...); -} } diff --git a/include/eve/module/special/regular/impl/digamma.hpp b/include/eve/module/special/regular/impl/digamma.hpp index faf8695dd7..153a6f46f1 100644 --- a/include/eve/module/special/regular/impl/digamma.hpp +++ b/include/eve/module/special/regular/impl/digamma.hpp @@ -15,181 +15,164 @@ namespace eve::detail { -template -EVE_FORCEINLINE T -digamma_(EVE_SUPPORTS(cpu_), T x) noexcept -{ - using elt_t = element_type_t; - if constexpr( has_native_abi_v ) + template + T digamma_(EVE_REQUIRES(cpu_), O const&, T x) { - auto dlarge = (std::is_same_v) ? 20 : 10; - auto br_1_2 = [](auto x, auto result) + using elt_t = element_type_t; + if constexpr( has_native_abi_v ) { - // computes digamma(a0)/a0 for double or double vectors - // xx is sqr(a0) and 0 <= abs(a0) <= 3.25 - T y(0.99558162689208984); - T root1(1569415565.0 / 1073741824uL); - T root2((381566830.0 / 1073741824uL) / 1073741824uL); - T root3(0.9016312093258695918615325266959189453125e-19); + auto dlarge = (std::is_same_v) ? 20 : 10; + auto br_1_2 = [](auto x, auto result) + { + // computes digamma(a0)/a0 for double or double vectors + // xx is sqr(a0) and 0 <= abs(a0) <= 3.25 + T y(0.99558162689208984); + T root1(1569415565.0 / 1073741824uL); + T root2((381566830.0 / 1073741824uL) / 1073741824uL); + T root3(0.9016312093258695918615325266959189453125e-19); - auto g = x - root1; - g -= root2; - g -= root3; - x = dec(x); - if constexpr( std::is_same_v ) - { - auto r = - eve::reverse_horner(x, T(0x1.04e9e69894978p-2), T(-0x1.4d5d0f9ab412fp-2), T(-0x1.4cf68d26e295ap-1) - , T(-0x1.2821c13c5e2bfp-2), T(-0x1.72b2e63723c78p-5), T(-0x1.0f7e5f66c2537p-9)) + auto g = x - root1; + g -= root2; + g -= root3; + x = dec(x); + if constexpr( std::is_same_v ) + { + auto r = + eve::reverse_horner(x, T(0x1.04e9e69894978p-2), T(-0x1.4d5d0f9ab412fp-2), T(-0x1.4cf68d26e295ap-1) + , T(-0x1.2821c13c5e2bfp-2), T(-0x1.72b2e63723c78p-5), T(-0x1.0f7e5f66c2537p-9)) - / - eve::reverse_horner(x, T(0x1.0000000000000p+0), T(0x1.09d1b06674d41p+1), T(0x1.75eb79397c930p+0) - , T(0x1.be65d28de361cp-2), T(0x1.bb9c8cc612ca3p-5), T(0x1.16fc90a0a1908p-9) - , T(-0x1.2b84f95bbf448p-21)) - ; - return fma(g, y, g * r) + result; - } - else - { - auto r = - eve::reverse_horner(x, T(0x1.04e9e6p-2f), T(-0x1.4d5d10p-2f), T(-0x1.4cf68ep-1f), T(-0x1.2821c2p-2f) - , T(-0x1.72b2e6p-5f), T(-0x1.0f7e60p-9f)) + / + eve::reverse_horner(x, T(0x1.0000000000000p+0), T(0x1.09d1b06674d41p+1), T(0x1.75eb79397c930p+0) + , T(0x1.be65d28de361cp-2), T(0x1.bb9c8cc612ca3p-5), T(0x1.16fc90a0a1908p-9) + , T(-0x1.2b84f95bbf448p-21)) + ; + return fma(g, y, g * r) + result; + } + else + { + auto r = + eve::reverse_horner(x, T(0x1.04e9e6p-2f), T(-0x1.4d5d10p-2f), T(-0x1.4cf68ep-1f), T(-0x1.2821c2p-2f) + , T(-0x1.72b2e6p-5f), T(-0x1.0f7e60p-9f)) - / - eve::reverse_horner(x, T(0x1.000000p+0f), T(0x1.09d1b0p+1f), T(0x1.75eb7ap+0f), T(0x1.be65d2p-2f) - , T(0x1.bb9c8cp-5f), T(0x1.16fc90p-9f), T(-0x1.2b84fap-21f)) - ; - return fma(g, y, g * r) + result; - } - }; + / + eve::reverse_horner(x, T(0x1.000000p+0f), T(0x1.09d1b0p+1f), T(0x1.75eb7ap+0f), T(0x1.be65d2p-2f) + , T(0x1.bb9c8cp-5f), T(0x1.16fc90p-9f), T(-0x1.2b84fap-21f)) + ; + return fma(g, y, g * r) + result; + } + }; - auto br_large = [](auto x, auto result) - { - // if we're above the lower-limit for the asymptotic expansion then use it: - x = dec(x); - result += log(x); - result += rec(x + x); - auto z = rec(sqr(x)); - T y(0); - if constexpr( std::is_same_v ) - { - y = - eve::reverse_horner(z, T(0x1.5555555555555p-4), T(-0x1.1111111111111p-7), T(0x1.0410410410410p-8) - , T(-0x1.1111111111111p-8), T(0x1.f07c1f07c1f08p-8), T(-0x1.5995995995996p-6) - , T(0x1.5555555555555p-4), T(-0x1.c5e5e5e5e5e5ep-2)) - ; - } - else - { - y = - eve::reverse_horner(z, T(0x1.555556p-4f), T(-0x1.111112p-7f), T(0x1.041042p-8f), T(-0x1.111112p-8f) - , T(0x1.f07c20p-8f), T(-0x1.59959ap-6f), T(0x1.555556p-4f), T(-0x1.c5e5e6p-2f)) - ; - } - result -= z * y; - return result; - }; + auto br_large = [](auto x, auto result) + { + // if we're above the lower-limit for the asymptotic expansion then use it: + x = dec(x); + result += log(x); + result += rec(x + x); + auto z = rec(sqr(x)); + T y(0); + if constexpr( std::is_same_v ) + { + y = + eve::reverse_horner(z, T(0x1.5555555555555p-4), T(-0x1.1111111111111p-7), T(0x1.0410410410410p-8) + , T(-0x1.1111111111111p-8), T(0x1.f07c1f07c1f08p-8), T(-0x1.5995995995996p-6) + , T(0x1.5555555555555p-4), T(-0x1.c5e5e5e5e5e5ep-2)) + ; + } + else + { + y = + eve::reverse_horner(z, T(0x1.555556p-4f), T(-0x1.111112p-7f), T(0x1.041042p-8f), T(-0x1.111112p-8f) + , T(0x1.f07c20p-8f), T(-0x1.59959ap-6f), T(0x1.555556p-4f), T(-0x1.c5e5e6p-2f)) + ; + } + result -= z * y; + return result; + }; - if constexpr( scalar_value ) - { - auto result = zero(as(x)); - if( x == 0 ) return copysign(inf(as(x)), x); - if( x < 0 ) + if constexpr( scalar_value ) { - if( 0 && (x > -1) ) result = -x; - else + auto result = zero(as(x)); + if( x == 0 ) return copysign(inf(as(x)), x); + if( x < 0 ) { - x = oneminus(x); - result = x - floor(x); + if( 0 && (x > -1) ) result = -x; + else + { + x = oneminus(x); + result = x - floor(x); + } + if( result > 0.5 ) result -= 1; + if( result == 0.5 ) result = zero(as(x)); + else if( result ) result = pi(as(x)) * cotpi(result); + else result = nan(as(x)); + // we are ready to increment result that was + // Pi()/tanpi(remainder) if a0 < 0 and remainder != 0 + // Nan if a0 < 0 and remainder == 0 + // 0 in any other cases } - if( result > 0.5 ) result -= 1; - if( result == 0.5 ) result = zero(as(x)); - else if( result ) result = pi(as(x)) * cotpi(result); - else result = nan(as(x)); - // we are ready to increment result that was - // Pi()/tanpi(remainder) if a0 < 0 and remainder != 0 - // Nan if a0 < 0 and remainder == 0 - // 0 in any other cases - } - if( x >= dlarge ) - { // If we're above the lower-limit for the asymptotic expansion then use it: - return br_large(x, result); - } - // If x > 2 reduce to the interval [1,2]: - while( x > 2 ) - { - x -= 1; - result += 1 / x; - } - // If x < 1 use shift to > 1: - if( x < 1 ) - { - result = -1 / x; - x += 1; - } - return br_1_2(x, result); - } - else // simd - { - x = if_else(is_ltz(x) && is_flint(x), allbits, x); - auto notdone = is_not_nan(x); - auto result = zero(as(x)); - auto test = is_lez(x); - if( eve::any(test) ) - { - auto a = x; - x = oneminus[test](x); - auto remainder = frac(x); - remainder = dec[remainder > 0.5](remainder); - remainder = if_else(is_eqz(remainder), nan(as(x)), remainder); - remainder = if_else(remainder == T(0.5), zero, pi(as(x)) * cotpi(remainder)); - result = if_else(is_eqz(a), copysign(inf(as(x)), a), remainder); - result = if_else(test, result, zero); + if( x >= dlarge ) + { // If we're above the lower-limit for the asymptotic expansion then use it: + return br_large(x, result); + } + // If x > 2 reduce to the interval [1,2]: + while( x > 2 ) + { + x -= 1; + result += 1 / x; + } + // If x < 1 use shift to > 1: + if( x < 1 ) + { + result = -1 / x; + x += 1; + } + return br_1_2(x, result); } - auto r = nan(as()); - if( eve::any(notdone) ) + else // simd { - notdone = next_interval(br_large, notdone, x >= dlarge, r, x, result); + x = if_else(is_ltz(x) && is_flint(x), allbits, x); + auto notdone = is_not_nan(x); + auto result = zero(as(x)); + auto test = is_lez(x); + if( eve::any(test) ) + { + auto a = x; + x = oneminus[test](x); + auto remainder = frac(x); + remainder = dec[remainder > 0.5](remainder); + remainder = if_else(is_eqz(remainder), nan(as(x)), remainder); + remainder = if_else(remainder == T(0.5), zero, pi(as(x)) * cotpi(remainder)); + result = if_else(is_eqz(a), copysign(inf(as(x)), a), remainder); + result = if_else(test, result, zero); + } + auto r = nan(as()); if( eve::any(notdone) ) { - // If x > 2 reduce to the interval [1,2]: - x = if_else(x > dlarge, one, x); - auto cond = x > T(2); - while( eve::any(cond) ) + notdone = next_interval(br_large, notdone, x >= dlarge, r, x, result); + if( eve::any(notdone) ) { - x = dec[cond](x); - result = add[cond](result, rec(x)); - cond = x > T(2); + // If x > 2 reduce to the interval [1,2]: + x = if_else(x > dlarge, one, x); + auto cond = x > T(2); + while( eve::any(cond) ) + { + x = dec[cond](x); + result = add[cond](result, rec(x)); + cond = x > T(2); + } + cond = x < T(1); + while( eve::any(cond) ) + { + result = add[cond](result, -rec(x)); + x = inc[cond](x); + cond = x < T(1); + } + notdone = last_interval(br_1_2, notdone, r, x, result); } - cond = x < T(1); - while( eve::any(cond) ) - { - result = add[cond](result, -rec(x)); - x = inc[cond](x); - cond = x < T(1); - } - notdone = last_interval(br_1_2, notdone, r, x, result); } + return r; } - return r; } + else return apply_over(digamma, x); } - else return apply_over(digamma, x); -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -digamma_(EVE_SUPPORTS(cpu_), C const& cond, Ts ... ts) noexcept -{ - return mask_op(cond, eve::digamma, ts ...); -} - -template -EVE_FORCEINLINE auto -digamma_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, Ts ... ts) noexcept -{ - return mask_op(cond, d(eve::digamma), ts ...); -} } diff --git a/include/eve/module/special/regular/impl/double_factorial.hpp b/include/eve/module/special/regular/impl/double_factorial.hpp index 1bfee5a422..3feb517f8f 100644 --- a/include/eve/module/special/regular/impl/double_factorial.hpp +++ b/include/eve/module/special/regular/impl/double_factorial.hpp @@ -14,32 +14,33 @@ namespace eve::detail { -template -EVE_FORCEINLINE auto -double_factorial_(EVE_SUPPORTS(cpu_), T i0) noexcept -{ + template + EVE_FORCEINLINE + as_wide_as_t + double_factorial_(EVE_REQUIRES(cpu_), O const&, T i0) noexcept + { if constexpr( has_native_abi_v ) { auto odd = [](auto i) - { - auto n = i >> 1; - auto r = (factorial(i) / ldexp(factorial(n), n)); - auto test = i <= 171; - if( eve::all(test) ) return r; - else { - constexpr double invsqrtpi = 0.564189583547756286948079451560772585844050629329; - auto r1 = tgamma(fma(float64(i), 0.5, 1.0)) * invsqrtpi; - return if_else(test, r, ldexp(r1, inc(i) >> 1)); - } - }; + auto n = i >> 1; + auto r = (factorial(i) / ldexp(factorial(n), n)); + auto test = i <= 171; + if( eve::all(test) ) return r; + else + { + constexpr double invsqrtpi = 0.564189583547756286948079451560772585844050629329; + auto r1 = tgamma(fma(float64(i), 0.5, 1.0)) * invsqrtpi; + return if_else(test, r, ldexp(r1, inc(i) >> 1)); + } + }; auto even = [](auto i) - { - auto n = i >> 1; - auto r = factorial(n); - return ldexp(r, n); - }; + { + auto n = i >> 1; + auto r = factorial(n); + return ldexp(r, n); + }; auto i = uint64(i0); auto r = inf(as()); // perhaps 0 should be fine auto notdone = i <= 300; @@ -48,5 +49,5 @@ double_factorial_(EVE_SUPPORTS(cpu_), T i0) noexcept return r; } else return apply_over(double_factorial, i0); -} + } } diff --git a/include/eve/module/special/regular/impl/erf.hpp b/include/eve/module/special/regular/impl/erf.hpp index 4db7094189..8969f50c00 100644 --- a/include/eve/module/special/regular/impl/erf.hpp +++ b/include/eve/module/special/regular/impl/erf.hpp @@ -13,132 +13,123 @@ namespace eve::detail { -template -auto -erf_(EVE_SUPPORTS(cpu_), T a0) noexcept -{ - if constexpr(scalar_value) + template + T + erf_(EVE_REQUIRES(cpu_), O const&, T a0) noexcept { - if constexpr( eve::platform::supports_invalids ) - if( is_nan(a0) ) return a0; - if constexpr( eve::platform::supports_infinites ) - if( eve::is_infinite(a0) ) return signnz(a0); - if constexpr( std::is_same_v ) + if constexpr(scalar_value) { - T y = eve::abs(a0); - if( y <= 0.46875 ) // 15/32 - { - T ysq = if_else(y > epso_2(as()), sqr(y), eve::zero); - return kernel1_erf1(a0, ysq); - } - else + if constexpr( eve::platform::supports_invalids ) + if( is_nan(a0) ) return a0; + if constexpr( eve::platform::supports_infinites ) + if( eve::is_infinite(a0) ) return signnz(a0); + if constexpr( std::is_same_v ) { - if( a0 > 26.543 ) return sign(a0); - else if( y <= 4 ) + T y = eve::abs(a0); + if( y <= 0.46875 ) // 15/32 { - T res = kernel1_erf2(a0, y); - res = kernel1_finalize2(res, y); - res = (half(as()) - res) + half(as()); - if( is_ltz(a0) ) res = -res; - return res; + T ysq = if_else(y > epso_2(as()), sqr(y), eve::zero); + return kernel1_erf1(a0, ysq); } - else // if (y <= 26.543) + else { - T res = kernel1_erf3(a0, y); - res = kernel1_finalize2(res, y); - res = (half(as()) - res) + half(as()); - if( is_ltz(a0) ) res = -res; - return res; + if( a0 > 26.543 ) return sign(a0); + else if( y <= 4 ) + { + T res = kernel1_erf2(a0, y); + res = kernel1_finalize2(res, y); + res = (half(as()) - res) + half(as()); + if( is_ltz(a0) ) res = -res; + return res; + } + else // if (y <= 26.543) + { + T res = kernel1_erf3(a0, y); + res = kernel1_finalize2(res, y); + res = (half(as()) - res) + half(as()); + if( is_ltz(a0) ) res = -res; + return res; + } } } - } - else if constexpr( std::is_same_v ) - { - T x = eve::abs(a0); - if( x < 6.6666667e-01f ) // 2/3 + else if constexpr( std::is_same_v ) { - return a0 * kernel_erf1(sqr(x)); - } - else - { - T z = x / inc(x) - 0.4f; // Ratio(); - T r2 = oneminus(exp(-sqr(x)) * kernel_erfc2(z)); - if( is_ltz(a0) ) r2 = -r2; - return r2; + T x = eve::abs(a0); + if( x < 6.6666667e-01f ) // 2/3 + { + return a0 * kernel_erf1(sqr(x)); + } + else + { + T z = x / inc(x) - 0.4f; // Ratio(); + T r2 = oneminus(exp(-sqr(x)) * kernel_erfc2(z)); + if( is_ltz(a0) ) r2 = -r2; + return r2; + } } } - } - else //simd case - { - if constexpr( has_native_abi_v ) + else //simd case { - using elt_t = element_type_t; - if constexpr( std::is_same_v ) + if constexpr( has_native_abi_v ) { - T y = eve::abs(a0); - y = if_else(y > T(26.543), T(26.543), y); - T sqry = eve::sqr(y); - auto test1 = eve::is_less(y, T(0.46875)); // 15/32; - T r1 = eve::zero(as()); - auto nb = eve::count_true(test1); - if( nb > 0 ) // here y < 0.46875 + using elt_t = element_type_t; + if constexpr( std::is_same_v ) { - T ysq = if_else(y > epso_2(as()), sqry, eve::zero); - r1 = kernel1_erf1(a0, ysq); - if( nb == T::size() ) return r1; - } - auto test2 = (y <= T(4)); - auto test3 = logical_andnot(test2, test1); + T y = eve::abs(a0); + y = if_else(y > T(26.543), T(26.543), y); + T sqry = eve::sqr(y); + auto test1 = eve::is_less(y, T(0.46875)); // 15/32; + T r1 = eve::zero(as()); + auto nb = eve::count_true(test1); + if( nb > 0 ) // here y < 0.46875 + { + T ysq = if_else(y > epso_2(as()), sqry, eve::zero); + r1 = kernel1_erf1(a0, ysq); + if( nb == T::size() ) return r1; + } + auto test2 = (y <= T(4)); + auto test3 = logical_andnot(test2, test1); - auto nb1 = eve::count_true(test3); - if( nb1 > 0 ) // here we treat 0.46875 <= y and y <= 4 - { - T res = kernel1_erf2(a0, y); + auto nb1 = eve::count_true(test3); + if( nb1 > 0 ) // here we treat 0.46875 <= y and y <= 4 + { + T res = kernel1_erf2(a0, y); + res = kernel1_finalize2(res, y); + res = (half(as()) - res) + half(as()); + res = minus[is_ltz(a0)](res); + r1 = if_else(test3, res, r1); + if( nb + nb1 == T::size() ) return r1; + } + // here we treat y > 4 + T res = kernel1_erf3(a0, y); res = kernel1_finalize2(res, y); res = (half(as()) - res) + half(as()); res = minus[is_ltz(a0)](res); - r1 = if_else(test3, res, r1); - if( nb + nb1 == T::size() ) return r1; + r1 = if_else(test2, r1, res); + return r1; } - // here we treat y > 4 - T res = kernel1_erf3(a0, y); - res = kernel1_finalize2(res, y); - res = (half(as()) - res) + half(as()); - res = minus[is_ltz(a0)](res); - r1 = if_else(test2, r1, res); - return r1; - } - else if constexpr( std::is_same_v ) - { - T x = eve::abs(a0); - T r1 = eve::zero(as()); - auto test1 = eve::is_less(x, 6.6666667e-01f); // Ratio()); - auto nb = eve::count_true(test1); - if( nb > 0 ) + else if constexpr( std::is_same_v ) { - r1 = a0 * kernel_erf1(sqr(x)); - if( nb >= T::size() ) return r1; + T x = eve::abs(a0); + T r1 = eve::zero(as()); + auto test1 = eve::is_less(x, 6.6666667e-01f); // Ratio()); + auto nb = eve::count_true(test1); + if( nb > 0 ) + { + r1 = a0 * kernel_erf1(sqr(x)); + if( nb >= T::size() ) return r1; + } + T z = x / inc(x); + z -= T(0.4); + T r2 = oneminus(exp(-sqr(x)) * kernel_erfc2(z)); + r2 = minus[is_ltz(a0)](r2); + r1 = if_else(test1, r1, r2); + if constexpr( eve::platform::supports_infinites ) + r1 = eve::if_else(eve::is_infinite(a0), eve::sign(a0), r1); + return r1; } - T z = x / inc(x); - z -= T(0.4); - T r2 = oneminus(exp(-sqr(x)) * kernel_erfc2(z)); - r2 = minus[is_ltz(a0)](r2); - r1 = if_else(test1, r1, r2); - if constexpr( eve::platform::supports_infinites ) - r1 = eve::if_else(eve::is_infinite(a0), eve::sign(a0), r1); - return r1; } + else return apply_over(erf, a0); } - else return apply_over(erf, a0); } } - -// ----------------------------------------------------------------------------------------------- -// Masked case -template -EVE_FORCEINLINE auto -erf_(EVE_SUPPORTS(cpu_), C const& cond, U const& t) noexcept -{ - return mask_op(cond, eve::erf, t); -} -} diff --git a/include/eve/module/special/regular/impl/erf_inv.hpp b/include/eve/module/special/regular/impl/erf_inv.hpp index fccf57199a..eff12fb577 100644 --- a/include/eve/module/special/regular/impl/erf_inv.hpp +++ b/include/eve/module/special/regular/impl/erf_inv.hpp @@ -14,9 +14,9 @@ namespace eve::detail { - template + template EVE_FORCEINLINE T - erf_inv_(EVE_SUPPORTS(cpu_), T a0) noexcept + erf_inv_(EVE_REQUIRES(cpu_), O const&, T a0) noexcept { using elt_t = element_type_t; if constexpr( has_native_abi_v ) @@ -108,13 +108,4 @@ namespace eve::detail } else return apply_over(erf_inv, a0); } - -// ----------------------------------------------------------------------------------------------- -// Masked case - template - EVE_FORCEINLINE auto - erf_inv_(EVE_SUPPORTS(cpu_), C const& cond, U const& t) noexcept - { - return mask_op(cond, eve::erf_inv, t); - } } diff --git a/include/eve/module/special/regular/impl/erfc.hpp b/include/eve/module/special/regular/impl/erfc.hpp index d42aa8255f..546f5eebda 100644 --- a/include/eve/module/special/regular/impl/erfc.hpp +++ b/include/eve/module/special/regular/impl/erfc.hpp @@ -13,129 +13,119 @@ namespace eve::detail { -template -auto -erfc_(EVE_SUPPORTS(cpu_), T x) noexcept -{ - if constexpr(scalar_value) + template + T erfc_(EVE_REQUIRES(cpu_), O const&, T x) noexcept { - if constexpr( eve::platform::supports_invalids ) - if( is_nan(x) ) return x; - if constexpr( eve::platform::supports_infinites ) - if( eve::is_infinite(x) ) return oneminus(signnz(x)); - if( is_eqz(x) ) return T(1); - if constexpr( std::is_same_v ) + if constexpr(scalar_value) { - T y = eve::abs(x); - if( y <= 0.46875 ) // 15/32 - { - T ysq = if_else(y > epso_2(as()), sqr(y), eve::zero); - T res = kernel1_erf1(x, ysq); - res = oneminus(res); - return res; - } - else if( y <= 4.0 ) - { - T res = kernel1_erf2(x, y); - res = kernel1_finalize2(res, y); - if( is_ltz(x) ) res = 2.0 - res; - return res; - } - else if( y <= 26.543 ) + if constexpr( eve::platform::supports_invalids ) + if( is_nan(x) ) return x; + if constexpr( eve::platform::supports_infinites ) + if( eve::is_infinite(x) ) return oneminus(signnz(x)); + if( is_eqz(x) ) return T(1); + if constexpr( std::is_same_v ) { - T res = kernel1_erf3(x, y); - res = kernel1_finalize2(res, y); - if( is_ltz(x) ) res = T(2) - res; - return res; + T y = eve::abs(x); + if( y <= 0.46875 ) // 15/32 + { + T ysq = if_else(y > epso_2(as()), sqr(y), eve::zero); + T res = kernel1_erf1(x, ysq); + res = oneminus(res); + return res; + } + else if( y <= 4.0 ) + { + T res = kernel1_erf2(x, y); + res = kernel1_finalize2(res, y); + if( is_ltz(x) ) res = 2.0 - res; + return res; + } + else if( y <= 26.543 ) + { + T res = kernel1_erf3(x, y); + res = kernel1_finalize2(res, y); + if( is_ltz(x) ) res = T(2) - res; + return res; + } + else return (is_ltz(x)) ? T(2) : zero(as()); } - else return (is_ltz(x)) ? T(2) : zero(as()); - } - else if constexpr( std::is_same_v ) - { - T xx = eve::abs(x); - T r1 = zero(as()); - T z = xx / inc(xx); - if( 3.0f * xx < 2.0f ) { r1 = kernel_erfc3(z); } - else + else if constexpr( std::is_same_v ) { - z -= 0.4f; - r1 = exp(-sqr(xx)) * kernel_erfc2(z); + T xx = eve::abs(x); + T r1 = zero(as()); + T z = xx / inc(xx); + if( 3.0f * xx < 2.0f ) { r1 = kernel_erfc3(z); } + else + { + z -= 0.4f; + r1 = exp(-sqr(xx)) * kernel_erfc2(z); + } + return (x < 0.0f) ? 2.0f - r1 : r1; } - return (x < 0.0f) ? 2.0f - r1 : r1; } - } - else //simd case - { - if constexpr( has_native_abi_v ) + else //simd case { - auto a0 = x; - using elt_t = element_type_t; - if constexpr( std::is_same_v ) + if constexpr( has_native_abi_v ) { - T y = eve::abs(a0); - y = if_else(y > T(26.543), T(27), y); - T sqry = sqr(y); - auto test1 = eve::is_less(y, T(0.46875)); // 15/32; - T r1 = eve::zero(as()); - auto nb = eve::count_true(test1); - if( nb > 0 ) // here y < 0.46875 + auto a0 = x; + using elt_t = element_type_t; + if constexpr( std::is_same_v ) { - T ysq = if_else(y > epso_2(as()), sqry, eve::zero); - r1 = kernel1_erf1(a0, ysq); - r1 = oneminus(r1); - if( nb == T::size() ) return r1; - } - auto test2 = (y <= T(4)); - auto test3 = logical_andnot(test2, test1); + T y = eve::abs(a0); + y = if_else(y > T(26.543), T(27), y); + T sqry = sqr(y); + auto test1 = eve::is_less(y, T(0.46875)); // 15/32; + T r1 = eve::zero(as()); + auto nb = eve::count_true(test1); + if( nb > 0 ) // here y < 0.46875 + { + T ysq = if_else(y > epso_2(as()), sqry, eve::zero); + r1 = kernel1_erf1(a0, ysq); + r1 = oneminus(r1); + if( nb == T::size() ) return r1; + } + auto test2 = (y <= T(4)); + auto test3 = logical_andnot(test2, test1); - auto nb1 = eve::count_true(test3); - if( nb1 > 0 ) // here we treat 0.46875 <= y and y <= 4 - { - T res = kernel1_erf2(a0, y); + auto nb1 = eve::count_true(test3); + if( nb1 > 0 ) // here we treat 0.46875 <= y and y <= 4 + { + T res = kernel1_erf2(a0, y); + res = kernel1_finalize2(res, y); + res = if_else(is_ltz(a0), 2.0 - res, res); + r1 = if_else(test3, res, r1); + if( nb + nb1 == T::size() ) return r1; + } + // here we treat y > 4 + T res = kernel1_erf3(a0, y); res = kernel1_finalize2(res, y); res = if_else(is_ltz(a0), 2.0 - res, res); - r1 = if_else(test3, res, r1); - if( nb + nb1 == T::size() ) return r1; + r1 = if_else(test2, r1, res); + return r1; } - // here we treat y > 4 - T res = kernel1_erf3(a0, y); - res = kernel1_finalize2(res, y); - res = if_else(is_ltz(a0), 2.0 - res, res); - r1 = if_else(test2, r1, res); - return r1; - } - else if constexpr( std::is_same_v ) - { - T x = eve::abs(a0); - auto test0 = eve::is_ltz(a0); - T r1 = zero(as()); - auto test1 = eve::is_less(x, 6.6666667e-01f); - T z = x / inc(x); - - auto nb = eve::count_true(test1); - if( nb > 0 ) + else if constexpr( std::is_same_v ) { - r1 = kernel_erfc3(z); - if( nb >= T::size() ) return eve::if_else(test0, T(2) - r1, r1); - } - z -= T(0.4); - T r2 = exp(-sqr(x)) * kernel_erfc2(z); - r1 = if_else(test1, r1, r2); - if constexpr( eve::platform::supports_infinites ) r1 = if_else(x == inf(as()), zero, r1); + T x = eve::abs(a0); + auto test0 = eve::is_ltz(a0); + T r1 = zero(as()); + auto test1 = eve::is_less(x, 6.6666667e-01f); + T z = x / inc(x); + + auto nb = eve::count_true(test1); + if( nb > 0 ) + { + r1 = kernel_erfc3(z); + if( nb >= T::size() ) return eve::if_else(test0, T(2) - r1, r1); + } + z -= T(0.4); + T r2 = exp(-sqr(x)) * kernel_erfc2(z); + r1 = if_else(test1, r1, r2); + if constexpr( eve::platform::supports_infinites ) r1 = if_else(x == inf(as()), zero, r1); - return eve::if_else(test0, T(2) - r1, r1); + return eve::if_else(test0, T(2) - r1, r1); + } } + else return apply_over(erfc, x); } - else return apply_over(erfc, x); } } - -// ----------------------------------------------------------------------------------------------- -// Masked case -template -EVE_FORCEINLINE auto -erfc_(EVE_SUPPORTS(cpu_), C const& cond, U const& t) noexcept -{ - return mask_op(cond, eve::erfc, t); -} -} diff --git a/include/eve/module/special/regular/impl/erfc_inv.hpp b/include/eve/module/special/regular/impl/erfc_inv.hpp index 3fdb7da9f0..867e950141 100644 --- a/include/eve/module/special/regular/impl/erfc_inv.hpp +++ b/include/eve/module/special/regular/impl/erfc_inv.hpp @@ -9,14 +9,12 @@ #include #include -#include #include namespace eve::detail { - template - EVE_FORCEINLINE T - erfc_inv_(EVE_SUPPORTS(cpu_), T a0) noexcept + template + T erfc_inv_(EVE_REQUIRES(cpu_), O const&, T a0) noexcept { using elt_t = element_type_t; if constexpr( has_native_abi_v ) @@ -48,8 +46,8 @@ namespace eve::detail , T(0x1.4c58aep-8f), T(0x1.7515dcp-9f)) / - eve::reverse_horner(w, T(0x1.5e8044p-7f), T(-0x1.bb0154p-7f), T(0x1.5204bep-8f) - , T(0x1.74e782p-9f), T(0x1.2c6364p-27f)) + eve::reverse_horner(w, T(0x1.5e8044p-7f), T(-0x1.bb0154p-7f), T(0x1.5204bep-8f) + , T(0x1.74e782p-9f), T(0x1.2c6364p-27f)) ; return copysign(h, a00); }; @@ -98,7 +96,7 @@ namespace eve::detail , T(0x1.05ac6a8fba182p-27), T(-0x1.0468fb24e2f5fp-28), T(0x1.9e6bf2dda45e3p-30) , T(-0x1.18feec0e38727p-32), T(-0x1.dcec3a7785389p-36)) ; - }; + }; notdone = last_interval(br_wge16, notdone, r, w); } } @@ -107,13 +105,4 @@ namespace eve::detail } else return apply_over(erfc_inv, a0); } - -// ----------------------------------------------------------------------------------------------- -// Masked case - template - EVE_FORCEINLINE auto - erfc_inv_(EVE_SUPPORTS(cpu_), C const& cond, U const& t) noexcept - { - return mask_op(cond, eve::erfc_inv, t); - } } diff --git a/include/eve/module/special/regular/impl/erfcx.hpp b/include/eve/module/special/regular/impl/erfcx.hpp index b66abb51ef..b81a6578fe 100644 --- a/include/eve/module/special/regular/impl/erfcx.hpp +++ b/include/eve/module/special/regular/impl/erfcx.hpp @@ -15,10 +15,9 @@ namespace eve::detail { - template - EVE_FORCEINLINE constexpr T - erfcx_(EVE_SUPPORTS(cpu_), D const&, T x) noexcept - requires(is_one_of(types {})) + template + constexpr T + erfcx_(EVE_REQUIRES(cpu_), O const&, T x) { /* * Based on: M. M. Shepherd and J. G. Laframboise, "Chebyshev Approximation of @@ -103,20 +102,4 @@ namespace eve::detail r = if_else(xpos, r, r1); return if_else(is_nan(x), eve::allbits, r); } - - template - EVE_FORCEINLINE T - erfcx_(EVE_SUPPORTS(cpu_), T x) noexcept - { - return erfcx(regular_type(), x); - } - -// ----------------------------------------------------------------------------------------------- -// Masked case - template - EVE_FORCEINLINE auto - erfcx_(EVE_SUPPORTS(cpu_), C const& cond, U const& t) noexcept - { - return mask_op(cond, eve::erfcx, t); - } } diff --git a/include/eve/module/special/regular/impl/exp_int.hpp b/include/eve/module/special/regular/impl/exp_int.hpp index 9f75d68cf2..46d7396a47 100644 --- a/include/eve/module/special/regular/impl/exp_int.hpp +++ b/include/eve/module/special/regular/impl/exp_int.hpp @@ -14,144 +14,141 @@ namespace eve::detail { -template -EVE_FORCEINLINE T -exp_int_(EVE_SUPPORTS(cpu_), T x) noexcept -{ - return exp_int(T(1), x); -} - -template -auto exp_int_(EVE_SUPPORTS(cpu_), I in, T x) noexcept -{ - if constexpr(scalar_value && scalar_value) + template + EVE_FORCEINLINE T exp_int_(EVE_REQUIRES(cpu_), O const&, T x) { - if( in == 0 ) return exp(-x) / x; - if( in < 0 ) return nan(as()); - return exp_int(T(in), x); + return exp_int(T(1), x); } - else if constexpr(integral_scalar_value && simd_value) - { - using elt_t = element_type_t; - auto n = T(convert(in, as())); - return exp_int(n, x); - } - else if constexpr(integral_simd_value && simd_value) - { - using elt_t = element_type_t; - return exp_int(convert(in, as()), x); - } - else if constexpr(integral_simd_value && scalar_value) + + template + eve::as_wide_as_t exp_int_(EVE_REQUIRES(cpu_), O const& , I in, T x) { - using elt_t = element_type_t; - using w_t = wide>; - using i_t = as_integer_t; - return exp_int(convert(in, as()), w_t(x)); + if constexpr(scalar_value && scalar_value) + { + if( in == 0 ) return exp(-x) / x; + if( in < 0 ) return nan(as()); + return exp_int(T(in), x); + } + else if constexpr(integral_scalar_value && simd_value) + { + using elt_t = element_type_t; + auto n = T(convert(in, as())); + return exp_int(n, x); + } + else if constexpr(integral_simd_value && simd_value) + { + using elt_t = element_type_t; + return exp_int(convert(in, as()), x); + } + else if constexpr(integral_simd_value && scalar_value) + { + using elt_t = element_type_t; + using w_t = wide>; + using i_t = as_integer_t; + return exp_int(convert(in, as()), w_t(x)); + } } -} - -template -EVE_FORCEINLINE T -exp_int_(EVE_SUPPORTS(cpu_), T n, T x) noexcept -{ - using elt_t = element_type_t; - if constexpr( has_native_abi_v ) + template + T exp_int_(EVE_REQUIRES(cpu_), O const& , T n, T x) { - auto notdone = is_gez(x) && is_flint(n) && is_gez(n); - T r = nan(as(x)); // if_else(notdone, zero, nan(as(x))); - auto br_zeron = [](auto x) { // n == 0 - return exp(-x) / x; - }; - notdone = next_interval(br_zeron, notdone, is_eqz(n), r, x); // n == 0 - if( eve::any(notdone) ) + using elt_t = element_type_t; + if constexpr( has_native_abi_v ) { - auto br_largen = [](auto n, auto x) { // n > 5000 - auto xk = x + n; - auto yk = rec(sqr(xk)); - auto t = n; - auto ans = yk * t * (6 * sqr(x) - 8 * t * x + sqr(t)); - ans = yk * (ans + t * (t - 2 * x)); - ans = yk * (ans + t); - return inc(ans) * exp(-x) / xk; + auto notdone = is_gez(x) && is_flint(n) && is_gez(n); + T r = nan(as(x)); // if_else(notdone, zero, nan(as(x))); + auto br_zeron = [](auto x) { // n == 0 + return exp(-x) / x; }; - notdone = next_interval(br_largen, notdone, n > 5000, r, n, x); // n > 5000 + notdone = next_interval(br_zeron, notdone, is_eqz(n), r, x); // n == 0 if( eve::any(notdone) ) { - auto br_xlt1 = [](auto n, auto x) { // here x is always le 1 - auto eqzx = is_eqz(x); - x = inc[eqzx](x); // loop is infinite for x == 0 - auto psi1 = zero(as(x)); - int32_t maxn = dec(max(1, int32_t(eve::maximum(n)))); - for( int32_t i = maxn; i != 0; --i ) psi1 = add[T(i) < n](psi1, rec(T(i))); - auto euler = ieee_constant<0x1.2788d00p-1f, 0x1.2788cfc6fb619p-1>(eve::as{}); - auto psi = -euler - log(x) + psi1; - T t; - ; - auto z = -x; - auto xk = zero(as(x)); - auto yk = one(as(x)); - auto pk = oneminus(n); - auto ans = if_else(is_eqz(pk), zero, rec(pk)); - do { - xk = inc(xk); - yk *= z / xk; - pk = inc(pk); - ans = add[is_nez(pk)](ans, yk / pk); - t = if_else(is_nez(ans), abs(yk / ans), one); - } - while( eve::any(t > epso_2(as(x))) ); - auto in = int_(n); - return add[eqzx]((eve::pow(z, dec(in)) * psi / tgamma(n)) - ans, inf(as(x))); + auto br_largen = [](auto n, auto x) { // n > 5000 + auto xk = x + n; + auto yk = rec(sqr(xk)); + auto t = n; + auto ans = yk * t * (6 * sqr(x) - 8 * t * x + sqr(t)); + ans = yk * (ans + t * (t - 2 * x)); + ans = yk * (ans + t); + return inc(ans) * exp(-x) / xk; }; - auto xlt1 = x < 1; - T xx = if_else(xlt1, x, one); - notdone = next_interval(br_xlt1, notdone, xlt1, r, n, xx); // x < 1 + notdone = next_interval(br_largen, notdone, n > 5000, r, n, x); // n > 5000 if( eve::any(notdone) ) { - auto br_xge1 = [](auto n, auto x) { // here x is always gt 1 - auto exp_intbig = (sizeof(elt_t) == 8) ? T(1ull << 57) : T(1ull << 24); - auto r = exp_intbig; - int32_t sk = 1; - T t; - auto pkm2 = one(as(x)); - auto qkm2 = x; - auto pkm1 = one(as(x)); - auto qkm1 = x + n; - auto ans = pkm1 / qkm1; + auto br_xlt1 = [](auto n, auto x) { // here x is always le 1 + auto eqzx = is_eqz(x); + x = inc[eqzx](x); // loop is infinite for x == 0 + auto psi1 = zero(as(x)); + int32_t maxn = dec(max(1, int32_t(eve::maximum(n)))); + for( int32_t i = maxn; i != 0; --i ) psi1 = add[T(i) < n](psi1, rec(T(i))); + auto euler = ieee_constant<0x1.2788d00p-1f, 0x1.2788cfc6fb619p-1>(eve::as{}); + auto psi = -euler - log(x) + psi1; + T t; + ; + auto z = -x; + auto xk = zero(as(x)); + auto yk = one(as(x)); + auto pk = oneminus(n); + auto ans = if_else(is_eqz(pk), zero, rec(pk)); do { - auto stest = is_odd(T(++sk)); - auto k_2 = T(sk >> 1); - auto yk = if_else(stest, one, x); - auto xk = add[stest](k_2, n); - auto pk = pkm1 * yk + pkm2 * xk; - auto qk = qkm1 * yk + qkm2 * xk; - r = pk / qk; - auto test = is_nez(qk); - t = if_else(test, abs((ans - r) / r), one); - ans = if_else(test, r, ans); - pkm2 = pkm1; - pkm1 = pk; - qkm2 = qkm1; - qkm1 = qk; - test = abs(pk) > exp_intbig; - auto fac = if_else(test, epso_2(as(x)), one); - pkm2 *= fac; - pkm1 *= fac; - qkm2 *= fac; - qkm1 *= fac; + xk = inc(xk); + yk *= z / xk; + pk = inc(pk); + ans = add[is_nez(pk)](ans, yk / pk); + t = if_else(is_nez(ans), abs(yk / ans), one); } - while( eve::any(t > eps(as(x))) ); - return add[x < maxlog(as(x))](zero(as(x)), ans * exp(-x)); - + while( eve::any(t > epso_2(as(x))) ); + auto in = int_(n); + return add[eqzx]((eve::pow(z, dec(in)) * psi / tgamma(n)) - ans, inf(as(x))); }; - T xx = if_else(xlt1, T(2), x); - notdone = last_interval(br_xge1, notdone, r, n, xx); + auto xlt1 = x < 1; + T xx = if_else(xlt1, x, one); + notdone = next_interval(br_xlt1, notdone, xlt1, r, n, xx); // x < 1 + if( eve::any(notdone) ) + { + auto br_xge1 = [](auto n, auto x) { // here x is always gt 1 + auto exp_intbig = (sizeof(elt_t) == 8) ? T(1ull << 57) : T(1ull << 24); + auto r = exp_intbig; + int32_t sk = 1; + T t; + auto pkm2 = one(as(x)); + auto qkm2 = x; + auto pkm1 = one(as(x)); + auto qkm1 = x + n; + auto ans = pkm1 / qkm1; + do { + auto stest = is_odd(T(++sk)); + auto k_2 = T(sk >> 1); + auto yk = if_else(stest, one, x); + auto xk = add[stest](k_2, n); + auto pk = pkm1 * yk + pkm2 * xk; + auto qk = qkm1 * yk + qkm2 * xk; + r = pk / qk; + auto test = is_nez(qk); + t = if_else(test, abs((ans - r) / r), one); + ans = if_else(test, r, ans); + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + test = abs(pk) > exp_intbig; + auto fac = if_else(test, epso_2(as(x)), one); + pkm2 *= fac; + pkm1 *= fac; + qkm2 *= fac; + qkm1 *= fac; + } + while( eve::any(t > eps(as(x))) ); + return add[x < maxlog(as(x))](zero(as(x)), ans * exp(-x)); + + }; + T xx = if_else(xlt1, T(2), x); + notdone = last_interval(br_xge1, notdone, r, n, xx); + } } } + return if_else(is_eqz(x), rec(dec(n)), r); } - return if_else(is_eqz(x), rec(dec(n)), r); + else return apply_over(exp_int, n, x); } - else return apply_over(exp_int, n, x); -} } diff --git a/include/eve/module/special/regular/impl/factorial.hpp b/include/eve/module/special/regular/impl/factorial.hpp index 33c0d20734..64050ecb75 100644 --- a/include/eve/module/special/regular/impl/factorial.hpp +++ b/include/eve/module/special/regular/impl/factorial.hpp @@ -12,213 +12,214 @@ namespace eve::detail { -template -EVE_FORCEINLINE auto -factorial_(EVE_SUPPORTS(cpu_), T n) noexcept -{ - constexpr std::array dfactorials = {{0x1p+0, - 0x1p+0, - 0x1p+1, - 0x1.8p+2, - 0x1.8p+4, - 0x1.ep+6, - 0x1.68p+9, - 0x1.3bp+12, - 0x1.3bp+15, - 0x1.626p+18, - 0x1.baf8p+21, - 0x1.308a8p+25, - 0x1.c8cfcp+28, - 0x1.7328ccp+32, - 0x1.44c3b28p+36, - 0x1.30777758p+40, - 0x1.30777758p+44, - 0x1.437eeecd8p+48, - 0x1.6beecca73p+52, - 0x1.b02b930689p+56, - 0x1.0e1b3be415ap+61, - 0x1.6283be9b5c62p+65, - 0x1.e77526159f06cp+69, - 0x1.5e5c335f8a4cep+74, - 0x1.06c52687a7b9ap+79, - 0x1.9a940c33f6121p+83, - 0x1.4d9849ea37eebp+88, - 0x1.19787e5d9f316p+93, - 0x1.ec92dd23d6967p+97, - 0x1.be6518687a785p+102, - 0x1.a27ec6e1f2d0dp+107, - 0x1.956ad0aae33a4p+112, - 0x1.956ad0aae33a4p+117, - 0x1.a21627303a541p+122, - 0x1.bc3789a33df96p+127, - 0x1.e5dcbe8a8bc8cp+132, - 0x1.114c2b2deea0fp+138, - 0x1.3c0011ed1bea1p+143, - 0x1.774015499125fp+148, - 0x1.c95619f1a8e64p+153, - 0x1.1dd5d037098fep+159, - 0x1.6e39f2c684406p+164, - 0x1.e0ac0ea48d948p+169, - 0x1.42f399d68f1fcp+175, - 0x1.bc0ef38704cbbp+180, - 0x1.383a833aef5f3p+186, - 0x1.c0d41ca4b818ep+191, - 0x1.499bc508f7324p+197, - 0x1.ee69a78d72cb6p+202, - 0x1.7a88e4484be3bp+208, - 0x1.27baf2587b49ep+214, - 0x1.d751f23d047dcp+219, - 0x1.7ef294d193a63p+225, - 0x1.3d20e33d8e45ap+231, - 0x1.0b93bfbbf00acp+237, - 0x1.cbe5f18b04928p+242, - 0x1.92693359a4003p+248, - 0x1.6665b1bbd6102p+254, - 0x1.44cc291239feap+260, - 0x1.2b6c35dccd76cp+266, - 0x1.18b5727f009f5p+272, - 0x1.0b8cf1210c97ep+278, - 0x1.0330899804332p+284, - 0x1.fe478ee34844ap+289, - 0x1.fe478ee34844ap+295, - 0x1.0320568f6ab2ep+302, - 0x1.0b395943e6087p+308, - 0x1.17c0097314d0dp+314, - 0x1.293c0a0a461dep+320, - 0x1.4074bad313983p+326, - 0x1.5e7fac56dd6e8p+332, - 0x1.84d5a3305da69p+338, - 0x1.b5705796695b6p+344, - 0x1.f2f423e7902c4p+350, - 0x1.207524c1df599p+357, - 0x1.5209471331bdp+363, - 0x1.916b0466cb107p+369, - 0x1.e2f4c14bac4fcp+375, - 0x1.264d25ca1d009p+382, - 0x1.6b473aa57bcccp+388, - 0x1.c619094edabffp+394, - 0x1.1f5bd7e3e66d7p+401, - 0x1.702dac9bff3c4p+407, - 0x1.dd7b3bda4f022p+413, - 0x1.3958df4743d96p+420, - 0x1.a02a088aa61cbp+426, - 0x1.179c3dbd279b5p+433, - 0x1.7c1863ed21d72p+439, - 0x1.0550c4b30743ep+446, - 0x1.6b645188f61a6p+452, - 0x1.ff0512a89a152p+458, - 0x1.6b4d9b43dd8bp+465, - 0x1.051fc798c73bfp+472, - 0x1.7b722e0a01831p+478, - 0x1.16a7d9cf591c4p+485, - 0x1.9da1274fc845fp+491, - 0x1.3638dd7bd6347p+498, - 0x1.d62e2fafb0a78p+504, - 0x1.67fb5c8283404p+511, - 0x1.166c698cf183bp+518, - 0x1.b30964ec395dcp+524, - 0x1.574569a26544p+531, - 0x1.118b502d68b23p+538, - 0x1.b83c3509147ecp+544, - 0x1.65b0eb1760a7p+551, - 0x1.256b20d92d49p+558, - 0x1.e5f96e67b300ep+564, - 0x1.963e824aafa2cp+571, - 0x1.56c4bdef04315p+578, - 0x1.23e389bd8992p+585, - 0x1.f5af14bdc472fp+591, - 0x1.b30dd3fc905bap+598, - 0x1.7cac197cfe503p+605, - 0x1.500fee805882dp+612, - 0x1.2b4e306a4ed48p+619, - 0x1.0ce83f7f82d2fp+626, - 0x1.e764f3171d1e4p+632, - 0x1.bd824633209dbp+639, - 0x1.9ab418b722116p+646, - 0x1.7dd36efa41ac2p+653, - 0x1.65f6380a9d916p+660, - 0x1.5262c0fa08f37p+667, - 0x1.42861fee5088p+674, - 0x1.35ece2af0162bp+681, - 0x1.2c3d7b998957ap+688, - 0x1.25340ab3f01f9p+695, - 0x1.209f3a89205f1p+702, - 0x1.1e5dfc140e1e5p+709, - 0x1.1e5dfc140e1e5p+716, - 0x1.209ab80c363a9p+723, - 0x1.251d22ec67138p+730, - 0x1.2bfbd1bdf17dfp+737, - 0x1.355bb04be109ep+744, - 0x1.4171452ed7d44p+751, - 0x1.5082946d09f23p+758, - 0x1.62e9b88b007d7p+765, - 0x1.79185413b0855p+772, - 0x1.939c09fd12eebp+779, - 0x1.b3243ac4d8695p+786, - 0x1.d88957d1c3026p+793, - 0x1.026b1c06b6a55p+801, - 0x1.1ca9fcdf65321p+808, - 0x1.3bcc9487d4439p+815, - 0x1.60ce8defbf238p+822, - 0x1.8ce85fadb707ep+829, - 0x1.c19f3c62c956fp+836, - 0x1.006cd07056d39p+844, - 0x1.267cf76103b7p+851, - 0x1.54807e082c4b9p+858, - 0x1.8c5d92b5839p+865, - 0x1.d07da7ecb62ccp+872, - 0x1.11fa1e0c9f746p+880, - 0x1.455903aefd5a3p+887, - 0x1.84e466672ad5dp+894, - 0x1.d3e2cb341f894p+901, - 0x1.1b4a51088f182p+909, - 0x1.594292c26e656p+916, - 0x1.a77ba8027b686p+923, - 0x1.055e51b1882a7p+931, - 0x1.44ab297a8724bp+938, - 0x1.95d5f3d928edep+945, - 0x1.fe771cb7257b3p+952, - 0x1.4307602be5b7fp+960, - 0x1.9b5b6477e6884p+967, - 0x1.07868c5ccfaf4p+975, - 0x1.53b370efa3b7fp+982, - 0x1.b88cb676c8529p+989, - 0x1.1f63cb077cadep+997, - 0x1.7932fa79d3a43p+1004, - 0x1.f2054eb4d96ecp+1011}}; - if constexpr( has_native_abi_v ) + template + EVE_FORCEINLINE + as_wide_as_t + factorial_(EVE_REQUIRES(cpu_), O const&, T n) noexcept { - if constexpr( scalar_value ) { return (n < 171u) ? dfactorials[n] : inf(as()); } + if constexpr(signed_integral_value) + { + EVE_ASSERT(eve::all(is_gez(n)), "factorial : some entry elements are not positive"); + return factorial(uint_(n)); + } else { - auto test = (n < 171u); - auto nn = if_else(test, n, zero); - auto r = gather(&dfactorials[0], nn); - return if_else(test, r, inf(as(r))); + constexpr std::array dfactorials = {{0x1p+0, + 0x1p+0, + 0x1p+1, + 0x1.8p+2, + 0x1.8p+4, + 0x1.ep+6, + 0x1.68p+9, + 0x1.3bp+12, + 0x1.3bp+15, + 0x1.626p+18, + 0x1.baf8p+21, + 0x1.308a8p+25, + 0x1.c8cfcp+28, + 0x1.7328ccp+32, + 0x1.44c3b28p+36, + 0x1.30777758p+40, + 0x1.30777758p+44, + 0x1.437eeecd8p+48, + 0x1.6beecca73p+52, + 0x1.b02b930689p+56, + 0x1.0e1b3be415ap+61, + 0x1.6283be9b5c62p+65, + 0x1.e77526159f06cp+69, + 0x1.5e5c335f8a4cep+74, + 0x1.06c52687a7b9ap+79, + 0x1.9a940c33f6121p+83, + 0x1.4d9849ea37eebp+88, + 0x1.19787e5d9f316p+93, + 0x1.ec92dd23d6967p+97, + 0x1.be6518687a785p+102, + 0x1.a27ec6e1f2d0dp+107, + 0x1.956ad0aae33a4p+112, + 0x1.956ad0aae33a4p+117, + 0x1.a21627303a541p+122, + 0x1.bc3789a33df96p+127, + 0x1.e5dcbe8a8bc8cp+132, + 0x1.114c2b2deea0fp+138, + 0x1.3c0011ed1bea1p+143, + 0x1.774015499125fp+148, + 0x1.c95619f1a8e64p+153, + 0x1.1dd5d037098fep+159, + 0x1.6e39f2c684406p+164, + 0x1.e0ac0ea48d948p+169, + 0x1.42f399d68f1fcp+175, + 0x1.bc0ef38704cbbp+180, + 0x1.383a833aef5f3p+186, + 0x1.c0d41ca4b818ep+191, + 0x1.499bc508f7324p+197, + 0x1.ee69a78d72cb6p+202, + 0x1.7a88e4484be3bp+208, + 0x1.27baf2587b49ep+214, + 0x1.d751f23d047dcp+219, + 0x1.7ef294d193a63p+225, + 0x1.3d20e33d8e45ap+231, + 0x1.0b93bfbbf00acp+237, + 0x1.cbe5f18b04928p+242, + 0x1.92693359a4003p+248, + 0x1.6665b1bbd6102p+254, + 0x1.44cc291239feap+260, + 0x1.2b6c35dccd76cp+266, + 0x1.18b5727f009f5p+272, + 0x1.0b8cf1210c97ep+278, + 0x1.0330899804332p+284, + 0x1.fe478ee34844ap+289, + 0x1.fe478ee34844ap+295, + 0x1.0320568f6ab2ep+302, + 0x1.0b395943e6087p+308, + 0x1.17c0097314d0dp+314, + 0x1.293c0a0a461dep+320, + 0x1.4074bad313983p+326, + 0x1.5e7fac56dd6e8p+332, + 0x1.84d5a3305da69p+338, + 0x1.b5705796695b6p+344, + 0x1.f2f423e7902c4p+350, + 0x1.207524c1df599p+357, + 0x1.5209471331bdp+363, + 0x1.916b0466cb107p+369, + 0x1.e2f4c14bac4fcp+375, + 0x1.264d25ca1d009p+382, + 0x1.6b473aa57bcccp+388, + 0x1.c619094edabffp+394, + 0x1.1f5bd7e3e66d7p+401, + 0x1.702dac9bff3c4p+407, + 0x1.dd7b3bda4f022p+413, + 0x1.3958df4743d96p+420, + 0x1.a02a088aa61cbp+426, + 0x1.179c3dbd279b5p+433, + 0x1.7c1863ed21d72p+439, + 0x1.0550c4b30743ep+446, + 0x1.6b645188f61a6p+452, + 0x1.ff0512a89a152p+458, + 0x1.6b4d9b43dd8bp+465, + 0x1.051fc798c73bfp+472, + 0x1.7b722e0a01831p+478, + 0x1.16a7d9cf591c4p+485, + 0x1.9da1274fc845fp+491, + 0x1.3638dd7bd6347p+498, + 0x1.d62e2fafb0a78p+504, + 0x1.67fb5c8283404p+511, + 0x1.166c698cf183bp+518, + 0x1.b30964ec395dcp+524, + 0x1.574569a26544p+531, + 0x1.118b502d68b23p+538, + 0x1.b83c3509147ecp+544, + 0x1.65b0eb1760a7p+551, + 0x1.256b20d92d49p+558, + 0x1.e5f96e67b300ep+564, + 0x1.963e824aafa2cp+571, + 0x1.56c4bdef04315p+578, + 0x1.23e389bd8992p+585, + 0x1.f5af14bdc472fp+591, + 0x1.b30dd3fc905bap+598, + 0x1.7cac197cfe503p+605, + 0x1.500fee805882dp+612, + 0x1.2b4e306a4ed48p+619, + 0x1.0ce83f7f82d2fp+626, + 0x1.e764f3171d1e4p+632, + 0x1.bd824633209dbp+639, + 0x1.9ab418b722116p+646, + 0x1.7dd36efa41ac2p+653, + 0x1.65f6380a9d916p+660, + 0x1.5262c0fa08f37p+667, + 0x1.42861fee5088p+674, + 0x1.35ece2af0162bp+681, + 0x1.2c3d7b998957ap+688, + 0x1.25340ab3f01f9p+695, + 0x1.209f3a89205f1p+702, + 0x1.1e5dfc140e1e5p+709, + 0x1.1e5dfc140e1e5p+716, + 0x1.209ab80c363a9p+723, + 0x1.251d22ec67138p+730, + 0x1.2bfbd1bdf17dfp+737, + 0x1.355bb04be109ep+744, + 0x1.4171452ed7d44p+751, + 0x1.5082946d09f23p+758, + 0x1.62e9b88b007d7p+765, + 0x1.79185413b0855p+772, + 0x1.939c09fd12eebp+779, + 0x1.b3243ac4d8695p+786, + 0x1.d88957d1c3026p+793, + 0x1.026b1c06b6a55p+801, + 0x1.1ca9fcdf65321p+808, + 0x1.3bcc9487d4439p+815, + 0x1.60ce8defbf238p+822, + 0x1.8ce85fadb707ep+829, + 0x1.c19f3c62c956fp+836, + 0x1.006cd07056d39p+844, + 0x1.267cf76103b7p+851, + 0x1.54807e082c4b9p+858, + 0x1.8c5d92b5839p+865, + 0x1.d07da7ecb62ccp+872, + 0x1.11fa1e0c9f746p+880, + 0x1.455903aefd5a3p+887, + 0x1.84e466672ad5dp+894, + 0x1.d3e2cb341f894p+901, + 0x1.1b4a51088f182p+909, + 0x1.594292c26e656p+916, + 0x1.a77ba8027b686p+923, + 0x1.055e51b1882a7p+931, + 0x1.44ab297a8724bp+938, + 0x1.95d5f3d928edep+945, + 0x1.fe771cb7257b3p+952, + 0x1.4307602be5b7fp+960, + 0x1.9b5b6477e6884p+967, + 0x1.07868c5ccfaf4p+975, + 0x1.53b370efa3b7fp+982, + 0x1.b88cb676c8529p+989, + 0x1.1f63cb077cadep+997, + 0x1.7932fa79d3a43p+1004, + 0x1.f2054eb4d96ecp+1011}}; + if constexpr( has_native_abi_v ) + { + if constexpr( scalar_value ) { return (n < 171u) ? dfactorials[n] : inf(as()); } + else + { + auto test = (n < 171u); + auto nn = if_else(test, n, zero); + auto r = gather(&dfactorials[0], nn); + return if_else(test, r, inf(as(r))); + } + } + else return apply_over(factorial, n); } } - else return apply_over(factorial, n); -} -template -EVE_FORCEINLINE auto -factorial_(EVE_SUPPORTS(cpu_), T n) noexcept -{ - EVE_ASSERT(eve::all(is_gez(n)), "factorial : some entry elements are not positive"); - return factorial(uint_(n)); -} - -template -EVE_FORCEINLINE auto -factorial_(EVE_SUPPORTS(cpu_), T n) noexcept -{ - using elt_t = element_type_t; - EVE_ASSERT(eve::all(is_flint(n)), "factorial : some entry elements are not flint"); - EVE_ASSERT(eve::all(is_gez(n)), "factorial : some entry elements are not positive"); - auto nn = uint_(n); - auto r = factorial(nn); - if constexpr( std::same_as ) return r; - else return float32(r); -} + template + EVE_FORCEINLINE + T factorial_(EVE_REQUIRES(cpu_), O const&, T n) noexcept + { + using elt_t = element_type_t; + EVE_ASSERT(eve::all(is_flint(n)), "factorial : some entry elements are not flint"); + EVE_ASSERT(eve::all(is_gez(n)), "factorial : some entry elements are not positive"); + auto nn = uint_(n); + auto r = factorial(nn); + if constexpr( std::same_as ) return r; + else return float32(r); + } } diff --git a/include/eve/module/special/regular/impl/gamma_p.hpp b/include/eve/module/special/regular/impl/gamma_p.hpp index 91a67112dd..80ebb72f53 100644 --- a/include/eve/module/special/regular/impl/gamma_p.hpp +++ b/include/eve/module/special/regular/impl/gamma_p.hpp @@ -16,105 +16,86 @@ namespace eve::detail { -template -EVE_FORCEINLINE auto -gamma_p_(EVE_SUPPORTS(cpu_), T a, U b) noexcept --> common_value_t -{ - return arithmetic_call(gamma_p, a, b); -} - -template -T -gamma_p_(EVE_SUPPORTS(cpu_), T x, T a) noexcept requires has_native_abi_v -{ - auto const third = T(1 / 3.0); - auto res = nan(as()); // nan case treated here - auto notdone = is_not_nan(x); - const auto amax = T(1048576); - auto test = (a > amax); - if( eve::any(test) ) - { - auto z = eve::fma(1024 * rsqrt(a), x - (a - third), amax - third); - x = eve::max(z, zero(as(x))); - a = if_else(test, amax, a); - } - auto lginc = [](auto a0, auto a1, auto test) + template + eve::common_value_t + gamma_p_(EVE_REQUIRES(cpu_), O const&, T x, U a) noexcept { - // insure convergence in each case for all members of simd vector - // making x = a+1 when the test do not succeed - auto x = if_else(test, a0, a1); - - // Series expansion for x < a+1 - auto ap = a1; - auto del = one(as(ap)); - auto sum = del; - - while( eve::any(abs(del) >= T(100) * epsilon(maximum(abs(sum)))) ) + if constexpr(std::same_as) { - ap += one(as(ap)); - del = x * del / ap; - sum += del; - } - auto b = sum * eve::exp(fms(a1, eve::log(x), eve::log_abs_gamma(inc(a1)) + x)); - // For very small a, the series may overshoot very slightly. - b = eve::min(b, one(as(b))); - // if lower, b(k) = bk; else b(k) = 1-bk; end - return if_else(is_eqz(a0) && is_eqz(a1), one, b); - }; - auto uginc = [](auto x, auto a, auto test) - { - // insure convergence in each case for all members of simd vector - // making x = a < a+1 when the test do not succeed - x = if_else(test, x, inc(a)); - // Continued fraction for x >= a+1 - // k = find(a ~= 0 & x >= a+1); % & x ~= 0 - auto x0 = one(as(x)); - auto x1 = x; - auto b0 = zero(as(x)); - auto b1 = x0; - auto fac = rec(x1); - auto n = one(as(x)); - auto g = b1 * fac; - auto gold = b0; + auto const third = T(1 / 3.0); + auto res = nan(as()); // nan case treated here + auto notdone = is_not_nan(x); + const auto amax = T(1048576); + auto test = (a > amax); + if( eve::any(test) ) + { + auto z = eve::fma(1024 * rsqrt(a), x - (a - third), amax - third); + x = eve::max(z, zero(as(x))); + a = if_else(test, amax, a); + } + auto lginc = [](auto a0, auto a1, auto test) + { + // insure convergence in each case for all members of simd vector + // making x = a+1 when the test do not succeed + auto x = if_else(test, a0, a1); - while( maximum(abs(g - gold)) >= 100 * eve::epsilon(maximum(eve::abs(g))) ) - { - gold = g; - auto ana = n - a; - x0 = fma(x0, ana, x1) * fac; - b0 = fma(b0, ana, b1) * fac; - auto anf = n * fac; - x1 = fma(x, x0, anf * x1); - b1 = fma(x, b0, anf * b1); - fac = rec(x1); - g = b1 * fac; - n = inc(n); - } - return if_else(eve::is_infinite(x), one, oneminus(exp(-x + a * log(x) - log_abs_gamma(a)) * g)); - }; + // Series expansion for x < a+1 + auto ap = a1; + auto del = one(as(ap)); + auto sum = del; - test = x < inc(a); - notdone = next_interval(lginc, notdone, test, res, x, a, test); - if( eve::any(notdone) ) last_interval(uginc, notdone, res, x, a, !test); - return res; -} + while( eve::any(abs(del) >= T(100) * epsilon(maximum(abs(sum)))) ) + { + ap += one(as(ap)); + del = x * del / ap; + sum += del; + } + auto b = sum * eve::exp(fms(a1, eve::log(x), eve::log_abs_gamma(inc(a1)) + x)); + // For very small a, the series may overshoot very slightly. + b = eve::min(b, one(as(b))); + // if lower, b(k) = bk; else b(k) = 1-bk; end + return if_else(is_eqz(a0) && is_eqz(a1), one, b); + }; + auto uginc = [](auto x, auto a, auto test) + { + // insure convergence in each case for all members of simd vector + // making x = a < a+1 when the test do not succeed + x = if_else(test, x, inc(a)); + // Continued fraction for x >= a+1 + // k = find(a ~= 0 & x >= a+1); % & x ~= 0 + auto x0 = one(as(x)); + auto x1 = x; + auto b0 = zero(as(x)); + auto b1 = x0; + auto fac = rec(x1); + auto n = one(as(x)); + auto g = b1 * fac; + auto gold = b0; -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -gamma_p_(EVE_SUPPORTS(cpu_), C const& cond, T0 t0, Ts ... ts) noexcept --> decltype(if_else(cond, gamma_p(t0, ts...), t0)) -{ - return mask_op(cond, eve::gamma_p, t0, ts ...); -} + while( maximum(abs(g - gold)) >= 100 * eve::epsilon(maximum(eve::abs(g))) ) + { + gold = g; + auto ana = n - a; + x0 = fma(x0, ana, x1) * fac; + b0 = fma(b0, ana, b1) * fac; + auto anf = n * fac; + x1 = fma(x, x0, anf * x1); + b1 = fma(x, b0, anf * b1); + fac = rec(x1); + g = b1 * fac; + n = inc(n); + } + return if_else(eve::is_infinite(x), one, oneminus(exp(-x + a * log(x) - log_abs_gamma(a)) * g)); + }; -template -EVE_FORCEINLINE auto -gamma_p_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, T0 t0, Ts ... ts) noexcept --> decltype(if_else(cond, gamma_p(t0, ts...), t0)) -{ - return mask_op(cond, d(eve::gamma_p), t0, ts ...); -} + test = x < inc(a); + notdone = next_interval(lginc, notdone, test, res, x, a, test); + if( eve::any(notdone) ) last_interval(uginc, notdone, res, x, a, !test); + return res; + } + else + { + return arithmetic_call(gamma_p, x, a); + } + } } diff --git a/include/eve/module/special/regular/impl/gamma_p_inv.hpp b/include/eve/module/special/regular/impl/gamma_p_inv.hpp index e93d511604..96f62ef0aa 100644 --- a/include/eve/module/special/regular/impl/gamma_p_inv.hpp +++ b/include/eve/module/special/regular/impl/gamma_p_inv.hpp @@ -17,83 +17,64 @@ namespace eve::detail { -template -auto -gamma_p_inv_(EVE_SUPPORTS(cpu_), T a, U b) noexcept -->common_value_t -{ - return arithmetic_call(gamma_p_inv, a, b); -} - -template -T -gamma_p_inv_(EVE_SUPPORTS(cpu_), T p, T k) noexcept requires has_native_abi_v -{ - if constexpr( std::is_same_v ) { return float32(gamma_p_inv(float64(p), float64(k))); } - p = if_else(is_ltz(p) || p > one(as(p)), allbits, p); - auto iseqzp = is_eqz(p); - auto iseq1p = p == one(as(p)); - auto x = if_else(iseq1p, inf(as(p)), if_else(iseqzp, zero(as(p)), allbits)); - logical notdone(is_not_nan(p) && !iseqzp && !iseq1p); - auto d = rec(9 * k); - auto omp = oneminus(p); - auto y = oneminus(d - sqrt_2(as(omp)) * erfc_inv(2 * omp) * eve::sqrt(d)); - - x = if_else(notdone, k * sqr(y) * y, x); - auto x0 = x; - int i = 10; - if( eve::none(notdone) ) return x; - auto dgamma_p = [](auto x, auto k) { return exp(dec(k) * log(x) - x - log_abs_gamma(k)); }; - while( i ) + template + eve::common_value_t + gamma_p_inv_(EVE_REQUIRES(cpu_), O const&, T p, U k) noexcept { - auto dx = if_else(notdone, (gamma_p(x, k) - p) / dgamma_p(x, k), zero); - x -= dx; - if( i < 7 ) - notdone = notdone && is_not_less(abs(dx), 4 * eps(as(x)) * max(eve::abs(x), one(as(x)))); - if( eve::none(notdone) ) return x; - --i; - } + if constexpr(std::same_as) + { + if constexpr( std::is_same_v ) { return float32(gamma_p_inv(float64(p), float64(k))); } + p = if_else(is_ltz(p) || p > one(as(p)), allbits, p); + auto iseqzp = is_eqz(p); + auto iseq1p = p == one(as(p)); + auto x = if_else(iseq1p, inf(as(p)), if_else(iseqzp, zero(as(p)), allbits)); + logical notdone(is_not_nan(p) && !iseqzp && !iseq1p); + auto d = rec(9 * k); + auto omp = oneminus(p); + auto y = oneminus(d - sqrt_2(as(omp)) * erfc_inv(2 * omp) * eve::sqrt(d)); - notdone = notdone || is_ltz(y); - x = if_else(notdone, eve::abs(x0), x); - auto xlo = if_else(notdone, eve::min(x / 2, zero(as(x))), x); - auto xhi = if_else(notdone, eve::min(x * 2, eve::valmax(as(x))), x); - auto inl = ((gamma_p(xlo, k) > p) || (gamma_p(xhi, k) < p)) && (xlo != xhi); - while( eve::any(inl) ) - { - xlo = if_else(inl, eve::max(xlo / 2, zero(as(x))), xlo); - xhi = if_else(inl, eve::min(xhi * 2, eve::valmax(as(x))), xhi); - inl = ((gamma_p(xlo, k) > p) || (gamma_p(xhi, k) < p)) && (xlo != xhi); - } - auto xmed = average(xlo, xhi); - while( eve::any(notdone) ) - { - auto gmp = gamma_p(xmed, k); - auto test = (gmp < p); - xlo = if_else(test, xmed, xlo); - xhi = if_else(test, xhi, xmed); - notdone = (ulpdist(xlo, xhi) > 1) && gmp != 0; - xmed = average(xlo, xhi); - } - xmed = if_else(iseq1p, inf(as(p)), if_else(iseqzp, zero(as(p)), xmed)); - return if_else(k == one(as(k)), -eve::log1p(-p), xmed); -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -gamma_p_inv_(EVE_SUPPORTS(cpu_), C const& cond, T0 t0, Ts ... ts) noexcept --> decltype(if_else(cond, gamma_p_inv(t0, ts...), t0)) -{ - return mask_op(cond, eve::gamma_p_inv, t0, ts ...); -} + x = if_else(notdone, k * sqr(y) * y, x); + auto x0 = x; + int i = 10; + if( eve::none(notdone) ) return x; + auto dgamma_p = [](auto x, auto k) { return exp(dec(k) * log(x) - x - log_abs_gamma(k)); }; + while( i ) + { + auto dx = if_else(notdone, (gamma_p(x, k) - p) / dgamma_p(x, k), zero); + x -= dx; + if( i < 7 ) + notdone = notdone && is_not_less(abs(dx), 4 * eps(as(x)) * max(eve::abs(x), one(as(x)))); + if( eve::none(notdone) ) return x; + --i; + } -template -EVE_FORCEINLINE auto -gamma_p_inv_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, T0 t0, Ts ... ts) noexcept --> decltype(if_else(cond, gamma_p_inv(t0, ts...), t0)) -{ - return mask_op(cond, d(eve::gamma_p_inv), t0, ts ...); -} + notdone = notdone || is_ltz(y); + x = if_else(notdone, eve::abs(x0), x); + auto xlo = if_else(notdone, eve::min(x / 2, zero(as(x))), x); + auto xhi = if_else(notdone, eve::min(x * 2, eve::valmax(as(x))), x); + auto inl = ((gamma_p(xlo, k) > p) || (gamma_p(xhi, k) < p)) && (xlo != xhi); + while( eve::any(inl) ) + { + xlo = if_else(inl, eve::max(xlo / 2, zero(as(x))), xlo); + xhi = if_else(inl, eve::min(xhi * 2, eve::valmax(as(x))), xhi); + inl = ((gamma_p(xlo, k) > p) || (gamma_p(xhi, k) < p)) && (xlo != xhi); + } + auto xmed = average(xlo, xhi); + while( eve::any(notdone) ) + { + auto gmp = gamma_p(xmed, k); + auto test = (gmp < p); + xlo = if_else(test, xmed, xlo); + xhi = if_else(test, xhi, xmed); + notdone = (ulpdist(xlo, xhi) > 1) && gmp != 0; + xmed = average(xlo, xhi); + } + xmed = if_else(iseq1p, inf(as(p)), if_else(iseqzp, zero(as(p)), xmed)); + return if_else(k == one(as(k)), -eve::log1p(-p), xmed); + } + else + { + return arithmetic_call(gamma_p, p, k); + } + } } diff --git a/include/eve/module/special/regular/impl/lambert.hpp b/include/eve/module/special/regular/impl/lambert.hpp index 821d137af5..1068ea311d 100644 --- a/include/eve/module/special/regular/impl/lambert.hpp +++ b/include/eve/module/special/regular/impl/lambert.hpp @@ -14,95 +14,95 @@ namespace eve::detail { -template -EVE_FORCEINLINE constexpr auto -lambert_serie_utility(T r) noexcept -{ - using elt_t = element_type_t; - using A3 = kumi::result::generate_t<3, elt_t>; - if constexpr( sizeof(elt_t) == 8 ) + template + EVE_FORCEINLINE constexpr auto + lambert_serie_utility(T r) { - constexpr A3 P = { + using elt_t = element_type_t; + using A3 = kumi::result::generate_t<3, elt_t>; + if constexpr( sizeof(elt_t) == 8 ) + { + constexpr A3 P = { 0.000000000000000000000e+00, 2.331643981597117584689e+00, 1.931973535237478945863e+00, - }; - constexpr A3 Q = { + }; + constexpr A3 Q = { 1.000000000000000000000e+00, 1.605803223118019582808e+00, 4.174677763382451962312e-01, - }; - return dec(reverse_horner(r, P) / reverse_horner(r, Q)); - } - else - { - return fam(mone(as(r)) - , r - , eve::horner(r, -1.80949529206e+00f, 2.33164314895e+00f)); - } -} - -template -EVE_FORCEINLINE constexpr auto -lambert_(EVE_SUPPORTS(cpu_), T x) noexcept -{ - auto halley = [](auto x, auto w_i, auto max_iters) - { - auto w = w_i; - for( int i = 0; i < max_iters; i++ ) + }; + return dec(reverse_horner(r, P) / reverse_horner(r, Q)); + } + else { - T e = eve::exp(w); - T p = inc(w); - T t = fms(w, e, x); - t /= if_else(is_gtz(w), e * p, e * p - T(0.5) * inc(p) * t / p); - w -= t; - T tol = 10 * eps(as(x)) * eve::max(eve::abs(w), rec(eve::abs(p) * e)); - if( eve::all(eve::abs(t) < tol) ) break; + return fam(mone(as(r)) + , r + , eve::horner(r, -1.80949529206e+00f, 2.33164314895e+00f)); } - return w; - }; + } - if constexpr( has_native_abi_v ) + template + constexpr kumi::tuple + lambert_(EVE_REQUIRES(cpu_), O const&, T x) { - T q = x + T(0.367879441171442); - auto lambert0_small = [](auto q) { // branch 0 q <= 1.0e-3 - return lambert_serie_utility(eve::sqrt(q)); - }; - auto lambert0_other = [&halley](auto x, auto q) { // branch 0 q <= 1.0e'3 - auto p = eve::sqrt(T(5.436563656918090) * q); - auto w1 = dec(p * (inc(p * fam(T(-1.0 / 3), p, T(1.0 / 72))))); - auto w2 = log(x); - w2 -= if_else(x > 3, zero, log(w2)); - auto init = if_else(x < one(as(x)), w1, w2); - return halley(x, init, 10); - }; - auto lambert1 = [&halley](auto x, auto q, auto positivex) { // branch 1 q > 0 - T r = -eve::sqrt(q); - auto test = (x < T(-1.0e-6)); - T w1(0); - if( eve::any(test) ) w1 = lambert_serie_utility(r); - if( eve::all(test) && eve::all(q < T(3.0e-3)) ) return w1; - T l1 = log(-x); - T l2 = log(-l1); - T w2 = l1 - l2 + l2 / l1; - return if_else(is_eqz(x) && !positivex, minf(as(x)), halley(x, w2, 30)); - }; + auto halley = [](auto x, auto w_i, auto max_iters) + { + auto w = w_i; + for( int i = 0; i < max_iters; i++ ) + { + T e = eve::exp(w); + T p = inc(w); + T t = fms(w, e, x); + t /= if_else(is_gtz(w), e * p, e * p - T(0.5) * inc(p) * t / p); + w -= t; + T tol = 10 * eps(as(x)) * eve::max(eve::abs(w), rec(eve::abs(p) * e)); + if( eve::all(eve::abs(t) < tol) ) break; + } + return w; + }; - auto r = nan(as()); // nan case treated here - r = if_else(is_eqz(x), zero, r); // zero case treated here - r = if_else(x == inf(as(x)), x, r); - auto notdone = is_nlez(q) && (q != inf(as(x))); - if( eve::any(notdone) ) + if constexpr( has_native_abi_v ) { - notdone = next_interval(lambert0_small, notdone, q < T(1.0e-3), r, q); - if( eve::any(notdone) ) { notdone = last_interval(lambert0_other, q >= T(1.0e-3), r, x, q); } + T q = x + T(0.367879441171442); + auto lambert0_small = [](auto q) { // branch 0 q <= 1.0e-3 + return lambert_serie_utility(eve::sqrt(q)); + }; + auto lambert0_other = [&halley](auto x, auto q) { // branch 0 q <= 1.0e'3 + auto p = eve::sqrt(T(5.436563656918090) * q); + auto w1 = dec(p * (inc(p * fam(T(-1.0 / 3), p, T(1.0 / 72))))); + auto w2 = log(x); + w2 -= if_else(x > 3, zero, log(w2)); + auto init = if_else(x < one(as(x)), w1, w2); + return halley(x, init, 10); + }; + auto lambert1 = [&halley](auto x, auto q, auto positivex) { // branch 1 q > 0 + T r = -eve::sqrt(q); + auto test = (x < T(-1.0e-6)); + T w1(0); + if( eve::any(test) ) w1 = lambert_serie_utility(r); + if( eve::all(test) && eve::all(q < T(3.0e-3)) ) return w1; + T l1 = log(-x); + T l2 = log(-l1); + T w2 = l1 - l2 + l2 / l1; + return if_else(is_eqz(x) && !positivex, minf(as(x)), halley(x, w2, 30)); + }; + + auto r = nan(as()); // nan case treated here + r = if_else(is_eqz(x), zero, r); // zero case treated here + r = if_else(x == inf(as(x)), x, r); + auto notdone = is_nlez(q) && (q != inf(as(x))); + if( eve::any(notdone) ) + { + notdone = next_interval(lambert0_small, notdone, q < T(1.0e-3), r, q); + if( eve::any(notdone) ) { notdone = last_interval(lambert0_other, q >= T(1.0e-3), r, x, q); } + } + auto positivex = is_positive(x); + if( eve::all(positivex) ) return kumi::make_tuple(r, r); + auto r1 = if_else(positivex, r, lambert1(x, q, positivex)); + return kumi::make_tuple(r, r1); } - auto positivex = is_positive(x); - if( eve::all(positivex) ) return kumi::make_tuple(r, r); - auto r1 = if_else(positivex, r, lambert1(x, q, positivex)); - return kumi::make_tuple(r, r1); + else return apply_over2(lambert, x); } - else return apply_over2(lambert, x); -} } diff --git a/include/eve/module/special/regular/impl/lbeta.hpp b/include/eve/module/special/regular/impl/lbeta.hpp index 2425938a7e..0817c3aeeb 100644 --- a/include/eve/module/special/regular/impl/lbeta.hpp +++ b/include/eve/module/special/regular/impl/lbeta.hpp @@ -12,36 +12,18 @@ namespace eve::detail { -template -EVE_FORCEINLINE auto -lbeta_(EVE_SUPPORTS(cpu_), T a, U b) noexcept --> common_value_t -{ - return arithmetic_call(lbeta, a, b); -} - -template -EVE_FORCEINLINE T -lbeta_(EVE_SUPPORTS(cpu_), T a0, T a1) noexcept requires(has_native_abi_v) -{ - auto y = a0 + a1; - return log_abs_gamma(a0) + log_abs_gamma(a1) - log_abs_gamma(y); -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -lbeta_(EVE_SUPPORTS(cpu_), C const& cond, T0 t0, Ts ... ts) noexcept --> decltype(if_else(cond, lbeta(t0, ts...), t0)) -{ - return mask_op(cond, eve::lbeta, t0, ts ...); -} - -template -EVE_FORCEINLINE auto -lbeta_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, T0 t0, Ts ... ts) noexcept -{ - return mask_op(cond, d(eve::lbeta), t0, ts ...); -} + template< typename T0, typename T1, callable_options O> + EVE_FORCEINLINE + eve::common_value_t lbeta_(EVE_REQUIRES(cpu_), O const&, T0 const& a0, T1 const & a1) + { + if constexpr(std::same_as) + { + auto y = a0 + a1; + return log_abs_gamma(a0) + log_abs_gamma(a1) - log_abs_gamma(y); + } + else + { + return arithmetic_call(lbeta, a0, a1); + } + } } diff --git a/include/eve/module/special/regular/impl/lfactorial.hpp b/include/eve/module/special/regular/impl/lfactorial.hpp index b3e254b2b6..baa98cc925 100644 --- a/include/eve/module/special/regular/impl/lfactorial.hpp +++ b/include/eve/module/special/regular/impl/lfactorial.hpp @@ -13,9 +13,9 @@ namespace eve::detail { -template -EVE_FORCEINLINE auto -lfactorial_(EVE_SUPPORTS(cpu_), T n) noexcept +template +EVE_FORCEINLINE decltype(eve::factorial(T())) +lfactorial_(EVE_REQUIRES(cpu_), O const&, T n) noexcept { EVE_ASSERT(eve::all(is_flint(n)), "lfactorial : some entry elements are not flint"); EVE_ASSERT(eve::all(is_gez(n)), "lfactorial : some entry elements are not positive"); diff --git a/include/eve/module/special/regular/impl/log_abs_gamma.hpp b/include/eve/module/special/regular/impl/log_abs_gamma.hpp index 32df25f4bd..2dbcf614bb 100644 --- a/include/eve/module/special/regular/impl/log_abs_gamma.hpp +++ b/include/eve/module/special/regular/impl/log_abs_gamma.hpp @@ -16,7 +16,7 @@ namespace eve::detail namespace helpers { template - inline T + EVE_FORCEINLINE T large_negative(T q) { T w = eve::log_abs_gamma(q); @@ -78,9 +78,9 @@ namespace eve::detail } } - template + template constexpr T - log_abs_gamma_(EVE_SUPPORTS(cpu_), T a0) noexcept + log_abs_gamma_(EVE_REQUIRES(cpu_), O const&, T a0) { if constexpr( has_native_abi_v ) { @@ -405,20 +405,4 @@ namespace eve::detail } else return apply_over(log_abs_gamma, a0); } - -// ----------------------------------------------------------------------------------------------- -// Masked cases - template - EVE_FORCEINLINE auto - log_abs_gamma_(EVE_SUPPORTS(cpu_), C const& cond, Ts ... ts) noexcept - { - return mask_op(cond, eve::log_abs_gamma, ts ...); - } - - template - EVE_FORCEINLINE auto - log_abs_gamma_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, Ts ... ts) noexcept - { - return mask_op(cond, d(eve::log_abs_gamma), ts ...); - } } diff --git a/include/eve/module/special/regular/impl/log_gamma.hpp b/include/eve/module/special/regular/impl/log_gamma.hpp index 020927bca7..e2f68287b3 100644 --- a/include/eve/module/special/regular/impl/log_gamma.hpp +++ b/include/eve/module/special/regular/impl/log_gamma.hpp @@ -13,31 +13,15 @@ namespace eve::detail { -template -constexpr T -log_gamma_(EVE_SUPPORTS(cpu_), T a0) noexcept -{ - if constexpr( has_native_abi_v ) + template + constexpr T + log_gamma_(EVE_REQUIRES(cpu_), O const&, T a0) noexcept { - auto aa0 = if_else(a0 == minf(as(a0)) || is_lez(signgam(a0)), allbits, a0); - return log_abs_gamma(aa0); + if constexpr( has_native_abi_v ) + { + auto aa0 = if_else(a0 == minf(as(a0)) || is_lez(signgam(a0)), allbits, a0); + return log_abs_gamma(aa0); + } + else return apply_over(log_gamma, a0); } - else return apply_over(log_gamma, a0); -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -log_gamma_(EVE_SUPPORTS(cpu_), C const& cond, Ts ... ts) noexcept -{ - return mask_op(cond, eve::log_gamma, ts ...); -} - -template -EVE_FORCEINLINE auto -log_gamma_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, Ts ... ts) noexcept -{ - return mask_op(cond, d(eve::log_gamma), ts ...); -} } diff --git a/include/eve/module/special/regular/impl/lrising_factorial.hpp b/include/eve/module/special/regular/impl/lrising_factorial.hpp index 96709be05f..b80a18614f 100644 --- a/include/eve/module/special/regular/impl/lrising_factorial.hpp +++ b/include/eve/module/special/regular/impl/lrising_factorial.hpp @@ -8,91 +8,194 @@ #pragma once #include -#include #include -#include +#include #include namespace eve::detail { -// general case -template -EVE_FORCEINLINE auto -lrising_factorial_(EVE_SUPPORTS(cpu_), D const& d, I n, T x) noexcept -{ - if constexpr( integral_simd_value ) - { - using elt_t = element_type_t; - using r_t = as_wide_t>; - auto nn = convert(n, as(elt_t())); - return d(lrising_factorial)(nn, r_t(x)); - } - else if constexpr( integral_scalar_value ) { return d(lrising_factorial)(T(n), x); } - else - { - using r_t = common_value_t; - return d(lrising_factorial)(r_t(n), r_t(x)); - } -} -// regular wrapping : no decorator -template -EVE_FORCEINLINE auto -lrising_factorial_(EVE_SUPPORTS(cpu_), I a, T x) noexcept -{ - return lrising_factorial(regular_type(), a, x); -} + ///////////////////////////////////////////////////////////////////////////////////////// + //utilities -// regular nan if a+x or x is negative, better computation than raw -template -EVE_FORCEINLINE auto -lrising_factorial_(EVE_SUPPORTS(cpu_), regular_type const&, T a, T x) noexcept -{ - if constexpr( has_native_abi_v ) + template + EVE_FORCEINLINE auto + inner_lrising_factorial(T a, T x) noexcept { - auto lr0 = []() { return zero(as(T())); }; - auto lrpos = [](auto a, auto x) { return inner_lrising_factorial(a, x); }; + // Assumes a>0 and a+x>0. + auto ax = eve::abs(x); + auto notdone = is_nlez(a) && is_nlez(a + x); + auto r = nan(as(x)); + auto lr0 = [](auto a, auto x) { return log_abs_gamma(x + a) - log_abs_gamma(a); }; + + auto lr1 = [](auto a, auto x) + { + const auto xoa = x / a; + const auto den = inc(xoa); + const auto d2 = sqr(den); + const auto d3 = den * d2; + const auto d5 = d3 * d2; + const auto d7 = d5 * d2; + const auto c1 = -xoa / den; + const auto c3 = -xoa * (3 + xoa * (3 + xoa)) / d3; + const auto c5 = -xoa * (5 + xoa * (10 + xoa * (10 + xoa * (5 + xoa)))) / d5; + const auto c7 = + -xoa * (7 + xoa * (21 + xoa * (35 + xoa * (35 + xoa * (21 + xoa * (7 + xoa)))))) / d7; + const auto p8 = eve::pow(inc(xoa), 8); + const auto c8 = dec(rec(p8)); /* these need not */ + const auto c9 = dec(rec(p8 * inc(xoa))); /* be very accurate */ + const auto a2 = sqr(a); + const auto a4 = sqr(a2); + const auto a6 = a4 * a2; + const auto ser_1 = c1 + c3 / (30 * a2) + c5 / (105 * a4) + c7 / (140 * a6); + const auto ser_2 = c8 / (99 * a6 * a2) - 691 * c9 / (a6 * a4) / 360360; + const auto ser = (ser_1 + ser_2) / (12 * a); + auto term1 = x * dec(eve::log(a)); // eve::log(a/M_E); + auto lg = eve::log1p(xoa); + auto term2 = (x + a - half(as(x))) * lg; + return term1 + term2 + ser; + }; + + auto lr2 = [](auto a, auto x) + { + return if_else(dist(x + a, a) < 10 * a * eps(as(a)), + eve::log1p(x * digamma(a)), + log_abs_gamma(a + x) - log_abs_gamma(a)); + }; - auto r = nan(as(a)); - auto notdone = is_nltz(x) || is_nltz(a + x); if( eve::any(notdone) ) { - notdone = next_interval(lr0, notdone, is_eqz(x), r); - if( eve::any(notdone) ) { notdone = last_interval(lrpos, notdone, r, a, x); } + auto test0 = (10 * ax > a) || (10 * ax * log(eve::max(a, T(2))) > one(as(x))); + notdone = next_interval(lr0, notdone, test0, r, a, x); + if( eve::any(notdone) ) + { + auto test1 = (10 * ax <= a) && (a > T(15)); + notdone = next_interval(lr1, notdone, test1, r, a, x); + if( eve::any(notdone) ) { notdone = last_interval(lr2, notdone, r, a, x); } + } } return r; } - else return apply_over(regular(lrising_factorial), a, x); -} + // end utilities + ///////////////////////////////////////////////////////////////////////////////////////// -// raw direct computation not matter why. nan if a+x or x is non positive -template -EVE_FORCEINLINE auto -lrising_factorial_(EVE_SUPPORTS(cpu_), raw_type const&, T a, T x) noexcept -{ - if constexpr( has_native_abi_v ) + template + as_wide_as_t lrising_factorial_(EVE_REQUIRES(cpu_), O const& d, I a, T x) noexcept { - auto notdone = is_nlez(x) && is_nlez(a + x); - return if_else(notdone, log_abs_gamma(x + a) - log_abs_gamma(a), allbits); - } - else return apply_over(raw(lrising_factorial), a, x); -} -// ----------------------------------------------------------------------------------------------- -// Masked cases + // Integral first parameter + if constexpr(integral_value ) + { + if constexpr( integral_simd_value ) + { + using elt_t = element_type_t; + using r_t = as_wide_t>; + auto aa = convert(a, as(elt_t())); + return lrising_factorial[d](aa, r_t(x)); + } + else if constexpr( integral_scalar_value ) + { + return lrising_factorial[d](T(a), x); + } + } + else + { + if constexpr( has_native_abi_v ) + { + if constexpr(O::contains(regular2)) + { + // regular nan if a+x or x is negative, better computation than raw + auto lr0 = []() { return zero(as(T())); }; + auto lrpos = [](auto a, auto x) { return inner_lrising_factorial(a, x); }; -template -EVE_FORCEINLINE auto -lrising_factorial_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, T0 t0, Ts ... ts) noexcept --> decltype(if_else(cond, lrising_factorial(t0, ts...), t0)) -{ - return mask_op(cond, d(eve::lrising_factorial), t0, ts ...); -} + auto r = nan(as(a)); + auto notdone = is_nltz(x) || is_nltz(a + x); + if( eve::any(notdone) ) + { + notdone = next_interval(lr0, notdone, is_eqz(x), r); + if( eve::any(notdone) ) { notdone = last_interval(lrpos, notdone, r, a, x); } + } + return r; + } + else if constexpr(O::contains(raw2)) + { + // raw direct computation not matter why. nan if a+x or x is non positive + auto notdone = is_nlez(x) && is_nlez(a + x); + return if_else(notdone, log_abs_gamma(x + a) - log_abs_gamma(a), allbits); + } + else if constexpr(O::contains(pedantic2)) + { + // pedantic computes also for negative values and even negative integer values + auto r = nan(as(a)); + auto notdone = is_not_nan(a) && is_not_nan(x); -template -EVE_FORCEINLINE auto -lrising_factorial_(EVE_SUPPORTS(cpu_), C const& cond, T0 t0, Ts ... ts) noexcept -{ - return lrising_factorial[cond](eve::regular_type(), t0, ts...); -} + auto lr0 = []() { return zero(as(T())); }; + + auto lrpos = [](auto a, auto x) { return inner_lrising_factorial(a, x); }; + auto lrnegint = [](auto a, auto x) + // a and a+x are negative integers uses reflection. + { + auto r = inner_lrising_factorial(-a, -x); + return -eve::log1p(x / a) - r; + }; + + auto lraeqmx = [](auto a, auto) + // a+x = 0 + { return log_abs_gamma(inc(a)); }; + + auto lraneqmx = [](auto a, auto) + // a < 0.0 && a+x < 0.0 + { return minf(as(a)); }; + + auto lrneg = [](auto a, auto x) + { + // Reduce to positive case using reflection. + auto oma = oneminus(a); + auto spioma = sinpi(oma); + auto spiomamx = sinpi(oma - x); + auto r = inner_lrising_factorial(oma, -x); + auto lnterm = eve::log(eve::abs(spioma / spiomamx)); + return if_else(is_nez(spiomamx * spioma), lnterm - r, allbits); + }; + + auto lrlast = [](auto a, auto x) { return eve::log_abs_gamma(a + x) - eve::log_abs_gamma(a); }; + + if( eve::any(notdone) ) + { + notdone = next_interval(lr0, notdone, is_eqz(x), r); + if( eve::any(notdone) ) + { + auto testpos = is_nlez(a) && is_nlez(a + x); + notdone = next_interval(lrpos, notdone, testpos, r, a, x); + if( eve::any(notdone) ) + { + // from here a+x <= 0 || x <= 0 + auto aneg = is_ltz(a); + auto aflint = is_flint(a); + auto testnegint = aflint && is_flint(a + x) && aneg && is_ltz(a + x); + notdone = next_interval(lrnegint, notdone, testnegint, r, a, x); + if( eve::any(notdone) ) + { + notdone = next_interval(lraeqmx, notdone, is_eqz(a + x), r, a, x); + if( eve::any(notdone) ) + { + notdone = next_interval(lraneqmx, notdone, aflint && !is_eqz(a + x), r, a, x); + if( eve::any(notdone) ) + { + notdone = next_interval(lrneg, notdone, aneg && is_ltz(a + x), r, a, x); + if( eve::any(notdone) ) { notdone = last_interval(lrlast, notdone, r, a, x); } + } + } + } + } + } + } + return r; + } + else return lrising_factorial[regular](a, x); + } + else + return apply_over(regular(lrising_factorial[d]), a, x); + } + } } diff --git a/include/eve/module/special/regular/impl/omega.hpp b/include/eve/module/special/regular/impl/omega.hpp index 91f5ed0eb2..e1306355b1 100644 --- a/include/eve/module/special/regular/impl/omega.hpp +++ b/include/eve/module/special/regular/impl/omega.hpp @@ -13,59 +13,43 @@ namespace eve::detail { -template -EVE_FORCEINLINE constexpr auto -omega_(EVE_SUPPORTS(cpu_), T x) noexcept -{ - if constexpr( has_native_abi_v ) + template + constexpr T + omega_(EVE_REQUIRES(cpu_), O const&, T x) noexcept { - auto br_pos = [](auto x) + if constexpr( has_native_abi_v ) { - auto x0 = if_else(x > 1, x, -eve::log(eve::abs(x)) + eps(as(x))); - x0 = if_else(is_nan(x), x, x0); - int maxiter = 100; - for( int i = 0; i <= maxiter; ++i ) + auto br_pos = [](auto x) + { + auto x0 = if_else(x > 1, x, -eve::log(eve::abs(x)) + eps(as(x))); + x0 = if_else(is_nan(x), x, x0); + int maxiter = 100; + for( int i = 0; i <= maxiter; ++i ) + { + auto dx0 = x0 * (log(x0) + x0 - x) / inc(x0); + auto test = is_not_greater(eve::abs(dx0), 2 * eps(as(x)) * eve::max(x, one(as(x)))); + if( eve::all(test) ) break; + x0 = eve::max(eve::smallestposval(as(x0)), x0 - if_else(test, zero, dx0)); + } + return x0; + }; + auto br_neg = [](auto x) + { + auto [w0, wm1] = lambert(exp(x)); + return w0; + }; + auto r = nan(as()); // nan case treated here + r = if_else(x == one(as(x)), one, r); + r = if_else(x == inf(as(x)), inf(as(x)), r); + r = if_else(x == zero(as(x)), T(0.56714329040978384011), r); + auto notdone = is_nan(r); + if( eve::any(notdone) ) { - auto dx0 = x0 * (log(x0) + x0 - x) / inc(x0); - auto test = is_not_greater(eve::abs(dx0), 2 * eps(as(x)) * eve::max(x, one(as(x)))); - if( eve::all(test) ) break; - x0 = eve::max(eve::smallestposval(as(x0)), x0 - if_else(test, zero, dx0)); + notdone = next_interval(br_pos, notdone, x >= one(as(x)), r, x); + if( eve::any(notdone) ) { notdone = last_interval(br_neg, notdone, r, x); } } - return x0; - }; - auto br_neg = [](auto x) - { - auto [w0, wm1] = lambert(exp(x)); - return w0; - }; - auto r = nan(as()); // nan case treated here - r = if_else(x == one(as(x)), one, r); - r = if_else(x == inf(as(x)), inf(as(x)), r); - r = if_else(x == zero(as(x)), T(0.56714329040978384011), r); - auto notdone = is_nan(r); - if( eve::any(notdone) ) - { - notdone = next_interval(br_pos, notdone, x >= one(as(x)), r, x); - if( eve::any(notdone) ) { notdone = last_interval(br_neg, notdone, r, x); } + return r; } - return r; + else return apply_over(omega, x); } - else return apply_over(omega, x); -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -omega_(EVE_SUPPORTS(cpu_), C const& cond, Ts ... ts) noexcept -{ - return mask_op(cond, eve::omega, ts ...); -} - -template -EVE_FORCEINLINE auto -omega_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, Ts ... ts) noexcept -{ - return mask_op(cond, d(eve::omega), ts ...); -} } diff --git a/include/eve/module/special/regular/impl/rising_factorial.hpp b/include/eve/module/special/regular/impl/rising_factorial.hpp index 6639a1569e..348fcc06e3 100644 --- a/include/eve/module/special/regular/impl/rising_factorial.hpp +++ b/include/eve/module/special/regular/impl/rising_factorial.hpp @@ -17,78 +17,49 @@ namespace eve::detail { -template -EVE_FORCEINLINE auto -rising_factorial_(EVE_SUPPORTS(cpu_), D const & d, I n, T x) noexcept -{ - if constexpr( integral_simd_value ) - { - using elt_t = element_type_t; - using r_t = as_wide_t>; - auto nn = convert(n, as(elt_t())); - return d(rising_factorial)(nn, r_t(x)); - } - else if constexpr( integral_scalar_value ) { return d(rising_factorial)(T(n), x); } - else - { - using r_t = common_value_t; - return d(rising_factorial)(r_t(n), r_t(x)); - } -} - -// regular nan if a+x or x is nnegative, better computation than raw -template -EVE_FORCEINLINE auto -rising_factorial_(EVE_SUPPORTS(cpu_) - , T a - , T x) noexcept -{ - if constexpr( has_native_abi_v ) - { - auto lrn = lrising_factorial(a, x); - return eve::exp(lrn); - } - else return apply_over(rising_factorial, a, x); -} - -// regular wrapping : no decorator -template -EVE_FORCEINLINE auto -rising_factorial_(EVE_SUPPORTS(cpu_), I a, T x) noexcept -{ - if constexpr( std::is_integral_v> ) - return regular(rising_factorial)(convert(a, as(element_type_t())), x); - else + template + EVE_FORCEINLINE as_wide_as_t + rising_factorial_(EVE_REQUIRES(cpu_), O const& d, I a, T x) noexcept { - using r_t = common_value_t; - return rising_factorial(r_t(a), r_t(x)); + // Integral first parameter + if constexpr(integral_value ) + { + if constexpr( integral_simd_value ) + { + using elt_t = element_type_t; + using r_t = as_wide_t>; + auto aa = convert(a, as(elt_t())); + return rising_factorial[d](aa, r_t(x)); + } + else if constexpr( integral_scalar_value ) + { + return rising_factorial[d](T(a), x); + } + } + else + { + if constexpr( has_native_abi_v ) + { + if constexpr(O::contains(raw2)) + { + // raw direct computation not matter why. nan if a+x or x is non positive + return eve::tgamma(x + a) / tgamma(a); + } + else if constexpr(O::contains(pedantic2)) + { + auto sga = if_else(is_flint(a), one, signgam(a)); + auto sgapx = if_else(is_flint(a + x), one, signgam(a + x)); + return eve::exp(lrising_factorial[pedantic](a, x))*(sga * sgapx); + } + else if constexpr(O::contains(regular2)) + { + // regular nan if a+x or x is nnegative, better computation than raw + return eve::exp(lrising_factorial(a, x)); + } + else return eve::exp(lrising_factorial[regular](a, x)); + } + else + return apply_over(regular(rising_factorial[d]), a, x); + } } } - -// raw -template -EVE_FORCEINLINE auto -rising_factorial_(EVE_SUPPORTS(cpu_), raw_type const&, T a, T x) noexcept -{ - if constexpr( has_native_abi_v ) { return eve::tgamma(x + a) / tgamma(a); } - else return apply_over(regular_type()(rising_factorial), a, x); -} - - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -rising_factorial_(EVE_SUPPORTS(cpu_), C const& cond, T0 t0, Ts ... ts) noexcept --> decltype(if_else(cond, rising_factorial(t0, ts...), t0)) -{ - return mask_op(cond, eve::rising_factorial, t0, ts ...); -} - -template -EVE_FORCEINLINE auto -rising_factorial_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, T0 t0, Ts ... ts) noexcept -{ - return mask_op(cond, d(eve::rising_factorial), t0, ts ...); -} -} diff --git a/include/eve/module/special/regular/impl/signgam.hpp b/include/eve/module/special/regular/impl/signgam.hpp index 24c46399cb..7748285f13 100644 --- a/include/eve/module/special/regular/impl/signgam.hpp +++ b/include/eve/module/special/regular/impl/signgam.hpp @@ -11,35 +11,19 @@ namespace eve::detail { -template -EVE_FORCEINLINE constexpr T -signgam_(EVE_SUPPORTS(cpu_), T a0) noexcept -{ - if constexpr( has_native_abi_v ) + template + EVE_FORCEINLINE constexpr T + signgam_(EVE_REQUIRES(cpu_), O const &, T a0) { - auto isleza0 = is_ngtz(a0); - auto a = if_else(is_flint(a0) || is_infinite(a0), - eve::allbits, - oneminus(binarize(is_odd(floor(a0))) * T(2))); - a = if_else(is_eqz(a0), bit_or(one(as(a0)), bitofsign(a0)), a); - return if_else(is_nan(a0), a0, if_else(isleza0, a, eve::one)); + if constexpr( has_native_abi_v ) + { + auto isleza0 = is_ngtz(a0); + auto a = if_else(is_flint(a0) || is_infinite(a0), + eve::allbits, + oneminus(binarize(is_odd(floor(a0))) * T(2))); + a = if_else(is_eqz(a0), bit_or(one(as(a0)), bitofsign(a0)), a); + return if_else(is_nan(a0), a0, if_else(isleza0, a, eve::one)); + } + else return apply_over(signgam, a0); } - else return apply_over(signgam, a0); -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -signgam_(EVE_SUPPORTS(cpu_), C const& cond, Ts ... ts) noexcept -{ - return mask_op(cond, eve::signgam, ts ...); -} - -template -EVE_FORCEINLINE auto -signgam_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, Ts ... ts) noexcept -{ - return mask_op(cond, d(eve::signgam), ts ...); -} } diff --git a/include/eve/module/special/regular/impl/stirling.hpp b/include/eve/module/special/regular/impl/stirling.hpp index 1414cf1429..173216b05c 100644 --- a/include/eve/module/special/regular/impl/stirling.hpp +++ b/include/eve/module/special/regular/impl/stirling.hpp @@ -13,10 +13,9 @@ namespace eve::detail { - template - EVE_FORCEINLINE constexpr T - stirling_(EVE_SUPPORTS(cpu_), D const&, T a0) noexcept - requires(is_one_of(types {})) + template + constexpr T + stirling_(EVE_REQUIRES(cpu_), O const&, T a0) noexcept { using elt_t = element_type_t; if constexpr( has_native_abi_v ) @@ -82,27 +81,4 @@ namespace eve::detail } else return apply_over(stirling, a0); } - - template - EVE_FORCEINLINE constexpr T - stirling_(EVE_SUPPORTS(cpu_), T const& x) noexcept - { - return stirling(regular_type(), x); - } - -// ----------------------------------------------------------------------------------------------- -// Masked cases - template - EVE_FORCEINLINE auto - stirling_(EVE_SUPPORTS(cpu_), C const& cond, Ts ... ts) noexcept - { - return mask_op(cond, eve::stirling, ts ...); - } - - template - EVE_FORCEINLINE auto - stirling_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, Ts ... ts) noexcept - { - return mask_op(cond, d(eve::stirling), ts ...); - } } diff --git a/include/eve/module/special/regular/impl/tgamma.hpp b/include/eve/module/special/regular/impl/tgamma.hpp index 10f01e88d8..16b344fb04 100644 --- a/include/eve/module/special/regular/impl/tgamma.hpp +++ b/include/eve/module/special/regular/impl/tgamma.hpp @@ -14,183 +14,157 @@ namespace eve::detail { -template -EVE_FORCEINLINE T -tgamma_(EVE_SUPPORTS(cpu_), D const&, T a0) noexcept - requires(is_one_of(types {})) -{ - if constexpr( has_native_abi_v ) + template + T tgamma_(EVE_REQUIRES(cpu_), O const&, T a0) noexcept { - using elt_t = element_type_t; - auto tgamma1 = [](T x) - { - if constexpr( std::is_same_v ) - { - return eve::reverse_horner(x, T(0x1.000000p+0f), T(0x1.b0ef32p-2f), T(0x1.a5a822p-2f), T(0x1.505468p-4f) - , T(0x1.275cf8p-4f), T(0x1.23b628p-8f), T(0x1.521932p-8f), T(0x1.a51644p-10f)); - } - else if constexpr( std::is_same_v ) - { - return eve::reverse_horner(x, T(0x1.0000000000000p+0), T(0x1.fa1373993e312p-2), T(0x1.a8da9dcae7d31p-3) - , T(0x1.863d918c423d3p-5), T(0x1.557cde9db14b0p-7), T(0x1.384e3e686bfabp-10) - , T(0x1.4fcb839982153p-13)) - / - eve::reverse_horner(x, T(0x1.0000000000000p+0), T(0x1.24944c9cd3c51p-4), T(-0x1.e071a9d4287c2p-3) - , T(0x1.25779e33fde67p-5), T(0x1.831ed5b1bb117p-7), T(-0x1.240e4e750b44ap-8) - , T(0x1.1ae8a29152573p-11), T(-0x1.8487a8400d3afp-16)); - } - }; - if constexpr( scalar_value ) + if constexpr( has_native_abi_v ) { - auto inftest = [](T a0) - { - if constexpr( std::is_same_v ) { return a0 > 35.4f; } - else if constexpr( std::is_same_v ) { return a0 > 171.624; }; - }; - if( is_eqz(a0) ) return copysign(inf(eve::as(a0)), a0); - if constexpr( eve::platform::supports_nans ) - { - if( is_nan(a0) || (a0 == minf(eve::as(a0))) ) return nan(eve::as(a0)); - if( a0 == inf(eve::as(a0)) ) return a0; - } - auto x = a0; - if( inftest(a0) ) return inf(eve::as(a0)); - auto q = abs(x); - if( x < T(-33) ) - { - T st = stirling(q); - T p = floor(q); - auto iseven = is_even((int32_t)p); - if( p == q ) return nan(eve::as(a0)); - T z = q - p; - if( z > half(eve::as(a0)) ) + using elt_t = element_type_t; + auto tgamma1 = [](T x) { - p += one(eve::as(a0)); - z = q - p; - } - z = q * sinpi(z); - if( is_eqz(z) ) return nan(eve::as(a0)); - st = pi(eve::as(a0)) / (abs(z) * st); - return iseven ? -st : st; - } - T z = one(eve::as(a0)); - while( x >= T(3) ) - { - x -= one(eve::as(a0)); - z *= x; - } - while( is_ltz(x) ) - { - z /= x; - x += one(eve::as(a0)); - } - while( x < T(2) ) - { - if( is_eqz(x) ) return nan(eve::as(a0)); - z /= x; - x += one(eve::as(a0)); - } - if( x == T(2) ) return (z); - x -= T(2); - return z * tgamma1(x); - } - else if constexpr( simd_value ) - { - auto large_negative = [](T q) - { - auto st = stirling(q); - auto p = floor(q); - auto sgngam = if_else(is_even(p), one(eve::as(q)), eve::mone); - auto z = q - p; - auto test2 = is_less(z, half(eve::as(q))); - z = dec[test2](z); - z = q * sinpi(z); - z = abs(z); - return sgngam * pi(eve::as(q)) / (z * st); - }; - auto other = [tgamma1](T q, const auto& test) + if constexpr( std::is_same_v ) + { + return eve::reverse_horner(x, T(0x1.000000p+0f), T(0x1.b0ef32p-2f), T(0x1.a5a822p-2f), T(0x1.505468p-4f) + , T(0x1.275cf8p-4f), T(0x1.23b628p-8f), T(0x1.521932p-8f), T(0x1.a51644p-10f)); + } + else if constexpr( std::is_same_v ) + { + return eve::reverse_horner(x, T(0x1.0000000000000p+0), T(0x1.fa1373993e312p-2), T(0x1.a8da9dcae7d31p-3) + , T(0x1.863d918c423d3p-5), T(0x1.557cde9db14b0p-7), T(0x1.384e3e686bfabp-10) + , T(0x1.4fcb839982153p-13)) / + eve::reverse_horner(x, T(0x1.0000000000000p+0), T(0x1.24944c9cd3c51p-4), T(-0x1.e071a9d4287c2p-3) + , T(0x1.25779e33fde67p-5), T(0x1.831ed5b1bb117p-7), T(-0x1.240e4e750b44ap-8) + , T(0x1.1ae8a29152573p-11), T(-0x1.8487a8400d3afp-16)); + } + }; + if constexpr( scalar_value ) { - auto x = if_else(test, T(2), q); + auto inftest = [](T a0) + { + if constexpr( std::is_same_v ) { return a0 > 35.4f; } + else if constexpr( std::is_same_v ) { return a0 > 171.624; }; + }; + if( is_eqz(a0) ) return copysign(inf(eve::as(a0)), a0); if constexpr( eve::platform::supports_nans ) { - auto inf_result = q == inf(eve::as()); - x = if_else(inf_result, T(2), x); + if( is_nan(a0) || (a0 == minf(eve::as(a0))) ) return nan(eve::as(a0)); + if( a0 == inf(eve::as(a0)) ) return a0; } - auto z = one(eve::as()); - auto test1 = (x >= T(3)); - while( eve::any(test1) ) + auto x = a0; + if( inftest(a0) ) return inf(eve::as(a0)); + auto q = abs(x); + if( x < T(-33) ) { - x = dec[test1](x); - z = if_else(test1, z * x, z); - test1 = (x >= T(3)); + T st = stirling(q); + T p = floor(q); + auto iseven = is_even((int32_t)p); + if( p == q ) return nan(eve::as(a0)); + T z = q - p; + if( z > half(eve::as(a0)) ) + { + p += one(eve::as(a0)); + z = q - p; + } + z = q * sinpi(z); + if( is_eqz(z) ) return nan(eve::as(a0)); + st = pi(eve::as(a0)) / (abs(z) * st); + return iseven ? -st : st; } - // all x are less than 3 - test1 = is_ltz(x); - while( eve::any(test1) ) + T z = one(eve::as(a0)); + while( x >= T(3) ) { - z = if_else(test1, z / x, z); - x = inc[test1](x); - test1 = is_ltz(x); + x -= one(eve::as(a0)); + z *= x; } - // all x are greater than 0 and less than 3 - auto test2 = is_less(x, T(2)); - while( eve::any(test2) ) + while( is_ltz(x) ) { - z = if_else(test2, z / x, z); - x = inc[test2](x); - test2 = is_less(x, T(2)); + z /= x; + x += one(eve::as(a0)); } - // all x are greater equal 2 and less than 3 - x = z * tgamma1(x - T(2)); - if constexpr( eve::platform::supports_infinites ) + while( x < T(2) ) { - auto inf_result = (q == inf(eve::as())); - return if_else(inf_result, q, x); + if( is_eqz(x) ) return nan(eve::as(a0)); + z /= x; + x += one(eve::as(a0)); } - else return x; - }; - - auto nan_result = is_ltz(a0) && is_flint(a0); - // if constexpr(eve::platform::supports_nans) nan_result = is_nan(a0) || nan_result; - auto q = abs(a0); - auto test = is_less(a0, T(-33.0)); - std::size_t nb = eve::count_true(test); - auto r = nan(eve::as(a0)); - if( nb > 0 ) - { - // treat negative large with reflection - r = large_negative(q); - if( nb >= cardinal_v ) return if_else(nan_result, eve::allbits, r); + if( x == T(2) ) return (z); + x -= T(2); + return z * tgamma1(x); } - auto r1 = other(a0, test); - auto r2 = if_else(test, r, r1); - return if_else( + else if constexpr( simd_value ) + { + auto large_negative = [](T q) + { + auto st = stirling(q); + auto p = floor(q); + auto sgngam = if_else(is_even(p), one(eve::as(q)), eve::mone); + auto z = q - p; + auto test2 = is_less(z, half(eve::as(q))); + z = dec[test2](z); + z = q * sinpi(z); + z = abs(z); + return sgngam * pi(eve::as(q)) / (z * st); + }; + auto other = [tgamma1](T q, const auto& test) + { + auto x = if_else(test, T(2), q); + if constexpr( eve::platform::supports_nans ) + { + auto inf_result = q == inf(eve::as()); + x = if_else(inf_result, T(2), x); + } + auto z = one(eve::as()); + auto test1 = (x >= T(3)); + while( eve::any(test1) ) + { + x = dec[test1](x); + z = if_else(test1, z * x, z); + test1 = (x >= T(3)); + } + // all x are less than 3 + test1 = is_ltz(x); + while( eve::any(test1) ) + { + z = if_else(test1, z / x, z); + x = inc[test1](x); + test1 = is_ltz(x); + } + // all x are greater than 0 and less than 3 + auto test2 = is_less(x, T(2)); + while( eve::any(test2) ) + { + z = if_else(test2, z / x, z); + x = inc[test2](x); + test2 = is_less(x, T(2)); + } + // all x are greater equal 2 and less than 3 + x = z * tgamma1(x - T(2)); + if constexpr( eve::platform::supports_infinites ) + { + auto inf_result = (q == inf(eve::as())); + return if_else(inf_result, q, x); + } + else return x; + }; + + auto nan_result = is_ltz(a0) && is_flint(a0); + // if constexpr(eve::platform::supports_nans) nan_result = is_nan(a0) || nan_result; + auto q = abs(a0); + auto test = is_less(a0, T(-33.0)); + std::size_t nb = eve::count_true(test); + auto r = nan(eve::as(a0)); + if( nb > 0 ) + { + // treat negative large with reflection + r = large_negative(q); + if( nb >= cardinal_v ) return if_else(nan_result, eve::allbits, r); + } + auto r1 = other(a0, test); + auto r2 = if_else(test, r, r1); + return if_else( is_eqz(a0), copysign(inf(eve::as(a0)), a0), if_else(nan_result, eve::allbits, r2)); + } } + else return apply_over(tgamma, a0); } - else return apply_over(tgamma, a0); -} - -template -EVE_FORCEINLINE constexpr T -tgamma_(EVE_SUPPORTS(cpu_), T const& x) noexcept -{ - return tgamma(regular_type(), x); -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -tgamma_(EVE_SUPPORTS(cpu_), C const& cond, Ts ... ts) noexcept -{ - return mask_op(cond, eve::tgamma, ts ...); -} - -template -EVE_FORCEINLINE auto -tgamma_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, Ts ... ts) noexcept -{ - return mask_op(cond, d(eve::tgamma), ts ...); -} } diff --git a/include/eve/module/special/regular/impl/zeta.hpp b/include/eve/module/special/regular/impl/zeta.hpp index c97fd28f72..d0638e3f1d 100644 --- a/include/eve/module/special/regular/impl/zeta.hpp +++ b/include/eve/module/special/regular/impl/zeta.hpp @@ -14,78 +14,61 @@ namespace eve::detail { -template -EVE_FORCEINLINE T -zeta_(EVE_SUPPORTS(cpu_), T x) noexcept -{ - if constexpr( has_native_abi_v ) + template + T zeta_(EVE_REQUIRES(cpu_), O const&, T x) { - using elt_t = element_type_t; - // - // This is algorithm 3 from: - // - // "An Efficient Algorithm for the Riemann Zeta Function", P. Borwein, - // Canadian Mathematical Society, Conference Proceedings. - // See: http://www.cecm.sfu.ca/personal/pborwein/PAPERS/P155.pdf - // - auto zetp = [](auto s) - { - auto sc = oneminus(s); - const int n = if_else(sizeof(elt_t) == 8, 18, 7); - auto sum(zero(as(s))); - auto two_n = ldexp(T(1), n); - auto ej_sign(one(as(s))); - for( int j = 1; j <= n; ++j ) - { - sum += ej_sign * -two_n * pow_abs(T(j), -s); - ej_sign = -ej_sign; - } - auto ej_sum(one(as(s))); - auto ej_term(one(as(s))); - for( int j = n; j <= 2 * n - 1; ++j ) - { - sum += ej_sign * (ej_sum - two_n) * pow_abs(T(inc(j)), -s); - ej_sign = -ej_sign; - ej_term *= 2 * n - j; - ej_term /= j - n + 1; - ej_sum += ej_term; - } - auto z = -sum / (two_n * (-powm1(T(2), sc))); - return if_else(s == one(as(s)), allbits, z); - }; - auto r = nan(as(x)); - auto notdone = x != one(as(x)) || is_not_nan(x); - if( eve::any(notdone) ) + if constexpr( has_native_abi_v ) { - notdone = next_interval(zetp, notdone, is_gez(x), r, x); + using elt_t = element_type_t; + // + // This is algorithm 3 from: + // + // "An Efficient Algorithm for the Riemann Zeta Function", P. Borwein, + // Canadian Mathematical Society, Conference Proceedings. + // See: http://www.cecm.sfu.ca/personal/pborwein/PAPERS/P155.pdf + // + auto zetp = [](auto s) + { + auto sc = oneminus(s); + const int n = if_else(sizeof(elt_t) == 8, 18, 7); + auto sum(zero(as(s))); + auto two_n = ldexp(T(1), n); + auto ej_sign(one(as(s))); + for( int j = 1; j <= n; ++j ) + { + sum += ej_sign * -two_n * pow_abs(T(j), -s); + ej_sign = -ej_sign; + } + auto ej_sum(one(as(s))); + auto ej_term(one(as(s))); + for( int j = n; j <= 2 * n - 1; ++j ) + { + sum += ej_sign * (ej_sum - two_n) * pow_abs(T(inc(j)), -s); + ej_sign = -ej_sign; + ej_term *= 2 * n - j; + ej_term /= j - n + 1; + ej_sum += ej_term; + } + auto z = -sum / (two_n * (-powm1(T(2), sc))); + return if_else(s == one(as(s)), allbits, z); + }; + auto r = nan(as(x)); + auto notdone = x != one(as(x)) || is_not_nan(x); if( eve::any(notdone) ) { - auto reflec = [zetp](auto x) + notdone = next_interval(zetp, notdone, is_gez(x), r, x); + if( eve::any(notdone) ) { - auto vp1 = oneminus(x); // 1-x - return 2 * pow_abs(2 * pi(as(x)), -vp1) * cospi(T(0.5) * vp1) * tgamma(vp1) * zetp(vp1); - }; - last_interval(reflec, notdone, r, x); + auto reflec = [zetp](auto x) + { + auto vp1 = oneminus(x); // 1-x + return 2 * pow_abs(2 * pi(as(x)), -vp1) * cospi(T(0.5) * vp1) * tgamma(vp1) * zetp(vp1); + }; + last_interval(reflec, notdone, r, x); + } } + return r; } - return r; + else return apply_over(zeta, x); } - else return apply_over(zeta, x); -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -zeta_(EVE_SUPPORTS(cpu_), C const& cond, Ts ... ts) noexcept -{ - return mask_op(cond, eve::zeta, ts ...); -} - -template -EVE_FORCEINLINE auto -zeta_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, Ts ... ts) noexcept -{ - return mask_op(cond, d(eve::zeta), ts ...); -} } diff --git a/include/eve/module/special/regular/lambert.hpp b/include/eve/module/special/regular/lambert.hpp index 96c933ab24..1d4a94a414 100644 --- a/include/eve/module/special/regular/lambert.hpp +++ b/include/eve/module/special/regular/lambert.hpp @@ -7,10 +7,23 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct lambert_t : elementwise_callable + { + template + EVE_FORCEINLINE + kumi::tuple + operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(lambert_t, lambert_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -51,7 +64,7 @@ namespace eve //! @godbolt{doc/special/regular/lambert.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(lambert_, lambert); +inline constexpr auto lambert = functor; } #include diff --git a/include/eve/module/special/regular/lbeta.hpp b/include/eve/module/special/regular/lbeta.hpp index e76626649d..c6331ba32b 100644 --- a/include/eve/module/special/regular/lbeta.hpp +++ b/include/eve/module/special/regular/lbeta.hpp @@ -6,10 +6,22 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { +template +struct lbeta_t : elementwise_callable +{ + template + EVE_FORCEINLINE + eve::common_value_t operator()(T0 a, T1 b) const noexcept { return EVE_DISPATCH_CALL(a, b); } + + EVE_CALLABLE_OBJECT(lbeta_t, lbeta_); +}; + //================================================================================================ //! @addtogroup special //! @{ @@ -43,7 +55,7 @@ namespace eve //! //! @} //================================================================================================ -EVE_MAKE_CALLABLE(lbeta_, lbeta); + inline constexpr auto lbeta = functor; } #include diff --git a/include/eve/module/special/regular/lfactorial.hpp b/include/eve/module/special/regular/lfactorial.hpp index dcc5a5d9f0..4a265ed0ba 100644 --- a/include/eve/module/special/regular/lfactorial.hpp +++ b/include/eve/module/special/regular/lfactorial.hpp @@ -6,11 +6,27 @@ */ //================================================================================================== #pragma once - -#include +#include +#include +#include namespace eve { + template + struct lfactorial_t : elementwise_callable + { + template + EVE_FORCEINLINE + as_wide_as_t + operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + template + EVE_FORCEINLINE + T operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(lfactorial_t, lfactorial_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -29,14 +45,14 @@ namespace eve //! @code //! namespace eve //! { -//! template< eve::value N > +//! template< eve::ordered_value N > //! auto lfactorial(N x) noexcept; //! } //! @endcode //! //! **Parameters** //! -//! * `n` : [integer or flint argument](@ref eve::value). +//! * `n` : [integer or flint argument](@ref eve::ordered_value). //! //! **Return value** //! @@ -45,14 +61,14 @@ namespace eve //! [element type](eve::element_type) is always double to try to avoid overflow. //! * If the entry is a [floating point value](eve::floating_point_value) //! which must be a flint, the result is of the same type as the entry. -//! * If `n` elements are not integer or flint the result is undefined. +//! * If `n` elements are nor integer nor flint the result is undefined. //! //! @groupheader{Example} //! //! @godbolt{doc/special/regular/lfactorial.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(lfactorial_, lfactorial); +inline constexpr auto lfactorial = functor; } #include diff --git a/include/eve/module/special/regular/log_abs_gamma.hpp b/include/eve/module/special/regular/log_abs_gamma.hpp index 6f9fe1d367..4369079b71 100644 --- a/include/eve/module/special/regular/log_abs_gamma.hpp +++ b/include/eve/module/special/regular/log_abs_gamma.hpp @@ -7,11 +7,18 @@ //================================================================================================== #pragma once -#include -#include - namespace eve { + template + struct log_abs_gamma_t : elementwise_callable + { + template + EVE_FORCEINLINE + T operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(log_abs_gamma_t, log_abs_gamma_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -36,7 +43,7 @@ namespace eve //! //! **Parameters** //! -//! * `x` : [real argument](@ref eve::value). +//! * `x` : [real argument](@ref eve::floating_ordered_value). //! //! **Return value** //! @@ -47,7 +54,7 @@ namespace eve //! @godbolt{doc/special/regular/log_abs_gamma.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(log_abs_gamma_, log_abs_gamma); +inline constexpr auto log_abs_gamma = functor; } #include diff --git a/include/eve/module/special/regular/log_gamma.hpp b/include/eve/module/special/regular/log_gamma.hpp index 9f0335bf2e..9ad0e20ca5 100644 --- a/include/eve/module/special/regular/log_gamma.hpp +++ b/include/eve/module/special/regular/log_gamma.hpp @@ -6,12 +6,21 @@ */ //================================================================================================== #pragma once - #include -#include +#include +#include namespace eve { + template + struct log_gamma_t : elementwise_callable + { + template + EVE_FORCEINLINE T operator()(T v) const { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(log_gamma_t, log_gamma_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -50,7 +59,7 @@ namespace eve //! //! @} //================================================================================================ -EVE_MAKE_CALLABLE(log_gamma_, log_gamma); +inline constexpr auto log_gamma = functor; } #include diff --git a/include/eve/module/special/regular/lrising_factorial.hpp b/include/eve/module/special/regular/lrising_factorial.hpp index 0ac2ef3301..b2c2ce651a 100644 --- a/include/eve/module/special/regular/lrising_factorial.hpp +++ b/include/eve/module/special/regular/lrising_factorial.hpp @@ -7,10 +7,22 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct lrising_factorial_t : elementwise_callable + { + template + EVE_FORCEINLINE + auto operator()(I a, T b) const noexcept { return EVE_DISPATCH_CALL(a, b); } + + EVE_CALLABLE_OBJECT(lrising_factorial_t, lrising_factorial_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -70,7 +82,7 @@ namespace eve //! //! @} //================================================================================================ -EVE_MAKE_CALLABLE(lrising_factorial_, lrising_factorial); + inline constexpr auto lrising_factorial = functor; } #include diff --git a/include/eve/module/special/regular/omega.hpp b/include/eve/module/special/regular/omega.hpp index dbccd73498..33f9cf6941 100644 --- a/include/eve/module/special/regular/omega.hpp +++ b/include/eve/module/special/regular/omega.hpp @@ -7,10 +7,21 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct omega_t : elementwise_callable + { + template + EVE_FORCEINLINE T operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(omega_t, omega_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -47,7 +58,8 @@ namespace eve //! @godbolt{doc/special/regular/omega.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(omega_, omega); +inline constexpr auto omega = functor; } + #include diff --git a/include/eve/module/special/regular/rising_factorial.hpp b/include/eve/module/special/regular/rising_factorial.hpp index 4cc59d3357..c541c8924f 100644 --- a/include/eve/module/special/regular/rising_factorial.hpp +++ b/include/eve/module/special/regular/rising_factorial.hpp @@ -7,10 +7,22 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct rising_factorial_t : elementwise_callable + { + template + EVE_FORCEINLINE + auto operator()(I a, T b) const noexcept { return EVE_DISPATCH_CALL(a, b); } + + EVE_CALLABLE_OBJECT(rising_factorial_t, rising_factorial_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -71,7 +83,7 @@ namespace eve //! //! @} //================================================================================================ -EVE_MAKE_CALLABLE(rising_factorial_, rising_factorial); + inline constexpr auto rising_factorial = functor; } #include diff --git a/include/eve/module/special/regular/signgam.hpp b/include/eve/module/special/regular/signgam.hpp index 86c47bf22a..5fbc32ad2e 100644 --- a/include/eve/module/special/regular/signgam.hpp +++ b/include/eve/module/special/regular/signgam.hpp @@ -7,10 +7,20 @@ #pragma once #include -#include +#include +#include namespace eve { + template + struct signgam_t : elementwise_callable + { + template + EVE_FORCEINLINE T operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(signgam_t, signgam_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -46,7 +56,7 @@ namespace eve //! @godbolt{doc/special/regular/signgam.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(signgam_, signgam); +inline constexpr auto signgam = functor; } #include diff --git a/include/eve/module/special/regular/stirling.hpp b/include/eve/module/special/regular/stirling.hpp index 2c886de717..7f6761f948 100644 --- a/include/eve/module/special/regular/stirling.hpp +++ b/include/eve/module/special/regular/stirling.hpp @@ -6,10 +6,21 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct stirling_t : elementwise_callable + { + template + EVE_FORCEINLINE T operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(stirling_t, stirling_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -50,7 +61,7 @@ namespace eve //! @godbolt{doc/special/regular/stirling.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(stirling_, stirling); +inline constexpr auto stirling = functor; } #include diff --git a/include/eve/module/special/regular/tgamma.hpp b/include/eve/module/special/regular/tgamma.hpp index 194e3b61f6..150041b0b9 100644 --- a/include/eve/module/special/regular/tgamma.hpp +++ b/include/eve/module/special/regular/tgamma.hpp @@ -7,10 +7,21 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct tgamma_t : elementwise_callable + { + template + EVE_FORCEINLINE T operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(tgamma_t, tgamma_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -48,8 +59,7 @@ namespace eve //! //! @} //================================================================================================ - -EVE_MAKE_CALLABLE(tgamma_, tgamma); +inline constexpr auto tgamma = functor; } #include diff --git a/include/eve/module/special/regular/zeta.hpp b/include/eve/module/special/regular/zeta.hpp index 1dbaac2d5f..e475fec95d 100644 --- a/include/eve/module/special/regular/zeta.hpp +++ b/include/eve/module/special/regular/zeta.hpp @@ -6,10 +6,21 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct zeta_t : elementwise_callable + { + template + EVE_FORCEINLINE T operator()(T v) const noexcept { return EVE_DISPATCH_CALL(v); } + + EVE_CALLABLE_OBJECT(zeta_t, zeta_); + }; + //================================================================================================ //! @addtogroup special //! @{ @@ -46,7 +57,7 @@ namespace eve //! @godbolt{doc/special/regular/zeta.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(zeta_, zeta); +inline constexpr auto zeta = functor; } #include diff --git a/include/eve/traits/overload/default_behaviors.hpp b/include/eve/traits/overload/default_behaviors.hpp index d986f303be..5a934afc4e 100644 --- a/include/eve/traits/overload/default_behaviors.hpp +++ b/include/eve/traits/overload/default_behaviors.hpp @@ -56,7 +56,7 @@ namespace eve } protected: - constexpr Func const& derived() const { return static_cast>(*this); } + constexpr Func const& derived() const { return static_castconst&>(*this); } }; //==================================================================================================================== diff --git a/test/doc/special/pedantic/lrising_factorial.cpp b/test/doc/special/pedantic/lrising_factorial.cpp index d4f6089853..e68d286f9a 100644 --- a/test/doc/special/pedantic/lrising_factorial.cpp +++ b/test/doc/special/pedantic/lrising_factorial.cpp @@ -13,6 +13,6 @@ int main() std::cout << "---- simd" << std::setprecision(17) << '\n' << " <- n = " << n << '\n' << " <- x = " << x << '\n' - << " -> pedantic(lrising_factorial(n, x)) = " << eve::pedantic(eve::lrising_factorial)(n, x) << '\n'; + << " -> pedantic(lrising_factorial(n, x)) = " << eve::lrising_factorial[eve::pedantic](n, x) << '\n'; return 0; } diff --git a/test/doc/special/pedantic/rising_factorial.cpp b/test/doc/special/pedantic/rising_factorial.cpp index d9ab292c74..2daab846af 100644 --- a/test/doc/special/pedantic/rising_factorial.cpp +++ b/test/doc/special/pedantic/rising_factorial.cpp @@ -13,6 +13,6 @@ int main() std::cout << "---- simd" << std::setprecision(17) << '\n' << " <- n = " << n << '\n' << " <- x = " << x << '\n' - << " -> pedantic(rising_factorial)(n, x) = " << eve::pedantic(eve::rising_factorial)(n, x) << '\n'; + << " -> pedantic(rising_factorial)(n, x) = " << eve::rising_factorial[eve::pedantic](n, x) << '\n'; return 0; } diff --git a/test/doc/special/raw/lrising_factorial.cpp b/test/doc/special/raw/lrising_factorial.cpp index c1fff9c27e..8a7ef2c180 100644 --- a/test/doc/special/raw/lrising_factorial.cpp +++ b/test/doc/special/raw/lrising_factorial.cpp @@ -13,7 +13,7 @@ int main() std::cout << "---- simd" << std::setprecision(17) << '\n' << " <- n = " << n << '\n' << " <- x = " << x << '\n' - << " -> raw(lrising_factorial(n, x)) = " << eve::raw(eve::lrising_factorial)(n, x) << '\n'; + << " -> raw(lrising_factorial(n, x)) = " << eve::lrising_factorial[eve::raw](n, x) << '\n'; return 0; } diff --git a/test/doc/special/raw/rising_factorial.cpp b/test/doc/special/raw/rising_factorial.cpp index ac1347a2ac..e1671b4c1f 100644 --- a/test/doc/special/raw/rising_factorial.cpp +++ b/test/doc/special/raw/rising_factorial.cpp @@ -13,6 +13,6 @@ int main() std::cout << "---- simd" << std::setprecision(17) << '\n' << " <- n = " << n << '\n' << " <- x = " << x << '\n' - << " -> raw(rising_factorial)(n, x) = " << eve::raw(eve::rising_factorial)(n, x) << '\n'; + << " -> raw(rising_factorial)(n, x) = " << eve::rising_factorial[eve::raw](n, x) << '\n'; return 0; } diff --git a/test/unit/module/special/lbeta.cpp b/test/unit/module/special/lbeta.cpp new file mode 100644 index 0000000000..f9eacac65b --- /dev/null +++ b/test/unit/module/special/lbeta.cpp @@ -0,0 +1,81 @@ +//================================================================================================== +/** + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +**/ +//================================================================================================== +#include "test.hpp" + +#include + +#include + +//================================================================================================== +// Types tests +//================================================================================================== +TTS_CASE_TPL("Check return types of lbeta", eve::test::simd::ieee_reals) +(tts::type) +{ + using v_t = eve::element_type_t; + + TTS_EXPR_IS(eve::lbeta(T(), T()), T); + TTS_EXPR_IS(eve::lbeta(T(), v_t()), T); + TTS_EXPR_IS(eve::lbeta(v_t(), T()), T); + TTS_EXPR_IS(eve::lbeta(v_t(), v_t()), v_t); +}; + +//================================================================================================== +// lbeta tests +//================================================================================================== +TTS_CASE_WITH("Check behavior of lbeta on wide", + eve::test::simd::ieee_reals, + tts::generate(tts::randoms(0.0, 10.0), tts::randoms(0.0, 10.0))) +([[maybe_unused]] T const& a0, [[maybe_unused]] T const& a1) +{ + using eve::as; + using eve::lbeta; + +#if defined(__cpp_lib_math_special_functions) + using elt_t = eve::element_type_t; + + TTS_ULP_EQUAL( + eve::lbeta(a0, a1), map([&](auto e, auto f) -> elt_t { return std::log(std::beta(e, f)); }, a0, a1), 64); + TTS_ULP_EQUAL(lbeta(T(-0.0), T(-0.0)), T(std::log(std::beta(elt_t(-0.0), elt_t(-0.0)))), 0); + TTS_ULP_EQUAL(lbeta(T(0.0), T(0.0)), T(std::log(std::beta(elt_t(0.0), elt_t(0.0)))), 0); + TTS_ULP_EQUAL(lbeta(T(1.0), T(1.0)), T(std::log(std::beta(elt_t(1.0), elt_t(1.0)))), 0); + TTS_ULP_EQUAL(lbeta(T(2.0), T(3.0)), T(std::log(std::beta(elt_t(2.0), elt_t(3.0)))), 0); + TTS_ULP_EQUAL(lbeta(T(2.5), T(3.7)), T(std::log(std::beta(elt_t(2.5), elt_t(3.7)))), 0); +#endif // __cpp_lib_math_special_functions + + if constexpr( eve::platform::supports_invalids ) + { + TTS_ULP_EQUAL(lbeta(eve::inf(eve::as()), eve::inf(eve::as())), eve::nan(as()), 0); + TTS_ULP_EQUAL(lbeta(eve::inf(eve::as()), T(1)), eve::nan(as()), 0); + TTS_ULP_EQUAL(lbeta(T(1), eve::inf(eve::as())), eve::nan(as()), 0); + TTS_ULP_EQUAL(lbeta(eve::minf(eve::as()), eve::minf(eve::as())), eve::nan(as()), 0); + TTS_ULP_EQUAL(lbeta(eve::minf(eve::as()), T(1)), eve::nan(as()), 0); + TTS_ULP_EQUAL(lbeta(T(1), eve::minf(eve::as())), eve::nan(as()), 0); + + TTS_ULP_EQUAL(lbeta(eve::nan(eve::as()), eve::nan(eve::as())), eve::nan(eve::as()), 0); + TTS_ULP_EQUAL(lbeta(eve::nan(eve::as()), T(1)), eve::nan(as()), 0); + TTS_ULP_EQUAL(lbeta(T(1), eve::nan(eve::as())), eve::nan(as()), 0); + } +}; + + +//================================================================================================== +// Tests for masked lbeta +//================================================================================================== +TTS_CASE_WITH("Check behavior of eve::masked(eve::lbeta)(eve::wide)", + eve::test::simd::ieee_reals, + tts::generate(tts::randoms(0.0, 10.0), + tts::randoms(0.0, 10.0), + tts::logicals(0, 3))) +(T const& a0, + T const& a1, + M const& mask) +{ + TTS_IEEE_EQUAL(eve::lbeta[mask](a0, a1), + eve::if_else(mask, eve::lbeta(a0, a1), a0)); +}; diff --git a/test/unit/module/special/lrising_factorial.cpp b/test/unit/module/special/lrising_factorial.cpp index 0cda04381f..1523790978 100644 --- a/test/unit/module/special/lrising_factorial.cpp +++ b/test/unit/module/special/lrising_factorial.cpp @@ -18,9 +18,9 @@ TTS_CASE_TPL("Check return types of eve::lrising_factorial", eve::test::simd::ieee_reals) (tts::type) { - TTS_EXPR_IS(eve::raw(eve::lrising_factorial)(int(), T()), T); + TTS_EXPR_IS(eve::lrising_factorial[eve::raw](int(), T()), T); TTS_EXPR_IS(eve::lrising_factorial(int(), T()), T); - TTS_EXPR_IS(eve::pedantic(eve::lrising_factorial)(int(), T()), T); + TTS_EXPR_IS(eve::lrising_factorial[eve::pedantic](int(), T()), T); }; //================================================================================================== @@ -41,50 +41,50 @@ TTS_CASE_TPL("Check corner-cases behavior of eve::lrising_factorial on wide", if constexpr( eve::platform::supports_invalids ) { - TTS_ULP_EQUAL(eve::pedantic(eve::lrising_factorial)(eve::nan(eve::as()), T(2.5)), + TTS_ULP_EQUAL(eve::lrising_factorial[eve::pedantic](eve::nan(eve::as()), T(2.5)), eve::nan(eve::as()), 0); - TTS_ULP_EQUAL(eve::pedantic(eve::lrising_factorial)(T(4.9), eve::nan(eve::as())), + TTS_ULP_EQUAL(eve::lrising_factorial[eve::pedantic](T(4.9), eve::nan(eve::as())), eve::nan(eve::as()), 0); - TTS_ULP_EQUAL(eve::pedantic(eve::lrising_factorial)(eve::inf(eve::as()), T(2.5)), + TTS_ULP_EQUAL(eve::lrising_factorial[eve::pedantic](eve::inf(eve::as()), T(2.5)), eve::nan(eve::as()), 0); - TTS_ULP_EQUAL(eve::pedantic(eve::lrising_factorial)(T(4.9), eve::inf(eve::as())), + TTS_ULP_EQUAL(eve::lrising_factorial[eve::pedantic](T(4.9), eve::inf(eve::as())), eve::inf(eve::as()), 0); - TTS_ULP_EQUAL(eve::pedantic(eve::lrising_factorial)(eve::minf(eve::as()), T(2.5)), + TTS_ULP_EQUAL(eve::lrising_factorial[eve::pedantic](eve::minf(eve::as()), T(2.5)), eve::nan(eve::as()), 0); - TTS_ULP_EQUAL(eve::pedantic(eve::lrising_factorial)(T(4.9), eve::minf(eve::as())), + TTS_ULP_EQUAL(eve::lrising_factorial[eve::pedantic](T(4.9), eve::minf(eve::as())), eve::inf(eve::as()), 0); // verify } using elt_t = eve::element_type_t; TTS_ULP_EQUAL( - eve::pedantic(eve::lrising_factorial)(T(20), T(2.0)), T(6.040254711277414e+00), ulp); - TTS_ULP_EQUAL(eve::pedantic(eve::lrising_factorial)(10, T(0.1)), T(2.256992585517679e-01), ulp); - TTS_ULP_EQUAL(eve::pedantic(eve::lrising_factorial)(5, T(0.1)), T(1.517103381272788e-01), ulp); - TTS_ULP_EQUAL(eve::pedantic(eve::lrising_factorial)(0, T(0.1)), eve::minf(eve::as()), ulp); - TTS_ULP_EQUAL(eve::pedantic(eve::lrising_factorial)(2, T(0.1)), T(4.543773854448513e-02), ulp); + eve::lrising_factorial[eve::pedantic](T(20), T(2.0)), T(6.040254711277414e+00), ulp); + TTS_ULP_EQUAL(eve::lrising_factorial[eve::pedantic](10, T(0.1)), T(2.256992585517679e-01), ulp); + TTS_ULP_EQUAL(eve::lrising_factorial[eve::pedantic](5, T(0.1)), T(1.517103381272788e-01), ulp); + TTS_ULP_EQUAL(eve::lrising_factorial[eve::pedantic](0, T(0.1)), eve::minf(eve::as()), ulp); + TTS_ULP_EQUAL(eve::lrising_factorial[eve::pedantic](2, T(0.1)), T(4.543773854448513e-02), ulp); TTS_ULP_EQUAL( - eve::pedantic(eve::lrising_factorial)(T(20.3), T(10.2)), T(3.272013434873217e+01), ulp); + eve::lrising_factorial[eve::pedantic](T(20.3), T(10.2)), T(3.272013434873217e+01), ulp); TTS_ULP_EQUAL( - eve::pedantic(eve::lrising_factorial)(T(-20), T(-2.0)), T(-6.1355648910817386366), ulp); - TTS_ULP_EQUAL(eve::pedantic(eve::lrising_factorial)(T(20), T(-20)), T(eve::log_abs_gamma(T(21))), ulp); - TTS_ULP_EQUAL(eve::pedantic(eve::lrising_factorial)(T(-20), T(20)), eve::inf(eve::as()), ulp); + eve::lrising_factorial[eve::pedantic](T(-20), T(-2.0)), T(-6.1355648910817386366), ulp); + TTS_ULP_EQUAL(eve::lrising_factorial[eve::pedantic](T(20), T(-20)), T(eve::log_abs_gamma(T(21))), ulp); + TTS_ULP_EQUAL(eve::lrising_factorial[eve::pedantic](T(-20), T(20)), eve::inf(eve::as()), ulp); TTS_ULP_EQUAL( - eve::pedantic(eve::lrising_factorial)(T(-20), T(10.3)), eve::minf(eve::as()), ulp); + eve::lrising_factorial[eve::pedantic](T(-20), T(10.3)), eve::minf(eve::as()), ulp); TTS_ULP_EQUAL( - eve::pedantic(eve::lrising_factorial)(T(-20.2), T(-10.3)), T(-33.961897962564812303), ulp); + eve::lrising_factorial[eve::pedantic](T(-20.2), T(-10.3)), T(-33.961897962564812303), ulp); TTS_ULP_EQUAL( - eve::pedantic(eve::lrising_factorial)(T(-20.2), T(10.3)), T(28.713944249477066251), ulp); + eve::lrising_factorial[eve::pedantic](T(-20.2), T(10.3)), T(28.713944249477066251), ulp); TTS_ULP_EQUAL( - eve::pedantic(eve::lrising_factorial)(T(-20.2), T(30.4)), T(54.518836763604106466), ulp); + eve::lrising_factorial[eve::pedantic](T(-20.2), T(30.4)), T(54.518836763604106466), ulp); elt_t r = sizeof(eve::element_type_t) == 4 ? 1.7954323539015604183e-06 : 3.3442530265754862841e-15; TTS_ULP_EQUAL( - eve::pedantic(eve::lrising_factorial)(T(5), 10 * eve::eps(eve::as())), T(r), ulp); + eve::lrising_factorial[eve::pedantic](T(5), 10 * eve::eps(eve::as())), T(r), ulp); ulp = 110.0; }; diff --git a/test/unit/module/special/rising_factorial.cpp b/test/unit/module/special/rising_factorial.cpp index 07f3f7deed..c3783bd827 100644 --- a/test/unit/module/special/rising_factorial.cpp +++ b/test/unit/module/special/rising_factorial.cpp @@ -16,7 +16,7 @@ //== Types tests //================================================================================================== TTS_CASE_TPL("Check return types of eve::rising_factorial", eve::test::simd::ieee_reals) -(tts::type) { TTS_EXPR_IS(eve::raw(eve::rising_factorial)(int(), T()), T); }; +(tts::type) { TTS_EXPR_IS(eve::rising_factorial[eve::raw](int(), T()), T); }; //================================================================================================== //== Test for corner-cases values @@ -37,51 +37,51 @@ TTS_CASE_TPL("Check corner-cases behavior of eve::rising_factorial wide", if constexpr( eve::platform::supports_invalids ) { - TTS_ULP_EQUAL(eve::pedantic(eve::rising_factorial)(eve::nan(eve::as()), T(2.5)), + TTS_ULP_EQUAL(eve::rising_factorial[eve::pedantic](eve::nan(eve::as()), T(2.5)), eve::nan(eve::as()), 0); - TTS_ULP_EQUAL(eve::pedantic(eve::rising_factorial)(T(4.9), eve::nan(eve::as())), + TTS_ULP_EQUAL(eve::rising_factorial[eve::pedantic](T(4.9), eve::nan(eve::as())), eve::nan(eve::as()), 0); - TTS_ULP_EQUAL(eve::pedantic(eve::rising_factorial)(eve::inf(eve::as()), T(2.5)), + TTS_ULP_EQUAL(eve::rising_factorial[eve::pedantic](eve::inf(eve::as()), T(2.5)), eve::nan(eve::as()), 0); - TTS_ULP_EQUAL(eve::pedantic(eve::rising_factorial)(T(4.9), eve::inf(eve::as())), + TTS_ULP_EQUAL(eve::rising_factorial[eve::pedantic](T(4.9), eve::inf(eve::as())), eve::inf(eve::as()), 0); - TTS_ULP_EQUAL(eve::pedantic(eve::rising_factorial)(eve::minf(eve::as()), T(2.5)), + TTS_ULP_EQUAL(eve::rising_factorial[eve::pedantic](eve::minf(eve::as()), T(2.5)), eve::nan(eve::as()), 0); - TTS_ULP_EQUAL(eve::pedantic(eve::rising_factorial)(T(4.9), eve::minf(eve::as())), + TTS_ULP_EQUAL(eve::rising_factorial[eve::pedantic](T(4.9), eve::minf(eve::as())), eve::nan(eve::as()), 0); } ulp = 70.0; - TTS_ULP_EQUAL(eve::pedantic(eve::rising_factorial)(T(20), T(2.0)), T(420), ulp); - TTS_ULP_EQUAL(eve::pedantic(eve::rising_factorial)(10, T(0.1)), T(1.253198719801548e+00), ulp); - TTS_ULP_EQUAL(eve::pedantic(eve::rising_factorial)(5, T(0.1)), T(1.163823072432015e+00), ulp); - TTS_ULP_EQUAL(eve::pedantic(eve::rising_factorial)(0, T(0.1)), T(0), ulp); - TTS_ULP_EQUAL(eve::pedantic(eve::rising_factorial)(2, T(0.1)), T(1.046485846853560e+00), ulp); + TTS_ULP_EQUAL(eve::rising_factorial[eve::pedantic](T(20), T(2.0)), T(420), ulp); + TTS_ULP_EQUAL(eve::rising_factorial[eve::pedantic](10, T(0.1)), T(1.253198719801548e+00), ulp); + TTS_ULP_EQUAL(eve::rising_factorial[eve::pedantic](5, T(0.1)), T(1.163823072432015e+00), ulp); + TTS_ULP_EQUAL(eve::rising_factorial[eve::pedantic](0, T(0.1)), T(0), ulp); + TTS_ULP_EQUAL(eve::rising_factorial[eve::pedantic](2, T(0.1)), T(1.046485846853560e+00), ulp); TTS_ULP_EQUAL( - eve::pedantic(eve::rising_factorial)(T(20.3), T(10.2)), T(1.622459238800527e+14), ulp); + eve::rising_factorial[eve::pedantic](T(20.3), T(10.2)), T(1.622459238800527e+14), ulp); TTS_ULP_EQUAL( - eve::pedantic(eve::rising_factorial)(T(-20), T(-2.0)), T(2.164502164502165e-03), ulp); - TTS_ULP_EQUAL(eve::pedantic(eve::rising_factorial)(T(20), T(-20)), T(eve::tgamma(T(21))), ulp); - TTS_ULP_EQUAL(eve::pedantic(eve::rising_factorial)(T(-20), T(20)), eve::inf(eve::as()), ulp); + eve::rising_factorial[eve::pedantic](T(-20), T(-2.0)), T(2.164502164502165e-03), ulp); + TTS_ULP_EQUAL(eve::rising_factorial[eve::pedantic](T(20), T(-20)), T(eve::tgamma(T(21))), ulp); + TTS_ULP_EQUAL(eve::rising_factorial[eve::pedantic](T(-20), T(20)), eve::inf(eve::as()), ulp); TTS_ULP_EQUAL( - eve::pedantic(eve::rising_factorial)(T(-20), T(10.3)), eve::zero(eve::as()), ulp); + eve::rising_factorial[eve::pedantic](T(-20), T(10.3)), eve::zero(eve::as()), ulp); TTS_ULP_EQUAL( - eve::pedantic(eve::rising_factorial)(T(-20.2), T(-10.3)), T(1.780471883652420e-15), ulp); + eve::rising_factorial[eve::pedantic](T(-20.2), T(-10.3)), T(1.780471883652420e-15), ulp); TTS_ULP_EQUAL( - eve::pedantic(eve::rising_factorial)(T(-20.2), T(10.3)), T(-2.953299835634237e+12), ulp); + eve::rising_factorial[eve::pedantic](T(-20.2), T(10.3)), T(-2.953299835634237e+12), ulp); TTS_ULP_EQUAL( - eve::pedantic(eve::rising_factorial)(T(-20.2), T(30.4)), T(-4.755869905739260e+23), ulp); - TTS_ULP_EQUAL(eve::pedantic(eve::rising_factorial)(T(5), 10 * eve::eps(eve::as())), + eve::rising_factorial[eve::pedantic](T(-20.2), T(30.4)), T(-4.755869905739260e+23), ulp); + TTS_ULP_EQUAL(eve::rising_factorial[eve::pedantic](T(5), 10 * eve::eps(eve::as())), T(1.000000000000003e+00), ulp); TTS_ULP_EQUAL( - eve::pedantic(eve::rising_factorial)(T(-20.5), T(-1.0)), T(-4.651162790697673e-02), ulp); + eve::rising_factorial[eve::pedantic](T(-20.5), T(-1.0)), T(-4.651162790697673e-02), ulp); };