diff --git a/include/eve/module/elliptic/regular/ellint_1.hpp b/include/eve/module/elliptic/regular/ellint_1.hpp index d51beb3db6..39eed77636 100644 --- a/include/eve/module/elliptic/regular/ellint_1.hpp +++ b/include/eve/module/elliptic/regular/ellint_1.hpp @@ -7,10 +7,27 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct ellint_1_t : elementwise_callable + { + template + constexpr EVE_FORCEINLINE + T operator()(T a) const noexcept { return EVE_DISPATCH_CALL(a); } + + template + constexpr EVE_FORCEINLINE + eve::common_value_t operator()(T0 a, T1 b) const noexcept { return EVE_DISPATCH_CALL(a, b); } + + EVE_CALLABLE_OBJECT(ellint_1_t, ellint_1_); + }; + + //================================================================================================ //! @addtogroup elliptic //! @{ @@ -35,10 +52,10 @@ namespace eve //! namespace eve //! { //! template< eve::floating_ordered_value T > -//! T ellint_1(T k) noexcept; //1 +//! constexpr T ellint_1(T k) noexcept; //1 //! //! template< eve::floating_ordered_value T, eve::floating_ordered_value U > -//! eve::common_value_t ellint_1(T phi, U k) noexcept; //2 +//! constexpr eve::common_value_t ellint_1(T phi, U k) noexcept; //2 //! } //! @endcode //! @@ -63,7 +80,7 @@ namespace eve //! @godbolt{doc/elliptic/regular/ellint_1.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(ellint_1_, ellint_1); + inline constexpr auto ellint_1 = functor; } #include diff --git a/include/eve/module/elliptic/regular/ellint_2.hpp b/include/eve/module/elliptic/regular/ellint_2.hpp index a2b0746ab2..d703e55cee 100644 --- a/include/eve/module/elliptic/regular/ellint_2.hpp +++ b/include/eve/module/elliptic/regular/ellint_2.hpp @@ -7,10 +7,26 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct ellint_2_t : elementwise_callable + { + template + constexpr EVE_FORCEINLINE + T operator()(T a) const noexcept { return EVE_DISPATCH_CALL(a); } + + template + constexpr EVE_FORCEINLINE + eve::common_value_t operator()(T0 a, T1 b) const noexcept { return EVE_DISPATCH_CALL(a, b); } + + EVE_CALLABLE_OBJECT(ellint_2_t, ellint_2_); + }; + //================================================================================================ //! @addtogroup elliptic //! @{ @@ -37,10 +53,10 @@ namespace eve //! namespace eve //! { //! template< eve::floating_ordered_value T > -//! T ellint_2(T k) noexcept; //1 +//! constexpr T ellint_2(T k) noexcept; //1 //! //! template< eve::floating_ordered_value T, eve::floating_ordered_value U > -//! eve::common_value_t ellint_2(T phi, U k) noexcept; //2 +//! constexpr eve::common_value_t ellint_2(T phi, U k) noexcept; //2 //! } //! @endcode //! @@ -66,7 +82,8 @@ namespace eve //! @godbolt{doc/elliptic/regular/ellint_2.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(ellint_2_, ellint_2); + inline constexpr auto ellint_2 = functor; + } #include diff --git a/include/eve/module/elliptic/regular/ellint_d.hpp b/include/eve/module/elliptic/regular/ellint_d.hpp index 191be32deb..bd5069fc4f 100644 --- a/include/eve/module/elliptic/regular/ellint_d.hpp +++ b/include/eve/module/elliptic/regular/ellint_d.hpp @@ -7,10 +7,27 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct ellint_d_t : elementwise_callable + { + template + constexpr EVE_FORCEINLINE + T operator()(T a) const noexcept { return EVE_DISPATCH_CALL(a); } + + template + constexpr EVE_FORCEINLINE + eve::common_value_t operator()(T0 a, T1 b) const noexcept { return EVE_DISPATCH_CALL(a, b); } + + EVE_CALLABLE_OBJECT(ellint_d_t, ellint_d_); + }; + + //================================================================================================ //! @addtogroup elliptic //! @{ @@ -37,10 +54,10 @@ namespace eve //! namespace eve //! { //! template< eve::floating_ordered_value T > -//! T ellint_d(T k) noexcept; //1 +//! constexpr T ellint_d(T k) noexcept; //1 //! //! template< eve::floating_ordered_value T, eve::floating_ordered_value U > -//! eve::common_value_t ellint_d(T phi, U k) noexcept; //2 +//! constexpr eve::common_value_t ellint_d(T phi, U k) noexcept; //2 //! } //! @endcode //! @@ -66,7 +83,7 @@ namespace eve //! @godbolt{doc/elliptic/regular/ellint_d.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(ellint_d_, ellint_d); + inline constexpr auto ellint_d = functor; } #include diff --git a/include/eve/module/elliptic/regular/ellint_rc.hpp b/include/eve/module/elliptic/regular/ellint_rc.hpp index 66ac570a7c..e7ee0a0641 100644 --- a/include/eve/module/elliptic/regular/ellint_rc.hpp +++ b/include/eve/module/elliptic/regular/ellint_rc.hpp @@ -7,10 +7,22 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct ellint_rc_t : elementwise_callable + { + template + constexpr EVE_FORCEINLINE + eve::common_value_t operator()(T0 a, T1 b) const noexcept { return EVE_DISPATCH_CALL(a, b); } + + EVE_CALLABLE_OBJECT(ellint_rc_t, ellint_rc_); + }; + //================================================================================================ //! @addtogroup elliptic //! @{ @@ -30,7 +42,7 @@ namespace eve //! @code //! namespace eve //! { -//! template< eve::floating_ordered_value T, eve::floating_ordered_value U > +//! constexpr template< eve::floating_ordered_value T, eve::floating_ordered_value U > //! eve::common_value_t ellint_rc(T x, U y) noexcept; //! } //! @endcode @@ -49,7 +61,7 @@ namespace eve //! @godbolt{doc/elliptic/regular/ellint_rc.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(ellint_rc_, ellint_rc); + inline constexpr auto ellint_rc = functor; } #include diff --git a/include/eve/module/elliptic/regular/ellint_rd.hpp b/include/eve/module/elliptic/regular/ellint_rd.hpp index fc64e02cf9..7bf34bc7b2 100644 --- a/include/eve/module/elliptic/regular/ellint_rd.hpp +++ b/include/eve/module/elliptic/regular/ellint_rd.hpp @@ -7,10 +7,22 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct ellint_rd_t : elementwise_callable + { + template + constexpr EVE_FORCEINLINE + eve::common_value_t operator()(T0 a, T1 b, T2 c) const noexcept { return EVE_DISPATCH_CALL(a, b, c); } + + EVE_CALLABLE_OBJECT(ellint_rd_t, ellint_rd_); + }; + //================================================================================================ //! @addtogroup elliptic //! @{ @@ -34,7 +46,7 @@ namespace eve //! template< eve::floating_ordered_value T //! , eve::floating_ordered_value U //! , eve::floating_ordered_value V > -//! eve::common_value_t ellint_rc(T x, U y, V z) noexcept; +//! constexpr eve::common_value_t ellint_rd(T x, U y, V z) noexcept; //! } //! @endcode //! @@ -54,7 +66,7 @@ namespace eve //! @godbolt{doc/elliptic/regular/ellint_rc.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(ellint_rd_, ellint_rd); +inline constexpr auto ellint_rd = functor; } #include diff --git a/include/eve/module/elliptic/regular/ellint_rf.hpp b/include/eve/module/elliptic/regular/ellint_rf.hpp index 8108c387eb..570e3f1d56 100644 --- a/include/eve/module/elliptic/regular/ellint_rf.hpp +++ b/include/eve/module/elliptic/regular/ellint_rf.hpp @@ -7,10 +7,22 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct ellint_rf_t : elementwise_callable + { + template + constexpr EVE_FORCEINLINE + eve::common_value_t operator()(T0 a, T1 b, T2 c) const noexcept { return EVE_DISPATCH_CALL(a, b, c); } + + EVE_CALLABLE_OBJECT(ellint_rf_t, ellint_rf_); + }; + //================================================================================================ //! @addtogroup elliptic //! @{ @@ -34,7 +46,7 @@ namespace eve //! template< eve::floating_ordered_value T //! , eve::floating_ordered_value U //! , eve::floating_ordered_value V > - //! eve::common_value_t ellint_rf(T x, U y, V z) noexcept; + //! constexpr eve::common_value_t ellint_rf(T x, U y, V z) noexcept; //! } //! @endcode //! @@ -53,7 +65,7 @@ namespace eve //! @godbolt{doc/elliptic/regular/ellint_rc.cpp} //! @} //================================================================================================ - EVE_MAKE_CALLABLE(ellint_rf_, ellint_rf); + inline constexpr auto ellint_rf = functor; } #include diff --git a/include/eve/module/elliptic/regular/ellint_rg.hpp b/include/eve/module/elliptic/regular/ellint_rg.hpp index 0dc42ac77c..a030cb1785 100644 --- a/include/eve/module/elliptic/regular/ellint_rg.hpp +++ b/include/eve/module/elliptic/regular/ellint_rg.hpp @@ -7,10 +7,22 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct ellint_rg_t : elementwise_callable + { + template + constexpr EVE_FORCEINLINE + eve::common_value_t operator()(T0 a, T1 b, T2 c) const noexcept { return EVE_DISPATCH_CALL(a, b, c); } + + EVE_CALLABLE_OBJECT(ellint_rg_t, ellint_rg_); + }; + //================================================================================================ //! @addtogroup elliptic //! @{ @@ -35,7 +47,7 @@ namespace eve //! template< eve::floating_ordered_value T //! , eve::floating_ordered_value U //! , eve::floating_ordered_value V > -//! eve::common_value_t ellint_rg(T x, U y, V z) noexcept; +//! constexpr eve::common_value_t ellint_rg(T x, U y, V z) noexcept; //! } //! @endcode //! @@ -53,7 +65,7 @@ namespace eve //! @godbolt{doc/elliptic/regular/ellint_rc.cpp} //! @} //================================================================================================ -EVE_MAKE_CALLABLE(ellint_rg_, ellint_rg); + inline constexpr auto ellint_rg = functor; } #include diff --git a/include/eve/module/elliptic/regular/ellint_rj.hpp b/include/eve/module/elliptic/regular/ellint_rj.hpp index c9a17f9ea6..d196e61db4 100644 --- a/include/eve/module/elliptic/regular/ellint_rj.hpp +++ b/include/eve/module/elliptic/regular/ellint_rj.hpp @@ -7,10 +7,24 @@ //================================================================================================== #pragma once -#include +#include +#include +#include namespace eve { + template + struct ellint_rj_t : elementwise_callable + { + template + constexpr EVE_FORCEINLINE + eve::common_value_t operator()(T0 a, T1 b, T2 c, T3 d) const noexcept + { return EVE_DISPATCH_CALL(a, b, c, d); } + + EVE_CALLABLE_OBJECT(ellint_rj_t, ellint_rj_); + }; + //================================================================================================ //! @addtogroup elliptic //! @{ @@ -34,7 +48,7 @@ namespace eve //! , eve::floating_ordered_value U //! , eve::floating_ordered_value V //! , eve::floating_ordered_value W> -//! eve::common_value_t ellint_rj(T x, U y, V z, W p) noexcept; +//! constexpr eve::common_value_t ellint_rj(T x, U y, V z, W p) noexcept; //! } //! @endcode //! @@ -54,7 +68,7 @@ namespace eve //! //! @godbolt{doc/elliptic/regular/ellint_rc.cpp} //! @} -EVE_MAKE_CALLABLE(ellint_rj_, ellint_rj); + inline constexpr auto ellint_rj = functor; } #include diff --git a/include/eve/module/elliptic/regular/impl/ellint_1.hpp b/include/eve/module/elliptic/regular/impl/ellint_1.hpp index 82b0323e68..800a4ea160 100644 --- a/include/eve/module/elliptic/regular/impl/ellint_1.hpp +++ b/include/eve/module/elliptic/regular/impl/ellint_1.hpp @@ -15,92 +15,73 @@ namespace eve::detail { -template -EVE_FORCEINLINE T -ellint_1_(EVE_SUPPORTS(cpu_), T x) noexcept -{ - if constexpr( has_native_abi_v ) + template + constexpr EVE_FORCEINLINE T + ellint_1_(EVE_REQUIRES(cpu_), O const& o, T x) { - auto xx = eve::abs(x); - xx = if_else(xx > one(as(x)), allbits, xx); - auto a = one(as(x)); - auto b = sqrt(oneminus(sqr(xx))); - auto c = xx; - while( eve::any((eve::abs(c) > eps(as(x)))) ) + if constexpr( has_native_abi_v ) { - auto an = average(a, b); - auto bn = sqrt(a * b); - c = average(a, -b); - a = an; - b = bn; + auto xx = eve::abs(x); + xx = if_else(xx > one(as(x)), allbits, xx); + auto a = one(as(x)); + auto b = sqrt(oneminus(sqr(xx))); + auto c = xx; + while( eve::any((eve::abs(c) > eps(as(x)))) ) + { + auto an = average(a, b); + auto bn = sqrt(a * b); + c = average(a, -b); + a = an; + b = bn; + } + return pio_2(as(x)) / b; } - return pio_2(as(x)) / b; + else return apply_over(ellint_1[o], x); } - else return apply_over(ellint_1, x); -} - -template -EVE_FORCEINLINE auto -ellint_1_(EVE_SUPPORTS(cpu_), T phi, U x) noexcept --> common_value_t -{ - return arithmetic_call(ellint_1, phi, x); -} -template -EVE_FORCEINLINE T -ellint_1_(EVE_SUPPORTS(cpu_), T phi0, T x) noexcept requires has_native_abi_v -{ - x = eve::abs(x); - auto phi = abs(phi0); - // Carlson's algorithm works only for |phi| <= pi/2, - // use the integrand's periodicity to normalize phi - // - T rphi = rem(phi, pio_2(as(phi))); // rempio2 ? - T m = nearest((phi - rphi) / pio_2(as(phi))); - auto oddm = is_odd(m); - m = inc[oddm](m); - T s = if_else(oddm, mone, one(as(x))); - rphi = if_else(oddm, pio_2(as(phi)) - rphi, rphi); - auto [sinp, cosp] = sincos(rphi); - sinp *= sinp; - cosp *= cosp; - auto notdone = sinp * sqr(x) < one(as(phi)); - - auto c = if_else(notdone, rec(sinp), allbits); - auto r = s * ellint_rf(cosp * c, c - sqr(x), c); - auto xis1 = x == one(as(x)); - if( eve::any(xis1) ) - { - r = if_else(xis1, if_else(phi == pio_2(as(x)), inf(as(x)), asinh(tan(phi0))), r); - } - r = if_else(rphi < smallestposval(as(x)), s * rphi, r); - auto mgt0 = is_nez(m) && notdone; - auto greatphi = eps(as(phi)) * phi > one(as(phi)) && notdone; - if( eve::any((mgt0 || greatphi) && notdone) ) + template + constexpr common_value_t + ellint_1_(EVE_REQUIRES(cpu_), O const& o, T phi0, U x) { - auto z = ellint_1(x); - r += m * z; - r = if_else(greatphi, phi * z / pio_2(as(x)), r); - } - return copysign(r, phi); -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -ellint_1_(EVE_SUPPORTS(cpu_), C const& cond, T0 t0, Ts ... ts) noexcept --> decltype( if_else(cond, ellint_1(t0, ts...), t0) ) -{ - return mask_op(cond, eve::ellint_1, t0, ts ...); -} + if constexpr(std::same_as) + { + x = eve::abs(x); + auto phi = abs(phi0); + // Carlson's algorithm works only for |phi| <= pi/2, + // use the integrand's periodicity to normalize phi + // + T rphi = rem(phi, pio_2(as(phi))); // rempio2 ? + T m = nearest((phi - rphi) / pio_2(as(phi))); + auto oddm = is_odd(m); + m = inc[oddm](m); + T s = if_else(oddm, mone, one(as(x))); + rphi = if_else(oddm, pio_2(as(phi)) - rphi, rphi); + auto [sinp, cosp] = sincos(rphi); + sinp *= sinp; + cosp *= cosp; + auto notdone = sinp * sqr(x) < one(as(phi)); -template -EVE_FORCEINLINE auto -ellint_1_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, T0 t0, Ts ... ts) noexcept --> decltype( if_else(cond, ellint_1(t0, ts...), t0) ) -{ - return mask_op(cond, d(eve::ellint_1), t0, ts ...); -} + auto c = if_else(notdone, rec(sinp), allbits); + auto r = s * ellint_rf(cosp * c, c - sqr(x), c); + auto xis1 = x == one(as(x)); + if( eve::any(xis1) ) + { + r = if_else(xis1, if_else(phi == pio_2(as(x)), inf(as(x)), asinh(tan(phi0))), r); + } + r = if_else(rphi < smallestposval(as(x)), s * rphi, r); + auto mgt0 = is_nez(m) && notdone; + auto greatphi = eps(as(phi)) * phi > one(as(phi)) && notdone; + if( eve::any((mgt0 || greatphi) && notdone) ) + { + auto z = ellint_1(x); + r += m * z; + r = if_else(greatphi, phi * z / pio_2(as(x)), r); + } + return copysign(r, phi); + } + else + { + return arithmetic_call(ellint_1[o], phi0, x); + } + } } diff --git a/include/eve/module/elliptic/regular/impl/ellint_2.hpp b/include/eve/module/elliptic/regular/impl/ellint_2.hpp index 9842f06672..97e7b0881b 100644 --- a/include/eve/module/elliptic/regular/impl/ellint_2.hpp +++ b/include/eve/module/elliptic/regular/impl/ellint_2.hpp @@ -16,84 +16,63 @@ namespace eve::detail { -template -EVE_FORCEINLINE T -ellint_2_(EVE_SUPPORTS(cpu_), T k) noexcept -{ - if constexpr( has_native_abi_v ) + template + constexpr EVE_FORCEINLINE T + ellint_2_(EVE_REQUIRES(cpu_), O const& o, T k) { - auto k2 = sqr(k); - auto r = 2 * ellint_rg(zero(as(k)), oneminus(k2), one(as(k))); - return if_else(k2 == one(as(k)), one, r); + if constexpr( has_native_abi_v ) + { + auto k2 = sqr(k); + auto r = 2 * ellint_rg(zero(as(k)), oneminus(k2), one(as(k))); + return if_else(k2 == one(as(k)), one, r); + } + else return apply_over(ellint_2[o], k); } - else return apply_over(ellint_2, k); -} -template -EVE_FORCEINLINE auto -ellint_2_(EVE_SUPPORTS(cpu_), T phi, U x) noexcept -{ - return arithmetic_call(ellint_2, phi, x); -} - -template -EVE_FORCEINLINE T -ellint_2_(EVE_SUPPORTS(cpu_), T phi0, T x) noexcept -{ - if constexpr( has_native_abi_v ) + template + constexpr common_value_t + ellint_2_(EVE_REQUIRES(cpu_), O const& o, T phi0, U x) { - x = eve::abs(x); - auto phi = abs(phi0); - // Carlson's algorithm works only for |phi| <= pi/2, - // use the integrand's periodicity to normalize phi - // - T rphi = rem(phi, pio_2(as(phi))); // rempio2 ? - T m = nearest((phi - rphi) / pio_2(as(phi))); - auto oddm = is_odd(m); - m = inc[oddm](m); - T s = if_else(oddm, mone, one(as(x))); - rphi = if_else(oddm, pio_2(as(phi)) - rphi, rphi); + if constexpr(std::same_as) + { + x = eve::abs(x); + auto phi = abs(phi0); + // Carlson's algorithm works only for |phi| <= pi/2, + // use the integrand's periodicity to normalize phi + // + T rphi = rem(phi, pio_2(as(phi))); // rempio2 ? + T m = nearest((phi - rphi) / pio_2(as(phi))); + auto oddm = is_odd(m); + m = inc[oddm](m); + T s = if_else(oddm, mone, one(as(x))); + rphi = if_else(oddm, pio_2(as(phi)) - rphi, rphi); - auto k2 = sqr(x); - auto [sinp, cosp] = sincos(rphi); - auto sinp2 = sqr(sinp); - auto notdone = sinp2 * k2 <= one(as(phi)); - auto c = if_else(notdone, rec(sinp2), allbits); - auto cm1 = sqr(cosp) * c; // c-1 - auto r = + auto k2 = sqr(x); + auto [sinp, cosp] = sincos(rphi); + auto sinp2 = sqr(sinp); + auto notdone = sinp2 * k2 <= one(as(phi)); + auto c = if_else(notdone, rec(sinp2), allbits); + auto cm1 = sqr(cosp) * c; // c-1 + auto r = s * (oneminus(k2) * ellint_rf(cm1, c - k2, c) + k2 * (oneminus(k2)) * ellint_rd(cm1, c, c - k2) / 3 + k2 * sqrt(cm1 / (c * (c - k2)))); - auto testz = is_eqz(k2); - auto test1 = k2 == one(as(k2)); + auto testz = is_eqz(k2); + auto test1 = k2 == one(as(k2)); - r = if_else(testz, phi, r); - r = if_else(test1, sinp, r); - auto mgt0 = is_nez(m) && notdone; - if( eve::any(mgt0) ) + r = if_else(testz, phi, r); + r = if_else(test1, sinp, r); + auto mgt0 = is_nez(m) && notdone; + if( eve::any(mgt0) ) + { + m = if_else(test1 || testz, zero, m); + r += m * ellint_2(x); + } + return copysign(r, phi); + } + else { - m = if_else(test1 || testz, zero, m); - r += m * ellint_2(x); + return arithmetic_call(ellint_2[o], phi0, x); } - return copysign(r, phi); } - else return apply_over(ellint_2, phi0, x); -} - - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -ellint_2_(EVE_SUPPORTS(cpu_), C const& cond, Ts ... ts) noexcept -{ - return mask_op(cond, eve::ellint_2, ts ...); -} - -template -EVE_FORCEINLINE auto -ellint_2_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, Ts ... ts) noexcept -{ - return mask_op(cond, d(eve::ellint_2), ts ...); -} } diff --git a/include/eve/module/elliptic/regular/impl/ellint_d.hpp b/include/eve/module/elliptic/regular/impl/ellint_d.hpp index 8fe02dd423..abfed2ad32 100644 --- a/include/eve/module/elliptic/regular/impl/ellint_d.hpp +++ b/include/eve/module/elliptic/regular/impl/ellint_d.hpp @@ -15,85 +15,69 @@ namespace eve::detail { -template -EVE_FORCEINLINE T -ellint_d_(EVE_SUPPORTS(cpu_), T k) noexcept -{ - if constexpr( has_native_abi_v ) + template + constexpr EVE_FORCEINLINE T + ellint_d_(EVE_REQUIRES(cpu_), O const& o, T k) { - auto r = nan(as(k)); - auto t = sqr(k); - auto notdone = t < one(as(t)); - auto x(zero(as(k))); - auto y = oneminus(t); - auto z(one(as(k))); - return if_else(notdone, if_else(t < eps(as(k)), pio_4(as(k)), ellint_rd(x, y, z) / 3), r); + if constexpr( has_native_abi_v ) + { + auto r = nan(as(k)); + auto t = sqr(k); + auto notdone = t < one(as(t)); + auto x(zero(as(k))); + auto y = oneminus(t); + auto z(one(as(k))); + return if_else(notdone, if_else(t < eps(as(k)), pio_4(as(k)), ellint_rd(x, y, z) / 3), r); + } + else return apply_over(ellint_d[o], k); } - else return apply_over(ellint_d, k); -} -template -EVE_FORCEINLINE auto -ellint_d_(EVE_SUPPORTS(cpu_), T phi, U x) noexcept -{ - return arithmetic_call(ellint_d, phi, x); -} -template -EVE_FORCEINLINE T -ellint_d_(EVE_SUPPORTS(cpu_), T phi0, T k) noexcept requires has_native_abi_v -{ - auto aphi = eve::abs(phi0); - auto r = nan(as(aphi)); - auto notdone = is_finite(aphi); //&& test; - r = if_else(notdone, r, inf(as(phi0))); - if( eve::any(notdone) ) + template + constexpr T + ellint_d_(EVE_REQUIRES(cpu_), O const& o, T phi0, U k) requires has_native_abi_v { - auto br_philarge = [aphi](auto k) // aphi*eps(as(aphi)) > one(as(aphi)) - { return aphi * ellint_d(k) / pio_2(as(aphi)); }; - notdone = next_interval(br_philarge, notdone, aphi * eps(as(aphi)) > one(as(aphi)), r, k); - if( eve::any(notdone) ) + if constexpr(std::same_as) { - auto rphi = rem(aphi, pio_2(as(aphi))); - auto m = nearest((aphi - rphi) / pio_2(as(aphi))); - auto oddm = is_odd(m); - m = inc[oddm](m); - T s = if_else(oddm, mone, one(as(k))); - rphi = if_else(oddm, pio_2(as(phi0)) - rphi, rphi); - auto [sinp, cosp] = sincos(rphi); - T c = rec(sqr(sinp)); - T cm1 = sqr(cosp) * c; // c - 1 - T k2 = sqr(k); - auto br_reg = [c, cm1, k2, s, m](auto k) + auto aphi = eve::abs(phi0); + auto r = nan(as(aphi)); + auto notdone = is_finite(aphi); //&& test; + r = if_else(notdone, r, inf(as(phi0))); + if( eve::any(notdone) ) { - auto z = if_else(is_finite(c), s * ellint_rd(cm1, c - k2, c) / 3, zero); - auto test = is_nez(m); - if( eve::any(test) ) { return z + m * ellint_d(k); } - else return z; - }; - notdone = last_interval(br_reg, notdone, r, k); + auto br_philarge = [aphi](auto k) // aphi*eps(as(aphi)) > one(as(aphi)) + { return aphi * ellint_d(k) / pio_2(as(aphi)); }; + notdone = next_interval(br_philarge, notdone, aphi * eps(as(aphi)) > one(as(aphi)), r, k); + if( eve::any(notdone) ) + { + auto rphi = rem(aphi, pio_2(as(aphi))); + auto m = nearest((aphi - rphi) / pio_2(as(aphi))); + auto oddm = is_odd(m); + m = inc[oddm](m); + T s = if_else(oddm, mone, one(as(k))); + rphi = if_else(oddm, pio_2(as(phi0)) - rphi, rphi); + auto [sinp, cosp] = sincos(rphi); + T c = rec(sqr(sinp)); + T cm1 = sqr(cosp) * c; // c - 1 + T k2 = sqr(k); + auto br_reg = [c, cm1, k2, s, m](auto k) + { + auto z = if_else(is_finite(c), s * ellint_rd(cm1, c - k2, c) / 3, zero); + auto test = is_nez(m); + if( eve::any(test) ) { return z + m * ellint_d(k); } + else return z; + }; + notdone = last_interval(br_reg, notdone, r, k); - r = if_else(k2 * sinp * sinp > one(as(phi0)), allbits, r); - r = if_else(is_eqz(phi0), zero, r); + r = if_else(k2 * sinp * sinp > one(as(phi0)), allbits, r); + r = if_else(is_eqz(phi0), zero, r); + } + } + return copysign(r, phi0); + } + else + { + return arithmetic_call(ellint_d[o], phi0, k); } } - return copysign(r, phi0); -} - - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -ellint_d_(EVE_SUPPORTS(cpu_), C const& cond, Ts ... ts) noexcept -{ - return mask_op(cond, eve::ellint_d, ts ...); -} - -template -EVE_FORCEINLINE auto -ellint_d_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, Ts ... ts) noexcept -{ - return mask_op(cond, d(eve::ellint_d), ts ...); -} } diff --git a/include/eve/module/elliptic/regular/impl/ellint_rc.hpp b/include/eve/module/elliptic/regular/impl/ellint_rc.hpp index 9d4ee0ffaa..0455d10ca8 100644 --- a/include/eve/module/elliptic/regular/impl/ellint_rc.hpp +++ b/include/eve/module/elliptic/regular/impl/ellint_rc.hpp @@ -14,77 +14,58 @@ namespace eve::detail { -template -EVE_FORCEINLINE auto -ellint_rc_(EVE_SUPPORTS(cpu_), T x, U y) noexcept --> common_value_t -{ - return arithmetic_call(ellint_rc, x, y); -} - -template -EVE_FORCEINLINE T -ellint_rc_(EVE_SUPPORTS(cpu_), T x, T y) noexcept requires has_native_abi_v -{ - auto ylt0 = is_ltz(y); - auto prefix = one(as(x)); - if( eve::any(ylt0) ) // provisions for y < 0 + template + constexpr common_value_t + ellint_rc_(EVE_REQUIRES(cpu_), O const& o, T x, U y) { - prefix = if_else(ylt0, sqrt(x / (x - y)), one); - x = sub[ylt0](x, y); - y = if_else(ylt0, -y, y); - } - // now all y are >= 0 - auto r = nan(as(x)); - auto notdone = is_nltz(x) && is_nez(y); - if( eve::any(notdone) ) - { - auto tmp0 = rsqrt(y); - auto br_0 = [tmp0](auto x, auto y) // x == y || x == 0 - { - auto z = mul[is_eqz(x)](tmp0, pio_2(as(y))); - return z; // if_else(x == y, tmp0, tmp0*pio_2(as(y))); - }; - notdone = next_interval(br_0, notdone, (x == y) || is_eqz(x), r, x, y); - if( eve::any(notdone) ) + if constexpr(std::same_as) { - auto tmp1 = sqrt(eve::abs(x - y)); - auto tmp2 = rsqrt(x); - auto tmp3 = tmp1 * tmp2; - auto br_1 = [tmp1, tmp3]() // y > x - { return atan(tmp3) / tmp1; }; - notdone = next_interval(br_1, notdone, y > x, r); + auto ylt0 = is_ltz(y); + auto prefix = one(as(x)); + if( eve::any(ylt0) ) // provisions for y < 0 + { + prefix = if_else(ylt0, sqrt(x / (x - y)), one); + x = sub[ylt0](x, y); + y = if_else(ylt0, -y, y); + } + // now all y are >= 0 + auto r = nan(as(x)); + auto notdone = is_nltz(x) && is_nez(y); if( eve::any(notdone) ) { - auto br_2 = [tmp1, tmp3]() // y > 0.5*x - { return atanh(tmp3) / tmp1; }; - notdone = next_interval(br_2, notdone, y > T(0.5) * x, r); + auto tmp0 = rsqrt(y); + auto br_0 = [tmp0](auto x, auto y) // x == y || x == 0 + { + auto z = mul[is_eqz(x)](tmp0, pio_2(as(y))); + return z; // if_else(x == y, tmp0, tmp0*pio_2(as(y))); + }; + notdone = next_interval(br_0, notdone, (x == y) || is_eqz(x), r, x, y); if( eve::any(notdone) ) { - auto br_3 = [tmp0, tmp1](auto x) { return log((sqrt(x) + tmp1) * tmp0) / tmp1; }; - last_interval(br_3, notdone, r, x); + auto tmp1 = sqrt(eve::abs(x - y)); + auto tmp2 = rsqrt(x); + auto tmp3 = tmp1 * tmp2; + auto br_1 = [tmp1, tmp3]() // y > x + { return atan(tmp3) / tmp1; }; + notdone = next_interval(br_1, notdone, y > x, r); + if( eve::any(notdone) ) + { + auto br_2 = [tmp1, tmp3]() // y > 0.5*x + { return atanh(tmp3) / tmp1; }; + notdone = next_interval(br_2, notdone, y > T(0.5) * x, r); + if( eve::any(notdone) ) + { + auto br_3 = [tmp0, tmp1](auto x) { return log((sqrt(x) + tmp1) * tmp0) / tmp1; }; + last_interval(br_3, notdone, r, x); + } + } } } + return if_else(x == inf(as(x)), zero, r * prefix); + } + else + { + return arithmetic_call(ellint_rc[o], x, y); } } - return if_else(x == inf(as(x)), zero, r * prefix); -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -ellint_rc_(EVE_SUPPORTS(cpu_), C const& cond, T0 t0, Ts ... ts) noexcept --> decltype( if_else(cond, ellint_rc(t0, ts...), t0) ) -{ - return mask_op(cond, eve::ellint_rc, t0, ts ...); -} - -template -EVE_FORCEINLINE auto -ellint_rc_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, T0 t0, Ts ... ts) noexcept --> decltype( if_else(cond, ellint_rc(t0, ts...), t0) ) -{ - return mask_op(cond, d(eve::ellint_rc), t0, ts ...); -} } diff --git a/include/eve/module/elliptic/regular/impl/ellint_rd.hpp b/include/eve/module/elliptic/regular/impl/ellint_rd.hpp index c9125dfd11..2cf2582c88 100644 --- a/include/eve/module/elliptic/regular/impl/ellint_rd.hpp +++ b/include/eve/module/elliptic/regular/impl/ellint_rd.hpp @@ -14,118 +14,89 @@ namespace eve::detail { -template -EVE_FORCEINLINE auto -ellint_rd_(EVE_SUPPORTS(cpu_), - T x, - U y, - V z) noexcept --> common_value_t -{ - return arithmetic_call(ellint_rd, x, y, z); -} - -template -EVE_FORCEINLINE auto -ellint_rd_(EVE_SUPPORTS(cpu_), raw_type const&, T x, U y, V z) noexcept --> common_value_t{ - return arithmetic_call(raw(ellint_rd), x, y, z); -} - -template -EVE_FORCEINLINE T -ellint_rd_(EVE_SUPPORTS(cpu_), raw_type const&, T x, T y, T z) noexcept requires has_native_abi_v -{ - using elt_t = element_type_t; - T xn = x; - T yn = y; - T zn = z; - T an = (x + y + T(3) * z) / 5; - T a0 = an; - auto epsi = pow_abs(elt_t(0.25) * eps(as(element_type_t())), -1 / T(8)); - T q = epsi * (eve::max)((eve::max)(eve::abs(an - xn), eve::abs(an - yn)), eve::abs(an - zn)) - * elt_t(1.2); - T fn(one(as(x))); - T rd_sum(zero(as(x))); - - // duplication - unsigned k = 0; - T hf = half(as(x)); - for( ; k < 30; ++k ) + template + constexpr common_value_t + ellint_rd_(EVE_REQUIRES(cpu_), O const& o, T x, U y, V z) { - T root_x = eve::sqrt(xn); - T root_y = eve::sqrt(yn); - T root_z = eve::sqrt(zn); - T lambda = root_x * root_y + root_x * root_z + root_y * root_z; - rd_sum += fn / (root_z * (zn + lambda)); - an = average(an, lambda) * hf; - xn = average(xn, lambda) * hf; - yn = average(yn, lambda) * hf; - zn = average(zn, lambda) * hf; - q *= T(0.25); - fn *= T(0.25); - if( eve::all(q < eve::abs(an)) ) break; - } - - T invan = rec(an); - T xx = fn * (a0 - x) * invan; - T yy = fn * (a0 - y) * invan; - T zz = -(xx + yy) / 3; - T xxyy = xx * yy; - T zz2 = sqr(zz); - T e2 = fnma(T(6), zz2, xxyy); - T e3 = (3 * xxyy - 8 * zz2) * zz; - T e4 = 3 * (xxyy - zz2) * zz2; - T e5 = xxyy * zz2 * zz; + if constexpr(O::contains(raw2)) + { + if constexpr(std::same_as && std::same_as) + { + using elt_t = element_type_t; + T xn = x; + T yn = y; + T zn = z; + T an = (x + y + T(3) * z) / 5; + T a0 = an; + auto epsi = pow_abs(elt_t(0.25) * eps(as(element_type_t())), -1 / T(8)); + T q = epsi * (eve::max)((eve::max)(eve::abs(an - xn), eve::abs(an - yn)), eve::abs(an - zn)) + * elt_t(1.2); + T fn(one(as(x))); + T rd_sum(zero(as(x))); - constexpr elt_t c0 = sizeof(elt_t) == 4 ? -3 / 14.0f : -3 / 14.0; - constexpr elt_t c1 = sizeof(elt_t) == 4 ? 1 / 6.0f : 1 / 6.0; - constexpr elt_t c2 = sizeof(elt_t) == 4 ? 9 / 88.0f : 9 / 88.0; - constexpr elt_t c3 = sizeof(elt_t) == 4 ? -3 / 22.0f : -3 / 22.0; - constexpr elt_t c4 = sizeof(elt_t) == 4 ? -9 / 52.0f : -9 / 52.0; - constexpr elt_t c5 = sizeof(elt_t) == 4 ? 3 / 26.0f : 3 / 26.0; - constexpr elt_t c6 = sizeof(elt_t) == 4 ? -1 / 16.0f : 1 / 16.0; - constexpr elt_t c7 = sizeof(elt_t) == 4 ? 3 / 40.0f : 3 / 40.0; - constexpr elt_t c8 = sizeof(elt_t) == 4 ? 3 / 20.0f : 3 / 20.0; - constexpr elt_t c9 = sizeof(elt_t) == 4 ? 45 / 272.0f : 45 / 272.0; - constexpr elt_t c10 = sizeof(elt_t) == 4 ? -9 / 68.0f : -9 / 68.0; + // duplication + unsigned k = 0; + T hf = half(as(x)); + for( ; k < 30; ++k ) + { + T root_x = eve::sqrt(xn); + T root_y = eve::sqrt(yn); + T root_z = eve::sqrt(zn); + T lambda = root_x * root_y + root_x * root_z + root_y * root_z; + rd_sum += fn / (root_z * (zn + lambda)); + an = average(an, lambda) * hf; + xn = average(xn, lambda) * hf; + yn = average(yn, lambda) * hf; + zn = average(zn, lambda) * hf; + q *= T(0.25); + fn *= T(0.25); + if( eve::all(q < eve::abs(an)) ) break; + } - T e22 = sqr(e2); - T result = fn * invan * sqrt(invan) - * (1 + e2 * c0 + e3 * c1 + e22 * c2 + e4 * c3 + e2 * (e3 * c4 + e22 * c6) + e5 * c5 - + sqr(e3) * c7 + e2 * e4 * c8 + c9 * e22 * e3 + (e3 * e4 + e2 * e5) * c10); + T invan = rec(an); + T xx = fn * (a0 - x) * invan; + T yy = fn * (a0 - y) * invan; + T zz = -(xx + yy) / 3; + T xxyy = xx * yy; + T zz2 = sqr(zz); + T e2 = fnma(T(6), zz2, xxyy); + T e3 = (3 * xxyy - 8 * zz2) * zz; + T e4 = 3 * (xxyy - zz2) * zz2; + T e5 = xxyy * zz2 * zz; - return fma(T(3), rd_sum, result); -} + constexpr elt_t c0 = sizeof(elt_t) == 4 ? -3 / 14.0f : -3 / 14.0; + constexpr elt_t c1 = sizeof(elt_t) == 4 ? 1 / 6.0f : 1 / 6.0; + constexpr elt_t c2 = sizeof(elt_t) == 4 ? 9 / 88.0f : 9 / 88.0; + constexpr elt_t c3 = sizeof(elt_t) == 4 ? -3 / 22.0f : -3 / 22.0; + constexpr elt_t c4 = sizeof(elt_t) == 4 ? -9 / 52.0f : -9 / 52.0; + constexpr elt_t c5 = sizeof(elt_t) == 4 ? 3 / 26.0f : 3 / 26.0; + constexpr elt_t c6 = sizeof(elt_t) == 4 ? -1 / 16.0f : 1 / 16.0; + constexpr elt_t c7 = sizeof(elt_t) == 4 ? 3 / 40.0f : 3 / 40.0; + constexpr elt_t c8 = sizeof(elt_t) == 4 ? 3 / 20.0f : 3 / 20.0; + constexpr elt_t c9 = sizeof(elt_t) == 4 ? 45 / 272.0f : 45 / 272.0; + constexpr elt_t c10 = sizeof(elt_t) == 4 ? -9 / 68.0f : -9 / 68.0; -template -EVE_FORCEINLINE T -ellint_rd_(EVE_SUPPORTS(cpu_), T x, T y, T z) noexcept requires has_native_abi_v -{ - auto r = nan(as(x)); - auto notdone = is_nltz(x) && is_nltz(y) && is_nlez(z) && is_nez(x + y); - // z equal to zero or any parameter nan or less than zero or more than one of x and y zero implies - // nan - auto br0 = [x, y, z]() { return raw(ellint_rd)(x, y, z); }; - last_interval(br0, notdone, r); - return r; -} + T e22 = sqr(e2); + T result = fn * invan * sqrt(invan) + * (1 + e2 * c0 + e3 * c1 + e22 * c2 + e4 * c3 + e2 * (e3 * c4 + e22 * c6) + e5 * c5 + + sqr(e3) * c7 + e2 * e4 * c8 + c9 * e22 * e3 + (e3 * e4 + e2 * e5) * c10); -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -ellint_rd_(EVE_SUPPORTS(cpu_), C const& cond, T0 t0, Ts ... ts) noexcept --> decltype( if_else(cond, ellint_rd(t0, ts...), t0) ) -{ - return mask_op(cond, eve::ellint_rd, t0, ts ...); -} - -template -EVE_FORCEINLINE auto -ellint_rd_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, T0 t0, Ts ... ts) noexcept --> decltype( if_else(cond, ellint_rd(t0, ts...), t0) ) -{ - return mask_op(cond, d(eve::ellint_rd), t0, ts ...); -} + return fma(T(3), rd_sum, result); + } + else + { + return arithmetic_call(ellint_rd[o], x, y, z); + } + } + else + { + auto r = nan(as(x)); + auto notdone = is_nltz(x) && is_nltz(y) && is_nlez(z) && is_nez(x + y); + // z equal to zero or any parameter nan or less than zero or more than one of x and y zero implies + // nan + auto br0 = [x, y, z]() { return ellint_rd[raw](x, y, z); }; // o+raw + last_interval(br0, notdone, r); + return r; + } + } } diff --git a/include/eve/module/elliptic/regular/impl/ellint_rf.hpp b/include/eve/module/elliptic/regular/impl/ellint_rf.hpp index 4cdc3e072e..4ccce3ea6c 100644 --- a/include/eve/module/elliptic/regular/impl/ellint_rf.hpp +++ b/include/eve/module/elliptic/regular/impl/ellint_rf.hpp @@ -14,107 +14,77 @@ namespace eve::detail { -template -EVE_FORCEINLINE auto -ellint_rf_(EVE_SUPPORTS(cpu_), - T x, - U y, - V z) noexcept --> common_value_t -{ - return arithmetic_call(ellint_rf, x, y, z); -} - -template -EVE_FORCEINLINE auto -ellint_rf_(EVE_SUPPORTS(cpu_), raw_type const&, T x, U y, V z) noexcept --> common_value_t -{ - return arithmetic_call(raw(ellint_rf), x, y, z); -} - -template -EVE_FORCEINLINE T -ellint_rf_(EVE_SUPPORTS(cpu_), raw_type const&, T x, T y, T z) noexcept requires has_native_abi_v -{ - T xn = x; - T yn = y; - T zn = z; - T an = (x + y + z) / 3; - T a0 = an; - auto epsi = pow_abs(3 * eps(as(element_type_t())), -1 / T(8)); - T q = epsi * (eve::max)((eve::max)(eve::abs(an - xn), eve::abs(an - yn)), eve::abs(an - zn)); - T fn = one(as(x)); - - // duplication - unsigned k = 1; - T hf = half(as(x)); - for( ; k < 30; ++k ) + template + constexpr common_value_t + ellint_rf_(EVE_REQUIRES(cpu_), O const& o, T x, U y, V z) { - T root_x = eve::sqrt(xn); - T root_y = eve::sqrt(yn); - T root_z = eve::sqrt(zn); - T lambda = root_x * root_y + root_x * root_z + root_y * root_z; - an = average(an, lambda) * hf; - xn = average(xn, lambda) * hf; - yn = average(yn, lambda) * hf; - zn = average(zn, lambda) * hf; - q *= T(0.25); - fn *= T(4); - if( eve::all(q < eve::abs(an)) ) break; - } - T denom = rec(an * fn); - T xx = (a0 - x) * denom; - T yy = (a0 - y) * denom; - T zz = -xx - yy; + if constexpr(O::contains(raw2)) + { + if constexpr(std::same_as && std::same_as) + { + T xn = x; + T yn = y; + T zn = z; + T an = (x + y + z) / 3; + T a0 = an; + auto epsi = pow_abs(3 * eps(as(element_type_t())), -1 / T(8)); + T q = epsi * (eve::max)((eve::max)(eve::abs(an - xn), eve::abs(an - yn)), eve::abs(an - zn)); + T fn = one(as(x)); - // Taylor series expansion to the 7th order - T p = xx * yy; - T e2 = fnma(zz, zz, p); - T e3 = p * zz; - // TODO put constant values in expansion - using elt_t = element_type_t; - constexpr elt_t c0 = sizeof(elt_t) == 4 ? 1 / 14.0f : 1 / 14.0; - constexpr elt_t c1 = sizeof(elt_t) == 4 ? 3 / 104.0f : 3 / 104.0; - constexpr elt_t c2 = sizeof(elt_t) == 4 ? -1 / 10.0f : -1 / 10.0; - constexpr elt_t c4 = sizeof(elt_t) == 4 ? 1 / 24.0f : 1 / 24.0; - constexpr elt_t c5 = sizeof(elt_t) == 4 ? -3 / 44.0f : -3 / 44.0; - constexpr elt_t c6 = sizeof(elt_t) == 4 ? -5 / 208.0f : -5 / 208.0; - constexpr elt_t c7 = sizeof(elt_t) == 4 ? -1 / 16.0f : 1 / 16.0; - return (fma(e3, - fma(e3, c1, c0), - fma(e2, (c2 + e3 * c5 + e2 * (c4 + e2 * c6 + e3 * c7)), one(as(x))))) - * rsqrt(an); -} + // duplication + unsigned k = 1; + T hf = half(as(x)); + for( ; k < 30; ++k ) + { + T root_x = eve::sqrt(xn); + T root_y = eve::sqrt(yn); + T root_z = eve::sqrt(zn); + T lambda = root_x * root_y + root_x * root_z + root_y * root_z; + an = average(an, lambda) * hf; + xn = average(xn, lambda) * hf; + yn = average(yn, lambda) * hf; + zn = average(zn, lambda) * hf; + q *= T(0.25); + fn *= T(4); + if( eve::all(q < eve::abs(an)) ) break; + } + T denom = rec(an * fn); + T xx = (a0 - x) * denom; + T yy = (a0 - y) * denom; + T zz = -xx - yy; -template -EVE_FORCEINLINE T -ellint_rf_(EVE_SUPPORTS(cpu_), T x, T y, T z) noexcept requires has_native_abi_v -{ - auto r = nan(as(x)); - auto notdone = - is_nltz(x) && is_nltz(y) && is_nltz(z) && is_nez(x + y) && is_nez(y + z) && is_nez(z + x); - // any parameter nan or less than zero or more than one parameter zero implies nan - auto br0 = [x, y, z]() { return raw(ellint_rf)(x, y, z); }; - last_interval(br0, notdone, r); - return r; -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -ellint_rf_(EVE_SUPPORTS(cpu_), C const& cond, T0 t0, Ts ... ts) noexcept --> decltype( if_else(cond, ellint_rf(t0, ts...), t0) ) -{ - return mask_op(cond, eve::ellint_rf, t0, ts ...); -} - -template -EVE_FORCEINLINE auto -ellint_rf_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, T0 t0, Ts ... ts) noexcept --> decltype( if_else(cond, ellint_rf(t0, ts...), t0) ) -{ - return mask_op(cond, d(eve::ellint_rf), t0, ts ...); -} + // Taylor series expansion to the 7th order + T p = xx * yy; + T e2 = fnma(zz, zz, p); + T e3 = p * zz; + // TODO put constant values in expansion + using elt_t = element_type_t; + constexpr elt_t c0 = sizeof(elt_t) == 4 ? 1 / 14.0f : 1 / 14.0; + constexpr elt_t c1 = sizeof(elt_t) == 4 ? 3 / 104.0f : 3 / 104.0; + constexpr elt_t c2 = sizeof(elt_t) == 4 ? -1 / 10.0f : -1 / 10.0; + constexpr elt_t c4 = sizeof(elt_t) == 4 ? 1 / 24.0f : 1 / 24.0; + constexpr elt_t c5 = sizeof(elt_t) == 4 ? -3 / 44.0f : -3 / 44.0; + constexpr elt_t c6 = sizeof(elt_t) == 4 ? -5 / 208.0f : -5 / 208.0; + constexpr elt_t c7 = sizeof(elt_t) == 4 ? -1 / 16.0f : 1 / 16.0; + return (fma(e3, + fma(e3, c1, c0), + fma(e2, (c2 + e3 * c5 + e2 * (c4 + e2 * c6 + e3 * c7)), one(as(x))))) + * rsqrt(an); + } + else + { + return arithmetic_call(ellint_rf[o], x, y, z); + } + } + else + { + auto r = nan(as(x)); + auto notdone = + is_nltz(x) && is_nltz(y) && is_nltz(z) && is_nez(x + y) && is_nez(y + z) && is_nez(z + x); + // any parameter nan or less than zero or more than one parameter zero implies nan + auto br0 = [x, y, z]() { return ellint_rf[raw](x, y, z); }; + last_interval(br0, notdone, r); + return r; + } + } } diff --git a/include/eve/module/elliptic/regular/impl/ellint_rg.hpp b/include/eve/module/elliptic/regular/impl/ellint_rg.hpp index 03f27a143a..cfa5329995 100644 --- a/include/eve/module/elliptic/regular/impl/ellint_rg.hpp +++ b/include/eve/module/elliptic/regular/impl/ellint_rg.hpp @@ -16,71 +16,33 @@ namespace eve::detail { -template -EVE_FORCEINLINE auto -ellint_rg_(EVE_SUPPORTS(cpu_), - T x, - U y, - V z) noexcept --> common_value_t -{ - return arithmetic_call(ellint_rg, x, y, z); -} - -template -EVE_FORCEINLINE auto -ellint_rg_(EVE_SUPPORTS(cpu_), raw_type const&, T x, U y, V z) noexcept --> common_value_t -{ - return arithmetic_call(raw(ellint_rg), x, y, z); -} - -template -EVE_FORCEINLINE T -ellint_rg_(EVE_SUPPORTS(cpu_), raw_type const&, T x, T y, T z) noexcept requires has_native_abi_v -{ - auto cond_swap = [](auto cond, auto& a, auto& b) + template + constexpr common_value_t + ellint_rg_(EVE_REQUIRES(cpu_), O const& o, T x, U y, V z) { - auto aa = if_else(cond, a, b); - auto bb = if_else(cond, b, a); - a = aa; - b = bb; - }; - cond_swap(x < y, x, y); - cond_swap(x < z, x, z); - cond_swap(y > z, y, z); - // now all(x >= z) and all(z >= y) - return (z * ellint_rf(x, y, z) - (x - z) * (y - z) * ellint_rd(x, y, z) / 3 + sqrt(x * y / z)) - * half(as(x)); -} - -template -EVE_FORCEINLINE T -ellint_rg_(EVE_SUPPORTS(cpu_), T x, T y, T z) noexcept requires has_native_abi_v -{ - auto r = nan(as(x)); - auto notdone = is_nltz(x) && is_nltz(y) && is_nltz(z); - // any parameter nan or less than zero implies nan - auto br0 = [x, y, z]() { return raw(ellint_rg)(x, y, z); }; - last_interval(br0, notdone, r); - return r; -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -ellint_rg_(EVE_SUPPORTS(cpu_), C const& cond, T0 t0, Ts ... ts) noexcept --> decltype( if_else(cond, ellint_rg(t0, ts...), t0) ) -{ - return mask_op(cond, eve::ellint_rg, t0, ts ...); -} - -template -EVE_FORCEINLINE auto -ellint_rg_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, T0 t0, Ts ... ts) noexcept --> decltype( if_else(cond, ellint_rg(t0, ts...), t0) ) -{ - return mask_op(cond, d(eve::ellint_rg), t0, ts ...); -} + if constexpr(O::contains(raw2)) + { + if constexpr(std::same_as && std::same_as) + { + swap_if(x < y, x, y); + swap_if(x < z, x, z); + swap_if(y > z, y, z); + // now all(x >= z) and all(z >= y) + return (z * ellint_rf(x, y, z) - (x - z) * (y - z) * + ellint_rd(x, y, z) / 3 + sqrt(x * y / z)) + * half(as(x)); + } + else + return arithmetic_call(ellint_rg[o], x, y, z); + } + else + { + auto r = nan(as(x)); + auto notdone = is_nltz(x) && is_nltz(y) && is_nltz(z); + // any parameter nan or less than zero implies nan + auto br0 = [x, y, z]() { return ellint_rg[raw](x, y, z); }; + last_interval(br0, notdone, r); + return r; + } + } } diff --git a/include/eve/module/elliptic/regular/impl/ellint_rj.hpp b/include/eve/module/elliptic/regular/impl/ellint_rj.hpp index f8c9ae9e55..d8cfa49346 100644 --- a/include/eve/module/elliptic/regular/impl/ellint_rj.hpp +++ b/include/eve/module/elliptic/regular/impl/ellint_rj.hpp @@ -17,231 +17,196 @@ namespace eve::detail { -template -EVE_FORCEINLINE auto -ellint_rj_(EVE_SUPPORTS(cpu_), T x, U y, V z, W p) noexcept --> common_value_t -{ - return arithmetic_call(ellint_rj, x, y, z, p); -} - -template -EVE_FORCEINLINE auto -ellint_rj_(EVE_SUPPORTS(cpu_), raw_type const&, T x, U y, V z, W p) noexcept --> common_value_t -{ - return arithmetic_call(raw(ellint_rj), x, y, z, p); -} - -template -EVE_FORCEINLINE T -ellint_rj_(EVE_SUPPORTS(cpu_), T x, T y, T z, T p) noexcept requires has_native_abi_v -{ - auto r = nan(as(x)); - auto notdone = is_nltz(x) && is_nltz(y) && is_nltz(z) && is_nez(p) && is_nez(x + y) - && is_nez(z + y) && is_nez(x + z); - if( eve::any(notdone) ) + template + constexpr common_value_t + ellint_rj_(EVE_REQUIRES(cpu_), O const& o, T x, U y, V z, W p) { - auto br_pneg = [](auto x, auto y, auto z, auto p) // p < 0 + if constexpr(std::same_as && std::same_as && std::same_as) { - auto cond_swap = [](auto cond, auto& a, auto& b) + if (O::contains(raw2)) { - auto aa = if_else(cond, a, b); - auto bb = if_else(cond, b, a); - a = aa; - b = bb; - }; - cond_swap(x < y, x, y); - cond_swap(x < z, x, z); - cond_swap(y > z, y, z); - // now all(x <= y) and all(y <= z) - auto q = -p; - p = fms(z, (x + y + q), x * y) / (z + q); - auto v = (p - z) * ellint_rj(x, y, z, p); - v -= 3 * ellint_rf(x, y, z); - auto pq = p * q; - auto tmp = fma(x, y, pq); - v += 3 * sqrt((x * y * z) / tmp) * ellint_rc(tmp, pq); - v /= (z + q); - return v; - }; - notdone = next_interval(br_pneg, notdone, is_ltz(p), r, x, y, z, p); - if( eve::any(notdone) ) - { - auto br_eqxy = [](auto x, auto p) // (x == y) && (x == z) - { - auto rsqtx = rsqrt(x); - return if_else(x == p, rsqtx / x, 3 * ellint_rc(x, p) - rsqtx / (x - p)); - }; - notdone = next_interval(br_eqxy, notdone, (x == y) && (x == z), r, x, p); - if( eve::any(notdone) ) - { - auto br_eqzp = [](auto x, auto y, auto z) // (p == z) - { return ellint_rd(x, y, z); }; - notdone = next_interval(br_eqzp, notdone, (z == p), r, x, y, z); - if( eve::any(notdone) ) + auto xn = x; + auto yn = y; + auto zn = z; + auto pn = p; + auto an = (x + y + z + 2 * p) / 5; + auto a0 = an; + auto delta = (p - x) * (p - y) * (p - z); + auto q = pow_abs(eps(as()) / 5, -T(1) / 8) + * (eve::max)((eve::max)(eve::abs(an - x), eve::abs(an - y)), + (eve::max)(eve::abs(an - z), eve::abs(an - p))); + + T fmn(one(as(x))); // 4^-n + T rc_sum(zero(as(x))); + + for( unsigned n = 0; n < 30; ++n ) { - x = if_else(notdone, x, one); - y = if_else(notdone, y, one); - z = if_else(notdone, z, one); - p = if_else(notdone, p, one); - auto br_last = [](auto x, auto y, auto z, auto p) { return raw(ellint_rj)(x, y, z, p); }; - last_interval(br_last, notdone, r, x, y, z, p); - } - } - } - } - return r; -} + auto rx = sqrt(xn); + auto ry = sqrt(yn); + auto rz = sqrt(zn); + auto rp = sqrt(pn); + auto dn = (rp + rx) * (rp + ry) * (rp + rz); + auto en = delta / dn; + en /= dn; + auto test = (en < -0.5) && (en > -1.5); + auto rc1p = [](auto y){ + auto r = zero(as(y)); + auto notdone = true_(as(y)); //!= mone(as(y)); + if( eve::any(notdone) ) + { + auto br_yltm1 = [](auto my) { return rsqrt(my) * ellint_rc(my, dec(my)); }; + notdone = next_interval(br_yltm1, notdone, y < mone(as(y)), r, -y); + if( eve::any(notdone) ) + { + auto br_ygt0 = [](auto y) + { + return atan(sqrt(y)) * rsqrt(y); + ; + }; + notdone = next_interval(br_ygt0, notdone, is_gtz(y), r, y); + if( eve::any(notdone) ) + { + auto arg = sqrt(-y); + auto log1parg = log1p(arg); + auto br_ygtmhf = [arg, log1parg]() + { return if_else(is_eqz(arg), T(1), (log1parg - log1p(-arg)) / (2 * arg)); }; + notdone = next_interval(br_ygtmhf, notdone, y > T(-0.5), r); + if( eve::any(notdone) ) + { + auto br_last = [arg, log1parg](auto y) { return log1parg * rsqrt(inc(y)) / arg; }; + last_interval(br_last, notdone, r, y); + } + } + } + } + return r; + }; -template -EVE_FORCEINLINE T -ellint_rj_(EVE_SUPPORTS(cpu_), raw_type const&, T x, T y, T z, T p) noexcept requires - has_native_abi_v -{ - auto xn = x; - auto yn = y; - auto zn = z; - auto pn = p; - auto an = (x + y + z + 2 * p) / 5; - auto a0 = an; - auto delta = (p - x) * (p - y) * (p - z); - auto q = pow_abs(eps(as()) / 5, -T(1) / 8) - * (eve::max)((eve::max)(eve::abs(an - x), eve::abs(an - y)), - (eve::max)(eve::abs(an - z), eve::abs(an - p))); + if( eve::any(test) ) + { + // + // occationally en ~ -1, we then have no means of calculating + // rc(1, 1+en) without terrible cancellation error, so we + // need to get to 1+en directly. by substitution we have + // + // 1+e_0 = 1 + (p-x)*(p-y)*(p-z)/((sqrt(p) + sqrt(x))*(sqrt(p)+sqrt(y))*(sqrt(p)+sqrt(z)))^2 + // = 2*sqrt(p)*(p+sqrt(x) * (sqrt(y)+sqrt(z)) + sqrt(y)*sqrt(z)) / ((sqrt(p) + + // sqrt(x))*(sqrt(p) + sqrt(y)*(sqrt(p)+sqrt(z)))) + // + // and since this is just an application of the duplication formula for rj, the same + // expression works for 1+en if we use x,y,z,p_n etc. + // this branch is taken only once or twice at the start of iteration, + // after than en reverts to it's usual very small values. + // + auto b = 2 * rp * (pn + rx * (ry + rz) + ry * rz) / dn; + auto r0 = ellint_rc(T(1), b); + auto r1 = rc1p(en); + auto tmp = if_else(test, r0, r1); + rc_sum += fmn / dn * tmp; + } + else + { + auto r = rc1p(en); + rc_sum += fmn / dn * r; + } + auto lambda = fma(rx, ry, fma(rx, rz, ry * rz)); - T fmn(one(as(x))); // 4^-n - T rc_sum(zero(as(x))); + // from here on we move to n+1: + an = (an + lambda) * T(0.25); // / 4; + fmn /= 4; - for( unsigned n = 0; n < 30; ++n ) - { - auto rx = sqrt(xn); - auto ry = sqrt(yn); - auto rz = sqrt(zn); - auto rp = sqrt(pn); - auto dn = (rp + rx) * (rp + ry) * (rp + rz); - auto en = delta / dn; - en /= dn; - auto test = (en < -0.5) && (en > -1.5); - auto rc1p = [](auto y) - { - auto r = zero(as(y)); - auto notdone = true_(as(y)); //!= mone(as(y)); - if( eve::any(notdone) ) + if( eve::all(fmn * q < an) ) break; + + xn = (xn + lambda) * T(0.25); // / 4; + yn = (yn + lambda) * T(0.25); // / 4; + zn = (zn + lambda) * T(0.25); // / 4; + pn = (pn + lambda) * T(0.25); // / 4; + delta *= T(0.015625); // /= 64; + } + auto fmninvan = fmn * rec(an); + auto xx = (a0 - x) * fmninvan; + auto yy = (a0 - y) * fmninvan; + auto zz = (a0 - z) * fmninvan; + p = -(xx + yy + zz) / 2; + auto p2 = sqr(p); + auto xxyy = xx * yy; + auto e2 = xxyy + fma(xx, zz, fms(yy, zz, 3 * p2)); + auto p3 = p * p2; + auto xxyyzz = xxyy * zz; + auto e3 = xxyyzz + 2 * e2 * p + 4 * p3; + auto e4 = (2 * xxyyzz + e2 * p + 3 * p3) * p; + auto e5 = xxyyzz * p2; + auto e22 = sqr(e2); + auto result = fmninvan * rsqrt(an) + * (1 - 3 * e2 / 14 + e3 / 6 + 9 * e22 / 88 - 3 * e4 / 22 - 9 * e2 * e3 / 52 + + 3 * e5 / 26 - e2 * e22 / 16 + 3 * e3 * e3 / 40 + 3 * e2 * e4 / 20 + + 45 * e22 * e3 / 272 - 9 * (e3 * e4 + e2 * e5) / 68); + + result += 6 * rc_sum; + return result; + } + else { - auto br_yltm1 = [](auto my) { return rsqrt(my) * ellint_rc(my, dec(my)); }; - notdone = next_interval(br_yltm1, notdone, y < mone(as(y)), r, -y); + auto r = nan(as(x)); + auto notdone = is_nltz(x) && is_nltz(y) && is_nltz(z) && is_nez(p) && is_nez(x + y) + && is_nez(z + y) && is_nez(x + z); if( eve::any(notdone) ) { - auto br_ygt0 = [](auto y) - { - return atan(sqrt(y)) * rsqrt(y); - ; - }; - notdone = next_interval(br_ygt0, notdone, is_gtz(y), r, y); + auto br_pneg = [](auto x, auto y, auto z, auto p) // p < 0 + { + auto cond_swap = [](auto cond, auto& a, auto& b) + { + auto aa = if_else(cond, a, b); + auto bb = if_else(cond, b, a); + a = aa; + b = bb; + }; + cond_swap(x < y, x, y); + cond_swap(x < z, x, z); + cond_swap(y > z, y, z); + // now all(x <= y) and all(y <= z) + auto q = -p; + p = fms(z, (x + y + q), x * y) / (z + q); + auto v = (p - z) * ellint_rj(x, y, z, p); + v -= 3 * ellint_rf(x, y, z); + auto pq = p * q; + auto tmp = fma(x, y, pq); + v += 3 * sqrt((x * y * z) / tmp) * ellint_rc(tmp, pq); + v /= (z + q); + return v; + }; + notdone = next_interval(br_pneg, notdone, is_ltz(p), r, x, y, z, p); if( eve::any(notdone) ) { - auto arg = sqrt(-y); - auto log1parg = log1p(arg); - auto br_ygtmhf = [arg, log1parg]() - { return if_else(is_eqz(arg), T(1), (log1parg - log1p(-arg)) / (2 * arg)); }; - notdone = next_interval(br_ygtmhf, notdone, y > T(-0.5), r); + auto br_eqxy = [](auto x, auto p) // (x == y) && (x == z) + { + auto rsqtx = rsqrt(x); + return if_else(x == p, rsqtx / x, 3 * ellint_rc(x, p) - rsqtx / (x - p)); + }; + notdone = next_interval(br_eqxy, notdone, (x == y) && (x == z), r, x, p); if( eve::any(notdone) ) { - auto br_last = [arg, log1parg](auto y) { return log1parg * rsqrt(inc(y)) / arg; }; - last_interval(br_last, notdone, r, y); + auto br_eqzp = [](auto x, auto y, auto z) // (p == z) + { return ellint_rd(x, y, z); }; + notdone = next_interval(br_eqzp, notdone, (z == p), r, x, y, z); + if( eve::any(notdone) ) + { + x = if_else(notdone, x, one); + y = if_else(notdone, y, one); + z = if_else(notdone, z, one); + p = if_else(notdone, p, one); + auto br_last = [](auto x, auto y, auto z, auto p) { return ellint_rj[raw](x, y, z, p); }; + last_interval(br_last, notdone, r, x, y, z, p); + } } } } + return r; } - return r; - }; - - if( eve::any(test) ) - { - // - // occationally en ~ -1, we then have no means of calculating - // rc(1, 1+en) without terrible cancellation error, so we - // need to get to 1+en directly. by substitution we have - // - // 1+e_0 = 1 + (p-x)*(p-y)*(p-z)/((sqrt(p) + sqrt(x))*(sqrt(p)+sqrt(y))*(sqrt(p)+sqrt(z)))^2 - // = 2*sqrt(p)*(p+sqrt(x) * (sqrt(y)+sqrt(z)) + sqrt(y)*sqrt(z)) / ((sqrt(p) + - // sqrt(x))*(sqrt(p) + sqrt(y)*(sqrt(p)+sqrt(z)))) - // - // and since this is just an application of the duplication formula for rj, the same - // expression works for 1+en if we use x,y,z,p_n etc. - // this branch is taken only once or twice at the start of iteration, - // after than en reverts to it's usual very small values. - // - auto b = 2 * rp * (pn + rx * (ry + rz) + ry * rz) / dn; - auto r0 = ellint_rc(T(1), b); - auto r1 = rc1p(en); - auto tmp = if_else(test, r0, r1); - rc_sum += fmn / dn * tmp; } else { - auto r = rc1p(en); - rc_sum += fmn / dn * r; + return arithmetic_call(ellint_rj[o], x, y, z, p); } - auto lambda = fma(rx, ry, fma(rx, rz, ry * rz)); - - // from here on we move to n+1: - an = (an + lambda) * T(0.25); // / 4; - fmn /= 4; - - if( eve::all(fmn * q < an) ) break; - - xn = (xn + lambda) * T(0.25); // / 4; - yn = (yn + lambda) * T(0.25); // / 4; - zn = (zn + lambda) * T(0.25); // / 4; - pn = (pn + lambda) * T(0.25); // / 4; - delta *= T(0.015625); // /= 64; } - auto fmninvan = fmn * rec(an); - auto xx = (a0 - x) * fmninvan; - auto yy = (a0 - y) * fmninvan; - auto zz = (a0 - z) * fmninvan; - p = -(xx + yy + zz) / 2; - auto p2 = sqr(p); - auto xxyy = xx * yy; - auto e2 = xxyy + fma(xx, zz, fms(yy, zz, 3 * p2)); - auto p3 = p * p2; - auto xxyyzz = xxyy * zz; - auto e3 = xxyyzz + 2 * e2 * p + 4 * p3; - auto e4 = (2 * xxyyzz + e2 * p + 3 * p3) * p; - auto e5 = xxyyzz * p2; - auto e22 = sqr(e2); - auto result = fmninvan * rsqrt(an) - * (1 - 3 * e2 / 14 + e3 / 6 + 9 * e22 / 88 - 3 * e4 / 22 - 9 * e2 * e3 / 52 - + 3 * e5 / 26 - e2 * e22 / 16 + 3 * e3 * e3 / 40 + 3 * e2 * e4 / 20 - + 45 * e22 * e3 / 272 - 9 * (e3 * e4 + e2 * e5) / 68); - - result += 6 * rc_sum; - return result; - // auto r = nan(as(x)); - // auto notdone = is_nltz(x) && is_nltz(y) && is_nltz(z) && is_nez(p) && is_nez( y + z) && - // is_nez(z + x) && is_nez(y + x); - // // any parameterx, y, z nan or less than zero or more than one zero or p zero implies - // nan auto br0 = [x, y, z](){return raw(ellint_rj)(x, y, z);}; last_interval(br0, notdone, - // r); return r; -} - -// ----------------------------------------------------------------------------------------------- -// Masked cases -template -EVE_FORCEINLINE auto -ellint_rj_(EVE_SUPPORTS(cpu_), C const& cond, T0 t0, Ts ... ts) noexcept --> decltype( if_else(cond, ellint_rj(t0, ts...), t0) ) -{ - return mask_op(cond, eve::ellint_rj, t0, ts ...); -} - -template -EVE_FORCEINLINE auto -ellint_rj_(EVE_SUPPORTS(cpu_), C const& cond, D const & d, T0 t0, Ts ... ts) noexcept --> decltype( if_else(cond, ellint_rj(t0, ts...), t0) ) -{ - return mask_op(cond, d(eve::ellint_rj), t0, ts ...); -} }