diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 589f4f7..9579a34 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -41,7 +41,7 @@ jobs: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable - name: test - run: cargo test --no-default-features -F libm && cargo test --no-default-features -F libm,24bits && cargo test --no-default-features -F libm,50bits && cargo test --no-default-features -F libm,24bits,50bits && cargo test --no-default-features -F libm,24bits,estrin && cargo test --no-default-features -F libm,50bits,estrin && cargo test --no-default-features -F libm,24bits,50bits,estrin && cargo test --no-default-features -F std && cargo test --no-default-features -F std,24bits && cargo test --no-default-features -F std,50bits && cargo test --no-default-features -F std,24bits,50bits && cargo test --no-default-features -F std,24bits,estrin && cargo test --no-default-features -F std,50bits,estrin && cargo test --no-default-features -F std,24bits,50bits,estrin && cargo test --no-default-features -F std,libm && cargo test --no-default-features -F std,libm,24bits && cargo test --no-default-features -F std,libm,50bits && cargo test --no-default-features -F std,libm,24bits,50bits && cargo test --no-default-features -F std,libm,24bits,estrin && cargo test --no-default-features -F std,libm,50bits,estrin && cargo test --no-default-features -F std,libm,24bits,50bits,estrin + run: cargo test --no-default-features -F libm && cargo test --no-default-features -F libm,24bits && cargo test --no-default-features -F libm,50bits && cargo test --no-default-features -F libm,24bits,50bits && cargo test --no-default-features -F std && cargo test --no-default-features -F std,24bits && cargo test --no-default-features -F std,50bits && cargo test --no-default-features -F std,24bits,50bits && cargo test --no-default-features -F std,libm && cargo test --no-default-features -F std,libm,24bits && cargo test --no-default-features -F 
std,libm,50bits && cargo test --no-default-features -F std,libm,24bits,50bits #run: cargo install cargo-all-features && cargo test-all-features doc: diff --git a/CHANGELOG.md b/CHANGELOG.md index 45b0257..5743885 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ This file contains the changes to the crate since version 0.1.1. +## Unreleased + +- Remove `estrin` feature. + If it was activated anywhere in the dependency tree the crate became less + accurate for all users (as Cargo unifies features and assumes that they are additive), + without them being able to do anything about it. + ## 0.5.9 - Add the `LambertW` trait that lets the user call the Lambert W functions diff --git a/Cargo.lock b/Cargo.lock index 621cf84..83960c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -181,15 +181,6 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" -[[package]] -name = "fast_polynomial" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62eea6ee590b08a5f8b1139f4d6caee195b646d0c07e4b1808fbd5c4dea4829a" -dependencies = [ - "num-traits", -] - [[package]] name = "half" version = "2.4.1" @@ -243,11 +234,10 @@ dependencies = [ [[package]] name = "lambert_w" -version = "0.5.9" +version = "1.0.0" dependencies = [ "approx", "criterion", - "fast_polynomial", "libm", "rand", ] @@ -283,7 +273,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", - "libm", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index b176605..35b6559 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lambert_w" -version = "0.5.9" +version = "1.0.0" edition = "2021" authors = ["Johanna Sörngård "] categories = ["mathematics", "no-std::no-alloc"] @@ -12,7 +12,6 @@ repository = 
"https://github.com/JSorngard/lambert_w" documentation = "https://docs.rs/lambert_w" [dependencies] -fast_polynomial = { version = "0.3.0", default-features = false, optional = true } libm = { version = "0.2.8", optional = true } [dev-dependencies] @@ -26,17 +25,12 @@ default = ["24bits", "50bits", "libm"] 50bits = [] # Enables the function versions with 24 bits of accuracy on 64-bit floats, as well as the implementation on 32-bit floats. 24bits = [] -# Uses [Estrin's scheme](https://en.wikipedia.org/wiki/Estrin's_scheme) to evaluate the polynomials in the rational functions. -# While this results in more assembly instructions, they are mostly independent of each other, -# and this increases instruction level parallelism on modern hardware for a total performance gain. -# May result in slight numerical instability, which can be reduced if the target CPU has fused multiply-add instructions. -estrin = ["dep:fast_polynomial"] # If the `std` feature is disabled, this feature uses the [`libm`](https://crates.io/crates/libm) crate # to compute square roots and logarithms instead of the standard library. -libm = ["dep:libm", "fast_polynomial?/libm"] +libm = ["dep:libm"] # Use the standard library to compute square roots and logarithms for a potential performance gain. # When this feature is disabled the crate is `no_std` compatible. -std = ["fast_polynomial?/std"] +std = [] [package.metadata.docs.rs] # Document all features. diff --git a/README.md b/README.md index 3dca204..04ab93c 100644 --- a/README.md +++ b/README.md @@ -30,10 +30,6 @@ but this reduction has not been quantified by the author of this crate. The crate is `no_std` compatible, but can optionally depend on the standard library through features for a potential performance gain. -The API of the crate is stable and the only -reason it's not at version `1.0.0` is because its -dependencies are not. 
- ## Examples Compute the value of the diff --git a/src/lib.rs b/src/lib.rs index f545088..39cc474 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,8 +17,6 @@ //! //! The crate is `no_std` compatible, but can optionally depend on the standard library through features for a potential performance gain. //! -//! The API of the crate is stable and the only reason it's not at version `1.0.0` is because its dependencies are not. -//! //! ## Examples //! //! Compute the value of the [omega constant](https://en.wikipedia.org/wiki/Omega_constant) with the principal branch of the Lambert W function: @@ -117,11 +115,6 @@ assert_abs_diff_eq!(z.lambert_w0(), f64::ln(2.0)); //! //! You can disable one of the above features to potentially save a little bit of binary size. //! -//! `estrin`: uses [Estrin's scheme](https://en.wikipedia.org/wiki/Estrin's_scheme) to evaluate the polynomials in the rational functions. -//! While this results in more assembly instructions, they are mostly independent of each other, -//! and this increases instruction level parallelism on modern hardware for a total performance gain. -//! May result in slight numerical instability, which can be reduced if the target CPU has fused multiply-add instructions. -//! //! One of the below features must be enabled: //! //! `std`: use the standard library to compute square roots and logarithms @@ -430,10 +423,6 @@ impl LambertW for f64 { #[cfg(all(test, any(feature = "24bits", feature = "50bits")))] mod test { - // A lot of these tests are less stringent when the `estrin` feature flag is activated. - // This is because Estrin's scheme is less numerically stable, - // and CI may also not have fused multiply-add instructions to reduce the instabillity. 
- use super::LambertW; #[cfg(feature = "50bits")] use super::{lambert_w0, lambert_wm1}; @@ -454,14 +443,7 @@ mod test { lambert_w0(6.321_205_588_285_577e-1), 4.167_039_988_177_658e-1 ); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_w0(9.632120558828557), 1.721757710976171); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!( - lambert_w0(9.632120558828557), - 1.721757710976171, - epsilon = 1e-14 - ); assert_abs_diff_eq!(lambert_w0(9.963_212_055_882_856e1), 3.382785211058958); assert_abs_diff_eq!(lambert_w0(9.996_321_205_588_285e2), 5.249293782013269); assert_abs_diff_eq!( @@ -469,30 +451,9 @@ mod test { 7.231813718542178, epsilon = 1e-14 ); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_w0(9.999_963_212_055_883e4), 9.284_568_107_521_96); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!( - lambert_w0(9.999963212055883e+04), - 9.284568107521959, - epsilon = 1e-14 - ); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_w0(9.999_996_321_205_589e5), 1.138_335_774_796_812e1); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!( - lambert_w0(9.999996321205589e+05), - 1.138335774796812e+01, - epsilon = 1e-14 - ); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_w0(9.999_999_632_120_559e6), 1.351_434_397_605_273e1); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!( - lambert_w0(9.999999632120559e+06), - 1.351434397605273e+01, - epsilon = 1e-14 - ); assert_abs_diff_eq!( lambert_w0(9.999_999_963_212_056e7), 1.566_899_671_199_287e1, @@ -508,34 +469,20 @@ mod test { 2.002_868_541_326_992e1, epsilon = 1e-14 ); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!( lambert_w0(9.999_999_999_963_213e10), 2.222_712_273_495_755e1 ); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!( - lambert_w0(9.999999999963213e+10), - 2.222712273495755e+01, - epsilon = 1e-14 - ); assert_abs_diff_eq!( lambert_w0(9.999_999_999_996_321e11), 2.443_500_440_493_456e1, epsilon = 1e-14 ); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!( 
lambert_w0(9.999_999_999_999_633e12), 2.665_078_750_870_219e1, epsilon = 1e-14 ); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!( - lambert_w0(9.999999999999633e+12), - 2.665078750870219e+01, - epsilon = 1e-13 - ); assert_abs_diff_eq!( lambert_w0(9.999_999_999_999_963e13), 2.887_327_487_929_93e1, @@ -547,32 +494,11 @@ mod test { epsilon = 1e-14 ); assert_abs_diff_eq!(lambert_w0(1e16), 3.333_476_076_844_818e1, epsilon = 1e-14); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_w0(1e17), 3.557_237_716_651_325e1); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!( - lambert_w0(1.000000000000000e+17), - 3.557237716651325e+01, - epsilon = 1e-14 - ); assert_abs_diff_eq!(lambert_w0(1e18), 3.781_385_607_558_877e1, epsilon = 1e-14); assert_abs_diff_eq!(lambert_w0(1e19), 4.005_876_916_198_432e1, epsilon = 1e-14); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_w0(1e20), 4.230_675_509_173_839e1); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!( - lambert_w0(1.000000000000000e+20), - 4.230675509173839e+01, - epsilon = 1e-14 - ); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_w0(1e40), 8.763_027_715_194_72e1); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!( - lambert_w0(1.000000000000000e+40), - 8.763027715194720e+01, - epsilon = 1e-13 - ); assert_abs_diff_eq!(lambert_w0(1e80), 1.790_193_137_415_062e2, epsilon = 1e-13); assert_abs_diff_eq!(lambert_w0(1e120), 2.707_091_661_024_979e2, epsilon = 1e-13); assert_abs_diff_eq!(lambert_w0(1e160), 3.625_205_337_614_976e2); @@ -701,32 +627,17 @@ mod test { assert_abs_diff_eq!(lambert_w0f(9.632_12), 1.721_757_8, epsilon = 1e-6); assert_abs_diff_eq!(lambert_w0f(9.963_212e1), 3.382_785_3, epsilon = 1e-6); assert_abs_diff_eq!(lambert_w0f(9.996_321_4e2), 5.249_294, epsilon = 1e-6); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_w0f(9.999_632e3), 7.231_814, epsilon = 1e-7); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!(lambert_w0f(9.999_632e3), 7.231_814, epsilon = 1e-6); 
assert_abs_diff_eq!(lambert_w0f(9.999_963e4), 9.284_568, epsilon = 1e-6); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_w0f(9.999_996e5), 1.138_335_8e1, epsilon = 1e-8); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!(lambert_w0f(9.999_996e5), 1.138_335_8e1, epsilon = 1e-6); assert_abs_diff_eq!(lambert_w0f(1e7), 1.351_434_4e1, epsilon = 1e-6); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_w0f(1e8), 1.566_899_7e1, epsilon = 1e-6); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!(lambert_w0f(1e8), 1.566_899_7e1, epsilon = 1e-5); assert_abs_diff_eq!(lambert_w0f(1e9), 1.784_172_6e1, epsilon = 1e-6); assert_abs_diff_eq!(lambert_w0f(1e10), 2.002_868_5e1, epsilon = 1e-5); assert_abs_diff_eq!(lambert_w0f(1e11), 2.222_712_3e1, epsilon = 1e-5); assert_abs_diff_eq!(lambert_w0f(1e12), 2.443_500_5e1, epsilon = 1e-5); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_w0f(1e13), 2.665_078_7e1, epsilon = 1e-6); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!(lambert_w0f(1e13), 2.665_078_7e1, epsilon = 1e-5); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_w0f(1e14), 2.887_327_6e1, epsilon = 1e-6); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!(lambert_w0f(1e14), 2.887_327_6e1, epsilon = 1e-5); assert_abs_diff_eq!(lambert_w0f(1e15), 3.110_152e1, epsilon = 1e-5); assert_abs_diff_eq!(lambert_w0f(1e16), 3.333_476_3e1, epsilon = 1e-5); assert_abs_diff_eq!(lambert_w0f(1e17), 3.557_237_6e1, epsilon = 1e-5); @@ -751,14 +662,7 @@ mod test { epsilon = 1e-14 ); assert_abs_diff_eq!(lambert_wm1(-1e-1), -3.577152063957297); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_wm1(-3e-2), -5.144482721515681); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!( - lambert_wm1(-3.000000000000000e-02), - -5.144482721515681, - epsilon = 1e-14 - ); assert_abs_diff_eq!(lambert_wm1(-1e-2), -6.472775124394005, epsilon = 1e-14); assert_abs_diff_eq!(lambert_wm1(-3e-3), -7.872521380098709, epsilon = 1e-14); 
assert_abs_diff_eq!(lambert_wm1(-1e-3), -9.118006470402742, epsilon = 1e-14); @@ -787,17 +691,10 @@ mod test { -1.778_749_628_219_512e2, epsilon = 1e-13 ); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!( lambert_wm1(-1.000000000000008e-145), -3.397_029_099_254_29e2 ); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!( - lambert_wm1(-1.000000000000008e-145), - -3.397029099254290e+02, - epsilon = 1e-12 - ); assert!(lambert_wm1(f64::EPSILON).is_nan()); } @@ -858,14 +755,7 @@ mod test { fn test_lambert_wm1f() { assert!(lambert_wm1f(-1.0 / core::f32::consts::E - f32::EPSILON).is_nan()); assert_abs_diff_eq!(lambert_wm1f(-3.578_794_3e-1), -1.253_493_8, epsilon = 1e-6); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_wm1f(-2.678_794_3e-1), -2.020_625, epsilon = 1e-7); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!( - lambert_wm1f(-2.678_794_411_714_424e-1), - -2.020625228775403, - epsilon = 1e-6 - ); assert_abs_diff_eq!(lambert_wm1f(-1e-1), -3.577_152, epsilon = 1e-6); assert_abs_diff_eq!(lambert_wm1f(-3e-2), -5.144_482_6, epsilon = 1e-9); assert_abs_diff_eq!(lambert_wm1f(-1e-2), -6.472_775, epsilon = 1e-6); @@ -875,10 +765,7 @@ mod test { assert_abs_diff_eq!(lambert_wm1f(-1e-4), -1.166_711_4e1, epsilon = 1e-6); assert_abs_diff_eq!(lambert_wm1f(-3e-5), -1.297_753_2e1, epsilon = 1e-6); assert_abs_diff_eq!(lambert_wm1f(-1e-5), -1.416_360_1e1, epsilon = 1e-6); - #[cfg(not(feature = "estrin"))] assert_abs_diff_eq!(lambert_wm1f(-1e-20), -49.962_986); - #[cfg(feature = "estrin")] - assert_abs_diff_eq!(lambert_wm1f(-1e-20), -49.962_986, epsilon = 1e-5); assert!(lambert_wm1f(f32::EPSILON).is_nan()); } diff --git a/src/rational.rs b/src/rational.rs index cdc15a4..98fa9bf 100644 --- a/src/rational.rs +++ b/src/rational.rs @@ -1,40 +1,19 @@ -//! Rational functions that use Estrin's scheme for better performance if the `estrin` feature is enabled, -//! and otherwise they use the more typical Horner's method. -//! -//! 
If the `estrin` feature is enabled and the target CPU has the FMA feature these functions will -//! use fused multiply-add instructions. +//! Rational functions. // The #[inline(always)] annotations are motivated by benchmarks, especially of the 50 bit functions. -#[cfg(all(feature = "24bits", feature = "estrin"))] -use fast_polynomial::polynomials::{poly_3, poly_4}; - -#[cfg(all(feature = "50bits", feature = "estrin"))] -use fast_polynomial::polynomials::{poly_7, poly_8}; - #[cfg(feature = "24bits")] /// Rational function consisting of two third degree polynomials. /// /// The first set of coefficients are for the polynomial in the numerator /// and the second set are the coefficients of the polynomial in the denominator. -/// -/// If the `estrin` feature is enabled this uses Estrin's scheme and fused multiply-add instructions, otherwise it uses the more typical Horner's method. #[inline(always)] pub(crate) fn rational_3_over_3( x: f64, [n0, n1, n2, n3]: [f64; 4], [d0, d1, d2, d3]: [f64; 4], ) -> f64 { - #[cfg(feature = "estrin")] - { - let x2 = x * x; - poly_3(x, x2, n0, n1, n2, n3) / poly_3(x, x2, d0, d1, d2, d3) - } - - #[cfg(not(feature = "estrin"))] - { - (n0 + x * (n1 + x * (n2 + x * n3))) / (d0 + x * (d1 + x * (d2 + x * d3))) - } + (n0 + x * (n1 + x * (n2 + x * n3))) / (d0 + x * (d1 + x * (d2 + x * d3))) } #[cfg(feature = "24bits")] @@ -42,24 +21,13 @@ pub(crate) fn rational_3_over_3( /// /// The first set of coefficients are for the polynomial in the numerator /// and the second set are the coefficients of the polynomial in the denominator. -/// -/// If the `estrin` feature is enabled this uses Estrin's scheme and fused multiply-add instructions, otherwise it uses the more typical Horner's method. 
#[inline(always)] pub(crate) fn rational_3_over_3f( x: f32, [n0, n1, n2, n3]: [f32; 4], [d0, d1, d2, d3]: [f32; 4], ) -> f32 { - #[cfg(feature = "estrin")] - { - let x2 = x * x; - poly_3(x, x2, n0, n1, n2, n3) / poly_3(x, x2, d0, d1, d2, d3) - } - - #[cfg(not(feature = "estrin"))] - { - (n0 + x * (n1 + x * (n2 + x * n3))) / (d0 + x * (d1 + x * (d2 + x * d3))) - } + (n0 + x * (n1 + x * (n2 + x * n3))) / (d0 + x * (d1 + x * (d2 + x * d3))) } #[cfg(feature = "24bits")] @@ -67,25 +35,13 @@ pub(crate) fn rational_3_over_3f( /// /// The first set of coefficients are for the polynomial in the numerator /// and the second set are the coefficients of the polynomial in the denominator. -/// -/// If the `estrin` feature is enabled this uses Estrin's scheme and fused multiply-add instructions, otherwise it uses the more typical Horner's method. #[inline(always)] pub(crate) fn rational_4_over_3( x: f64, [n0, n1, n2, n3, n4]: [f64; 5], [d0, d1, d2, d3]: [f64; 4], ) -> f64 { - #[cfg(feature = "estrin")] - { - let x2 = x * x; - let x4 = x2 * x2; - poly_4(x, x2, x4, n0, n1, n2, n3, n4) / poly_3(x, x2, d0, d1, d2, d3) - } - - #[cfg(not(feature = "estrin"))] - { - (n0 + x * (n1 + x * (n2 + x * (n3 + x * n4)))) / (d0 + x * (d1 + x * (d2 + x * d3))) - } + (n0 + x * (n1 + x * (n2 + x * (n3 + x * n4)))) / (d0 + x * (d1 + x * (d2 + x * d3))) } #[cfg(feature = "24bits")] @@ -93,25 +49,13 @@ pub(crate) fn rational_4_over_3( /// /// The first set of coefficients are for the polynomial in the numerator /// and the second set are the coefficients of the polynomial in the denominator. -/// -/// If the `estrin` feature is enabled this uses Estrin's scheme and fused multiply-add instructions, otherwise it uses the more typical Horner's method. 
#[inline(always)] pub(crate) fn rational_4_over_3f( x: f32, [n0, n1, n2, n3, n4]: [f32; 5], [d0, d1, d2, d3]: [f32; 4], ) -> f32 { - #[cfg(feature = "estrin")] - { - let x2 = x * x; - let x4 = x2 * x2; - poly_4(x, x2, x4, n0, n1, n2, n3, n4) / poly_3(x, x2, d0, d1, d2, d3) - } - - #[cfg(not(feature = "estrin"))] - { - (n0 + x * (n1 + x * (n2 + x * (n3 + x * n4)))) / (d0 + x * (d1 + x * (d2 + x * d3))) - } + (n0 + x * (n1 + x * (n2 + x * (n3 + x * n4)))) / (d0 + x * (d1 + x * (d2 + x * d3))) } #[cfg(feature = "50bits")] @@ -119,26 +63,14 @@ pub(crate) fn rational_4_over_3f( /// /// The first set of coefficients are for the polynomial in the numerator /// and the second set are the coefficients of the polynomial in the denominator. -/// -/// If the `estrin` feature is enabled this uses Estrin's scheme and fused multiply-add instructions, otherwise it uses the more typical Horner's method. #[inline(always)] pub(crate) fn rational_7_over_7( x: f64, [n0, n1, n2, n3, n4, n5, n6, n7]: [f64; 8], [d0, d1, d2, d3, d4, d5, d6, d7]: [f64; 8], ) -> f64 { - #[cfg(feature = "estrin")] - { - let x2 = x * x; - let x4 = x2 * x2; - poly_7(x, x2, x4, n0, n1, n2, n3, n4, n5, n6, n7) - / poly_7(x, x2, x4, d0, d1, d2, d3, d4, d5, d6, d7) - } - #[cfg(not(feature = "estrin"))] - { - (n0 + x * (n1 + x * (n2 + x * (n3 + x * (n4 + x * (n5 + x * (n6 + x * n7))))))) - / (d0 + x * (d1 + x * (d2 + x * (d3 + x * (d4 + x * (d5 + x * (d6 + x * d7))))))) - } + (n0 + x * (n1 + x * (n2 + x * (n3 + x * (n4 + x * (n5 + x * (n6 + x * n7))))))) + / (d0 + x * (d1 + x * (d2 + x * (d3 + x * (d4 + x * (d5 + x * (d6 + x * d7))))))) } #[cfg(feature = "50bits")] @@ -146,26 +78,12 @@ pub(crate) fn rational_7_over_7( /// /// The first set of coefficients are for the polynomial in the numerator /// and the second set are the coefficients of the polynomial in the denominator. 
-/// -/// If the `estrin` feature is enabled this uses Estrin's scheme and fused multiply-add instructions, otherwise it uses the more typical Horner's method. #[inline(always)] pub(crate) fn rational_8_over_7( x: f64, [n0, n1, n2, n3, n4, n5, n6, n7, n8]: [f64; 9], [d0, d1, d2, d3, d4, d5, d6, d7]: [f64; 8], ) -> f64 { - #[cfg(feature = "estrin")] - { - let x2 = x * x; - let x4 = x2 * x2; - let x8 = x4 * x4; - poly_8(x, x2, x4, x8, n0, n1, n2, n3, n4, n5, n6, n7, n8) - / poly_7(x, x2, x4, d0, d1, d2, d3, d4, d5, d6, d7) - } - - #[cfg(not(feature = "estrin"))] - { - (n0 + x * (n1 + x * (n2 + x * (n3 + x * (n4 + x * (n5 + x * (n6 + x * (n7 + x * n8)))))))) - / (d0 + x * (d1 + x * (d2 + x * (d3 + x * (d4 + x * (d5 + x * (d6 + x * d7))))))) - } + (n0 + x * (n1 + x * (n2 + x * (n3 + x * (n4 + x * (n5 + x * (n6 + x * (n7 + x * n8)))))))) + / (d0 + x * (d1 + x * (d2 + x * (d3 + x * (d4 + x * (d5 + x * (d6 + x * d7))))))) }