From 82d0266d0474411779eeaf396a957089e718dda9 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 21 Nov 2024 13:06:04 +0100 Subject: [PATCH 1/7] removed useless branchs + mark as constexpr --- include/eve/arch/cpu/top_bits.hpp | 22 ++-- .../eve/module/core/named_shuffles/blend.hpp | 68 ++++++------ .../core/named_shuffles/broadcast_lane.hpp | 60 +++++------ .../module/core/named_shuffles/reverse.hpp | 101 +++++++++--------- .../named_shuffles/reverse_in_subgroups.hpp | 47 ++++---- .../eve/module/core/named_shuffles/slide.hpp | 45 ++++---- .../core/named_shuffles/swap_adjacent.hpp | 44 ++++---- .../module/core/regular/impl/simd/x86/add.hpp | 56 +++++----- .../regular/impl/simd/x86/countl_zero.hpp | 4 +- .../module/core/regular/impl/simd/x86/div.hpp | 25 ++--- .../module/core/regular/impl/simd/x86/fma.hpp | 30 +++--- .../module/core/regular/impl/simd/x86/fms.hpp | 28 +++-- .../core/regular/impl/simd/x86/fnma.hpp | 30 +++--- .../core/regular/impl/simd/x86/fnms.hpp | 30 +++--- .../module/core/regular/impl/simd/x86/mul.hpp | 26 ++--- .../module/core/regular/impl/simd/x86/rec.hpp | 7 +- .../module/core/regular/impl/simd/x86/sub.hpp | 56 +++++----- 17 files changed, 326 insertions(+), 353 deletions(-) diff --git a/include/eve/arch/cpu/top_bits.hpp b/include/eve/arch/cpu/top_bits.hpp index 6fe3743dcc..e7acd805f9 100644 --- a/include/eve/arch/cpu/top_bits.hpp +++ b/include/eve/arch/cpu/top_bits.hpp @@ -114,24 +114,24 @@ namespace detail static constexpr bool is_cheap_impl() { - if ( has_emulated_abi_v ) return true; - if constexpr ( is_aggregated ) return top_bits::is_cheap; + if constexpr ( has_emulated_abi_v ) return true; + else if constexpr ( is_aggregated ) return top_bits::is_cheap; - if ( x86_abi ) return true; - if ( ppc_abi ) return true; + else if constexpr ( x86_abi ) return true; + else if constexpr ( ppc_abi ) return true; - if ( arm_abi ) + else if constexpr ( arm_abi ) { - if ( static_size == 1 ) return true; - if ( static_size * sizeof(scalar_type) <= 4 ) return true; - if ( current_api >= eve::asimd ) + if constexpr ( static_size == 1 ) return true; + else if constexpr ( static_size * sizeof(scalar_type) <= 4 ) return true; + else if constexpr ( current_api >= eve::asimd ) { - if ( sizeof(scalar_type) >= 2 ) return true; + if constexpr ( sizeof(scalar_type) >= 2 ) return true; return static_size <= 8; // 16 chars is expensive } - return false; + else return false; } - return false; + else return false; } public: diff --git a/include/eve/module/core/named_shuffles/blend.hpp b/include/eve/module/core/named_shuffles/blend.hpp index 42ed4339c6..cf9410821c 100644 --- a/include/eve/module/core/named_shuffles/blend.hpp +++ b/include/eve/module/core/named_shuffles/blend.hpp @@ -95,42 +95,50 @@ struct blend_t level(as {}, as {}, g, p1)); } - if( ((I == 0) && ...) ) return 0; - if( ((I == 1) && ...) ) return 0; + else if constexpr ( ((I == 0) && ...) ) return 0; + else if constexpr ( ((I == 1) && ...) ) return 0; - if( current_api >= sve ) return logical_simd_value ? 6 : 2; - if( current_api >= avx512 ) return logical_simd_value ? 6 : 2; - if( current_api >= vmx ) return 3; - - const std::ptrdiff_t g_size = sizeof(element_type_t) * G; - const std::size_t reg_size = sizeof(element_type_t) * T::size(); - const std::size_t count_from_x = ((I == 0) + ...); - const std::size_t count_from_y = ((I == 1) + ...); - - if( current_api >= neon ) + else if constexpr ( current_api >= sve ) return logical_simd_value ? 6 : 2; + else if constexpr ( current_api >= avx512 ) return logical_simd_value ? 6 : 2; + else if constexpr ( current_api >= vmx ) return 3; + else { - if( current_api >= asimd && (count_from_x == 1 || count_from_y == 1) ) return 2; - return 3; - } + constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; + constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); + constexpr std::size_t count_from_x = ((I == 0) + ...); + constexpr std::size_t count_from_y = ((I == 1) + ...); - if( current_api >= sse2 ) - { - if constexpr( current_api == avx && reg_size >= 32 && g_size <= 2 ) + if constexpr ( current_api >= neon ) { - using half_t = decltype(T {}.slice(lower_)); - auto [p0, p1] = detail::idxm::slice_pattern::size() / 2>(p); - auto l0 = level(as {}, as {}, g, p0); - auto l1 = level(as {}, as {}, g, p1); - return detail::idxm::add_shuffle_levels(std::array {l0, l1, 4}); + if constexpr ( current_api >= asimd && (count_from_x == 1 || count_from_y == 1) ) return 2; + return 3; + } + else if constexpr ( current_api >= sse2 ) + { + if constexpr( current_api == avx && reg_size >= 32 && g_size <= 2 ) + { + using half_t = decltype(T {}.slice(lower_)); + auto [p0, p1] = detail::idxm::slice_pattern::size() / 2>(p); + auto l0 = level(as {}, as {}, g, p0); + auto l1 = level(as {}, as {}, g, p1); + return detail::idxm::add_shuffle_levels(std::array {l0, l1, 4}); + } + else if constexpr ( current_api >= sse4_1 ) + { + return g_size >= 4 ? 2 : 3; + } + else + { + if constexpr ( g_size >= 8 ) return 2; + else if constexpr ( g_size == 2 && reg_size == 4 ) return 6; + else return 7; + } + } + else + { + return 2; } - if( current_api >= sse4_1 ) return g_size >= 4 ? 2 : 3; - - if( g_size >= 8 ) return 2; - if( g_size == 2 && reg_size == 4 ) return 6; - return 7; } - - return 2; } template diff --git a/include/eve/module/core/named_shuffles/broadcast_lane.hpp b/include/eve/module/core/named_shuffles/broadcast_lane.hpp index fbbf0b1ea2..bcdc341c7c 100644 --- a/include/eve/module/core/named_shuffles/broadcast_lane.hpp +++ b/include/eve/module/core/named_shuffles/broadcast_lane.hpp @@ -68,8 +68,8 @@ struct broadcast_lane_t template static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::index_t i) { - const std::size_t reg_size = sizeof(element_type_t) * T::size(); - const std::ptrdiff_t g_size = sizeof(element_type_t) * G; + constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); + constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; if constexpr( eve::has_aggregated_abi_v ) { @@ -80,47 +80,47 @@ struct broadcast_lane_t else if constexpr( current_api >= vmx ) return 2; else if constexpr( current_api >= sve ) { - if( !logical_value ) return g_size > 8 ? 3 : 2; - if( G == 1 ) return 4; - if( g_size <= 8 ) return 6; - return 7; + if constexpr ( !logical_value ) return g_size > 8 ? 3 : 2; + else if constexpr ( G == 1 ) return 4; + else if constexpr ( g_size <= 8 ) return 6; + else return 7; } else if constexpr( current_api >= neon ) { - if( current_api >= asimd ) return 2; - if( reg_size <= 8 ) return 2; - return 4; + if constexpr ( current_api >= asimd ) return 2; + else if constexpr ( reg_size <= 8 ) return 2; + else return 4; } // x86 - if (current_api == avx512 && logical_value) + if constexpr (current_api == avx512 && logical_value) { - if (G == 1) return 4; - return level(detail::mask_type(tgt), g, i) + 4; + if constexpr (G == 1) return 4; + else return level(detail::mask_type(tgt), g, i) + 4; } - - if (reg_size == 64) + else if constexpr (reg_size == 64) { - if (g_size >= 16) return 2; - if (g_size >= 2) return 3; - return 4; + if constexpr (g_size >= 16) return 2; + else if constexpr (g_size >= 2) return 3; + else return 4; } - - if (reg_size == 32) + else if constexpr (reg_size == 32) { - if (g_size >= 16) return 2; - if (current_api == avx) return 4; - if (g_size >= 8) return 2; - if (g_size >= 4) return 3; - if (g_size >= 2 && current_api == avx512) return 3; - return 4; + if constexpr (g_size >= 16) return 2; + if constexpr (current_api == avx) return 4; + if constexpr (g_size >= 8) return 2; + if constexpr (g_size >= 4) return 3; + if constexpr (g_size >= 2 && current_api == avx512) return 3; + else return 4; + } + else + { + if constexpr ( g_size >= 4 ) return 2; + else if constexpr ( g_size == 2 && reg_size <= 8 ) return 2; + else if constexpr ( current_api >= ssse3 ) return 3; + else return 4; } - - if ( g_size >= 4 ) return 2; - if ( g_size == 2 && reg_size <= 8 ) return 2; - if ( current_api >= ssse3 ) return 3; - return 4; } }; diff --git a/include/eve/module/core/named_shuffles/reverse.hpp b/include/eve/module/core/named_shuffles/reverse.hpp index 44612b86ec..ec89ef2264 100644 --- a/include/eve/module/core/named_shuffles/reverse.hpp +++ b/include/eve/module/core/named_shuffles/reverse.hpp @@ -66,68 +66,73 @@ struct reverse_t if constexpr( eve::has_aggregated_abi_v ) { if constexpr( G == T::size() / 2 ) return 0; - using half_t = decltype(T {}.slice(lower_)); - return level(as {}, g); + else + { + using half_t = decltype(T {}.slice(lower_)); + return level(as {}, g); + } } - - const std::ptrdiff_t g_size = sizeof(element_type_t) * G; - const std::size_t reg_size = sizeof(element_type_t) * T::size(); - const bool is_expected_cardinal = T::size() == eve::expected_cardinal_v>; - - if( current_api >= sve ) + else { - if( !logical_value ) + constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; + constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); + constexpr bool is_expected_cardinal = T::size() == eve::expected_cardinal_v>; + + if constexpr ( current_api >= sve ) { - if( reg_size <= 8 ) return 2; - if( is_expected_cardinal && g_size <= 8 ) return 2; - if( is_expected_cardinal && g_size == reg_size / 2 ) return 2; - return 3; + if constexpr ( !logical_value ) + { + if constexpr ( reg_size <= 8 ) return 2; + else if constexpr ( is_expected_cardinal && g_size <= 8 ) return 2; + else if constexpr ( is_expected_cardinal && g_size == reg_size / 2 ) return 2; + else return 3; + } + else if constexpr ( is_expected_cardinal && g_size <= 8 ) return 2; + else return level(detail::mask_type(tgt), g) + 4; } - if( is_expected_cardinal && g_size <= 8 ) return 2; - return level(detail::mask_type(tgt), g) + 4; - } - if (current_api >= neon) { - if ( reg_size <= 8 ) return 2; - if ( g_size == 8 ) return 2; - if ( current_api >= asimd ) return 3; - return 4; - } + else if constexpr (current_api >= neon) { + if constexpr ( reg_size <= 8 ) return 2; + if constexpr ( g_size == 8 ) return 2; + if constexpr ( current_api >= asimd ) return 3; + else return 4; + } - if( current_api >= vmx ) return 3; + else if constexpr ( current_api >= vmx ) return 3; - if( current_api == avx512 && logical_value ) { return level(detail::mask_type(tgt), g) + 4; } + else if constexpr ( current_api == avx512 && logical_value ) { return level(detail::mask_type(tgt), g) + 4; } - if( current_api >= avx2 && reg_size >= 32 ) - { - if( g_size >= 16 ) return 2; - if( g_size >= 8 ) return reg_size == 64 ? 3 : 2; - if( g_size >= 4 ) return 3; - if( g_size == 2 && current_api >= avx512 ) return 3; - return 5; - } + else if constexpr ( current_api >= avx2 && reg_size >= 32 ) + { + if constexpr ( g_size >= 16 ) return 2; + else if constexpr ( g_size >= 8 ) return reg_size == 64 ? 3 : 2; + else if constexpr ( g_size >= 4 ) return 3; + else if constexpr ( g_size == 2 && current_api >= avx512 ) return 3; + else return 5; + } - if( current_api == avx && reg_size >= 32 ) - { - if( g_size >= 16 ) return 2; - if( g_size >= 4 ) return 4; - if( g_size == 2 && current_api >= avx512 ) return 3; - return 9; - } + else if constexpr ( current_api == avx && reg_size >= 32 ) + { + if constexpr ( g_size >= 16 ) return 2; + else if constexpr ( g_size >= 4 ) return 4; + else if constexpr ( g_size == 2 && current_api >= avx512 ) return 3; + else return 9; + } - if( g_size >= 4 ) return 2; - if( g_size == 2 && reg_size <= 8 ) return 2; + else if constexpr ( g_size >= 4 ) return 2; + else if constexpr ( g_size == 2 && reg_size <= 8 ) return 2; - if( current_api >= ssse3 ) return 3; + else if constexpr ( current_api >= ssse3 ) return 3; - if( g_size == 2 ) return 6; + else if constexpr ( g_size == 2 ) return 6; - // chars on sse2 - if( reg_size == 2 ) return 6; + // chars on sse2 + else if constexpr ( reg_size == 2 ) return 6; - // swap chars + reverse shorts - if( reg_size <= 8 ) return 8; - return 12; + // swap chars + reverse shorts + else if constexpr ( reg_size <= 8 ) return 8; + else return 12; + } } }; diff --git a/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp b/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp index e1cfcc91f8..856d664892 100644 --- a/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp +++ b/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp @@ -79,9 +79,9 @@ struct reverse_in_subgroups_t template static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::fixed sub_g) { - const std::ptrdiff_t g_size = sizeof(element_type_t) * G; - const std::size_t sub_size = g_size * SubG; - const std::size_t reg_size = sizeof(element_type_t) * T::size(); + constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; + constexpr std::size_t sub_size = g_size * SubG; + constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); if constexpr( SubG == 1 ) return 0; else if constexpr( SubG == 2 ) return swap_adjacent.level(tgt, g); @@ -93,40 +93,37 @@ struct reverse_in_subgroups_t return level(as {}, g, sub_g); } - if( current_api >= sve ) + else if constexpr ( current_api >= sve ) { - if( !logical_value ) + if constexpr ( !logical_value ) { - if( sub_size <= 8 ) return 2; - return 3; + if constexpr ( sub_size <= 8 ) return 2; + else return 3; } - return level(detail::mask_type(tgt), g, sub_g) + 4; + else return level(detail::mask_type(tgt), g, sub_g) + 4; } - if( current_api >= vmx ) return 3; - if( current_api >= neon ) return 2; + else if constexpr ( current_api >= vmx ) return 3; + else if constexpr ( current_api >= neon ) return 2; - if( current_api == avx512 && logical_value ) + else if constexpr ( current_api == avx512 && logical_value ) { return level(detail::mask_type(tgt), g, sub_g) + 4; } - if (sub_size == 32) { - if (g_size == 8) return 2; - if (g_size == 4) return 3; - if (g_size == 2 && current_api >= avx512) return 3; - return 5; + else if constexpr (sub_size == 32) { + if constexpr (g_size == 8) return 2; + else if constexpr (g_size == 4) return 3; + else if constexpr (g_size == 2 && current_api >= avx512) return 3; + else return 5; } - if( g_size >= 4 ) return 2; - - if (current_api == avx && reg_size == 32) return 9; - - if( current_api >= ssse3 ) return 3; - - if( g_size == 2 ) return 4; - if (reg_size <= 8) return 8; - return 10; + else if constexpr ( g_size >= 4 ) return 2; + else if constexpr (current_api == avx && reg_size == 32) return 9; + else if constexpr ( current_api >= ssse3 ) return 3; + else if constexpr ( g_size == 2 ) return 4; + else if constexpr (reg_size <= 8) return 8; + else return 10; } }; diff --git a/include/eve/module/core/named_shuffles/slide.hpp b/include/eve/module/core/named_shuffles/slide.hpp index 8ff91303ee..289ca6aaae 100644 --- a/include/eve/module/core/named_shuffles/slide.hpp +++ b/include/eve/module/core/named_shuffles/slide.hpp @@ -78,7 +78,7 @@ struct slide_left_impl_t static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::index_t s) { using abi_t = typename T::abi_type; - const std::size_t reg_size = sizeof(element_type_t) * T::size(); + constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); constexpr std::ptrdiff_t S = G * S_; constexpr bool is_shift_by_16 = (S * sizeof(element_type_t) % 16) == 0; constexpr bool is_shift_by_4 = (S * sizeof(element_type_t) % 4) == 0; @@ -110,22 +110,23 @@ struct slide_left_impl_t } else if constexpr( current_api >= neon || current_api >= sve ) { - if( reg_size <= 8 ) return 2; - return 3; + if constexpr ( reg_size <= 8 ) return 2; + else return 3; } else { - if( reg_size <= 8 ) return 2; - if( current_api >= avx512 ) + if constexpr ( reg_size <= 8 ) return 2; + else if constexpr ( current_api >= avx512 ) { - if( is_shift_by_4 ) return 2; - if( reg_size <= 16 ) return 2; - if( is_shift_by_2 ) return 3; - if (reg_size == 64) return 5; // this is not yet done + if constexpr ( is_shift_by_4 ) return 2; + if constexpr ( reg_size <= 16 ) return 2; + if constexpr ( is_shift_by_2 ) return 3; + if constexpr (reg_size == 64) return 5; // this is not yet done + else return 2; } - if( reg_size == 32 && is_shift_by_16 ) return 2; - if( current_api >= avx2 && reg_size == 32 ) { return 4; } - return 2; + else if constexpr ( reg_size == 32 && is_shift_by_16 ) return 2; + else if constexpr ( current_api >= avx2 && reg_size == 32 ) { return 4; } + else return 2; } } @@ -137,7 +138,7 @@ struct slide_left_impl_t { using abi_t = typename T::abi_type; constexpr std::ptrdiff_t S = S_ * G; - const std::size_t reg_size = sizeof(element_type_t) * T::size(); + constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); constexpr bool is_shift_by_16 = (S * sizeof(element_type_t) % 16) == 0; constexpr bool is_shift_by_8 = (S * sizeof(element_type_t) % 8) == 0; @@ -145,23 +146,23 @@ struct slide_left_impl_t constexpr bool is_shift_by_2 = (S * sizeof(element_type_t) % 2) == 0; if constexpr( S == 0 || S == T::size() ) return 0; - if constexpr( logical_simd_value && !abi_t::is_wide_logical ) + else if constexpr( logical_simd_value && !abi_t::is_wide_logical ) { auto mask = detail::mask_type(tgt); return level(mask, mask, g, s) + 6; } - if constexpr( current_api >= neon || current_api >= sve ) return 2; - if( current_api >= avx512 ) + else if constexpr( current_api >= neon || current_api >= sve ) return 2; + else if constexpr ( current_api >= avx512 ) { - if( is_shift_by_4 ) return 2; - if( is_shift_by_2 ) return 3; + if constexpr ( is_shift_by_4 ) return 2; + else if constexpr ( is_shift_by_2 ) return 3; } - if( is_shift_by_16 && reg_size == 32 ) return 2; - if( current_api >= avx2 && reg_size == 32 ) return 4; + else if constexpr ( is_shift_by_16 && reg_size == 32 ) return 2; + else if constexpr ( current_api >= avx2 && reg_size == 32 ) return 4; - if( current_api >= sse4_2 ) return 2; + else if constexpr ( current_api >= sse4_2 ) return 2; // sse2 - return is_shift_by_8 ? 2 : 6; + else return is_shift_by_8 ? 2 : 6; } }; diff --git a/include/eve/module/core/named_shuffles/swap_adjacent.hpp b/include/eve/module/core/named_shuffles/swap_adjacent.hpp index 16ef7ae98c..0f3ae72b46 100644 --- a/include/eve/module/core/named_shuffles/swap_adjacent.hpp +++ b/include/eve/module/core/named_shuffles/swap_adjacent.hpp @@ -67,50 +67,50 @@ struct swap_adjacent_t template static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g) { - const std::ptrdiff_t g_size = sizeof(element_type_t) * G; - const std::size_t reg_size = sizeof(element_type_t) * T::size(); - const std::size_t fund_size = eve::fundamental_cardinal_v; + constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; + constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); + constexpr std::size_t fund_size = eve::fundamental_cardinal_v; - if( current_api >= sve ) + if constexpr ( current_api >= sve ) { - if( arithmetic_simd_value ) + if constexpr( arithmetic_simd_value ) { - if (g_size <= 4 || g_size == fund_size / 2) return 2; - return 3; + if constexpr (g_size <= 4 || g_size == fund_size / 2) return 2; + else return 3; } else { - if( g_size == 8 && fund_size == 16 ) return 2; - return level(detail::mask_type(tgt), g) + 4; + if constexpr ( g_size == 8 && fund_size == 16 ) return 2; + else return level(detail::mask_type(tgt), g) + 4; } } - if( current_api >= vmx ) return 3; - if( current_api >= neon ) return 2; + else if constexpr ( current_api >= vmx ) return 3; + else if constexpr ( current_api >= neon ) return 2; - if( eve::current_api == avx512 && logical_simd_value ) + else if constexpr ( current_api == avx512 && logical_simd_value ) { return level(detail::mask_type(tgt), g) + 4; } - if constexpr( current_api == avx && reg_size >= 32 && g_size <= 2 ) + else if constexpr( current_api == avx && reg_size >= 32 && g_size <= 2 ) { using half_t = decltype(T {}.slice(lower_)); std::ptrdiff_t half_l = level(eve::as {}, g); // since we are adding, we need to deal with aggregation - if( reg_size > 32 ) return half_l; - return detail::idxm::add_shuffle_levels({half_l, half_l, 4}); + if constexpr ( reg_size > 32 ) return half_l; + else return detail::idxm::add_shuffle_levels({half_l, half_l, 4}); } - if( current_api >= sse2 ) + if constexpr ( current_api >= sse2 ) { - if( g_size >= 4 ) return 2; - if( g_size == 2 && reg_size <= 8 ) return 2; - if( current_api >= ssse3 ) return 3; - if( g_size == 2 ) return 4; - return 6; + if constexpr ( g_size >= 4 ) return 2; + else if constexpr ( g_size == 2 && reg_size <= 8 ) return 2; + else if constexpr ( current_api >= ssse3 ) return 3; + else if constexpr ( g_size == 2 ) return 4; + else return 6; } - return 2; + else return 2; } }; diff --git a/include/eve/module/core/regular/impl/simd/x86/add.hpp b/include/eve/module/core/regular/impl/simd/x86/add.hpp index b267acdf5e..fa480cecf4 100644 --- a/include/eve/module/core/regular/impl/simd/x86/add.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/add.hpp @@ -126,42 +126,36 @@ namespace eve::detail if constexpr(floating_value &&( O::contains(lower) || O::contains(upper)) && !O::contains(strict)) { - if constexpr(current_api >= avx512) + auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; + if constexpr ( c == category::float64x8 ) return _mm512_mask_add_round_pd (src, m, v, w, dir); + else if constexpr ( c == category::float32x16 ) return _mm512_mask_add_round_ps (src, m, v, w, dir); + else if constexpr ( c == category::float64x4 || c == category::float64x2 || + c == category::float32x8 || c == category::float32x4 || c == category::float32x2) { - auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; - if constexpr ( c == category::float64x8 ) return _mm512_mask_add_round_pd (src, m, v, w, dir); - else if constexpr ( c == category::float32x16 ) return _mm512_mask_add_round_ps (src, m, v, w, dir); - else if constexpr ( c == category::float64x4 || c == category::float64x2 || - c == category::float32x8 || c == category::float32x4 || c == category::float32x2) - { - auto vv = eve::combine(v, w); - auto ww = eve::combine(w, v); - auto vvpww = add[opts.drop(condition_key)](vv, ww); - auto s = slice(vvpww, eve::upper_); - return if_else(cx,s,src); - } - else return add.behavior(cpu_{}, opts, v, w); + auto vv = eve::combine(v, w); + auto ww = eve::combine(w, v); + auto vvpww = add[opts.drop(condition_key)](vv, ww); + auto s = slice(vvpww, eve::upper_); + return if_else(cx,s,src); } - else return add.behavior(cpu_{}, opts, v, w); + else return add.behavior(cpu_{}, opts, v, w); } else if constexpr(O::contains(saturated)) { - constexpr auto sup_avx2 = current_api >= avx2; - - if constexpr( floating_value ) return add[cx](v, w); - else if constexpr( c == category::int16x32 ) return _mm512_mask_adds_epi16(src, m, v, w); - else if constexpr( c == category::uint16x32 ) return _mm512_mask_adds_epu16(src, m, v, w); - else if constexpr( c == category::int8x64 ) return _mm512_mask_adds_epi8(src, m, v, w); - else if constexpr( c == category::uint8x64 ) return _mm512_mask_adds_epu8(src, m, v, w); - else if constexpr( sup_avx2 && c == category::int16x16 ) return _mm256_mask_adds_epi16(src, m, v, w); - else if constexpr( sup_avx2 && c == category::uint16x16 ) return _mm256_mask_adds_epu16(src, m, v, w); - else if constexpr( sup_avx2 && c == category::int8x32 ) return _mm256_mask_adds_epi8(src, m, v, w); - else if constexpr( sup_avx2 && c == category::uint8x32 ) return _mm256_mask_adds_epu8(src, m, v, w); - else if constexpr( c == category::int16x8 ) return _mm_mask_adds_epi16(src, m, v, w); - else if constexpr( c == category::uint16x8 ) return _mm_mask_adds_epu16(src, m, v, w); - else if constexpr( c == category::int8x16 ) return _mm_mask_adds_epi8(src, m, v, w); - else if constexpr( c == category::uint8x16 ) return _mm_mask_adds_epu8(src, m, v, w); - else return add.behavior(cpu_{}, opts, v, w); + if constexpr( floating_value ) return add[cx](v, w); + else if constexpr( c == category::int16x32 ) return _mm512_mask_adds_epi16(src, m, v, w); + else if constexpr( c == category::uint16x32 ) return _mm512_mask_adds_epu16(src, m, v, w); + else if constexpr( c == category::int8x64 ) return _mm512_mask_adds_epi8(src, m, v, w); + else if constexpr( c == category::uint8x64 ) return _mm512_mask_adds_epu8(src, m, v, w); + else if constexpr( c == category::int16x16 ) return _mm256_mask_adds_epi16(src, m, v, w); + else if constexpr( c == category::uint16x16 ) return _mm256_mask_adds_epu16(src, m, v, w); + else if constexpr( c == category::int8x32 ) return _mm256_mask_adds_epi8(src, m, v, w); + else if constexpr( c == category::uint8x32 ) return _mm256_mask_adds_epu8(src, m, v, w); + else if constexpr( c == category::int16x8 ) return _mm_mask_adds_epi16(src, m, v, w); + else if constexpr( c == category::uint16x8 ) return _mm_mask_adds_epu16(src, m, v, w); + else if constexpr( c == category::int8x16 ) return _mm_mask_adds_epi8(src, m, v, w); + else if constexpr( c == category::uint8x16 ) return _mm_mask_adds_epu8(src, m, v, w); + else return add.behavior(cpu_{}, opts, v, w); } else { diff --git a/include/eve/module/core/regular/impl/simd/x86/countl_zero.hpp b/include/eve/module/core/regular/impl/simd/x86/countl_zero.hpp index 75bf7a2621..f1e4f499d3 100644 --- a/include/eve/module/core/regular/impl/simd/x86/countl_zero.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/countl_zero.hpp @@ -38,7 +38,7 @@ namespace eve::detail else if constexpr( c == category::uint64x2 ) return r_t(_mm_lzcnt_epi64(a0)); else if constexpr( c == category::uint32x4 ) return r_t(_mm_lzcnt_epi32(a0)); } - else if constexpr( current_api >= sse2 ) + else { //Inspired from: https://stackoverflow.com/questions/58823140/count-leading-zero-bits-for-each-element-in-avx2-vector-emulate-mm256-lzcnt-ep using ri_t = wide; @@ -55,8 +55,6 @@ namespace eve::detail } else return countl_zero.behavior(cpu_{}, opts, a0); } - else - return countl_zero.behavior(cpu_{}, opts, a0); } else { diff --git a/include/eve/module/core/regular/impl/simd/x86/div.hpp b/include/eve/module/core/regular/impl/simd/x86/div.hpp index 2cf2db193e..382904c602 100644 --- a/include/eve/module/core/regular/impl/simd/x86/div.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/div.hpp @@ -96,22 +96,19 @@ namespace eve::detail } else if constexpr (floating_value && !O::contains(strict) && (O::contains(lower) || O::contains(upper))) { - if constexpr(current_api >= avx512) + auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; + + if constexpr ( c == category::float64x8 ) return _mm512_add_round_pd (v, w, dir); + else if constexpr ( c == category::float32x16 ) return _mm512_add_round_ps (v, w, dir); + else if constexpr ( c == category::float64x4 || c == category::float64x2 || + c == category::float32x8 || c == category::float32x4 || c == category::float32x2) { - auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; - if constexpr ( c == category::float64x8 ) return _mm512_add_round_pd (v, w, dir); - else if constexpr ( c == category::float32x16 ) return _mm512_add_round_ps (v, w, dir); - else if constexpr ( c == category::float64x4 || c == category::float64x2 || - c == category::float32x8 || c == category::float32x4 || c == category::float32x2) - { - auto vv = combine(v, v); - auto ww = combine(w, w); - auto vvpww = div[o](vv, ww); - auto s = slice(vvpww, eve::upper_); - return if_else(cx,s,src); - } + auto vv = combine(v, v); + auto ww = combine(w, w); + auto vvpww = div[o](vv, ww); + auto s = slice(vvpww, eve::upper_); + return if_else(cx,s,src); } - return div.behavior(cpu_{}, o, v, w); } else if constexpr (O::contains(toward_zero) || O::contains(upward) || O::contains(downward) || O::contains(to_nearest)) diff --git a/include/eve/module/core/regular/impl/simd/x86/fma.hpp b/include/eve/module/core/regular/impl/simd/x86/fma.hpp index 43e0c45602..584fac755c 100644 --- a/include/eve/module/core/regular/impl/simd/x86/fma.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/fma.hpp @@ -103,26 +103,22 @@ namespace eve::detail { if constexpr(!O::contains(strict)) { - if constexpr(current_api >= avx512) + auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; + if constexpr ( cx == category::float64x8 ) return _mm512_mask_fmadd_round_pd (a, m, b, c, dir); + else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fmadd_round_ps (a, m, b, c, dir); + else if constexpr ( cx == category::float64x4 || cx == category::float64x2 || + cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2) { - auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; - if constexpr ( cx == category::float64x8 ) return _mm512_mask_fmadd_round_pd (a, m, b, c, dir); - else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fmadd_round_ps (a, m, b, c, dir); - else if constexpr ( cx == category::float64x4 || cx == category::float64x2 || - cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2) - { - auto aa = eve::combine(a, a); - auto bb = eve::combine(b, b); - auto cc = eve::combine(c, c); - auto aabbcc = fma[opts.drop(condition_key)](aa, bb, cc); - auto s = slice(aabbcc, eve::upper_); - return if_else(mask,s,src); - } - else return fma.behavior(cpu_{}, opts, a, b, c); + auto aa = eve::combine(a, a); + auto bb = eve::combine(b, b); + auto cc = eve::combine(c, c); + auto aabbcc = fma[opts.drop(condition_key)](aa, bb, cc); + auto s = slice(aabbcc, eve::upper_); + return if_else(mask,s,src); } - else return fma.behavior(cpu_{}, opts, a, b, c); + else return fma.behavior(cpu_{}, opts, a, b, c); } - else return fma.behavior(cpu_{}, opts, a, b, c); + else return fma.behavior(cpu_{}, opts, a, b, c); } else if constexpr( cx == category::float32x16 ) return _mm512_mask_fmadd_ps(a, m, b, c); else if constexpr( cx == category::float64x8 ) return _mm512_mask_fmadd_pd(a, m, b, c); diff --git a/include/eve/module/core/regular/impl/simd/x86/fms.hpp b/include/eve/module/core/regular/impl/simd/x86/fms.hpp index 199814bc8e..419d269f87 100644 --- a/include/eve/module/core/regular/impl/simd/x86/fms.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/fms.hpp @@ -107,24 +107,20 @@ namespace eve::detail { if constexpr(!O::contains(strict)) { - if constexpr(current_api >= avx512) + auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; + if constexpr ( cx == category::float64x8 ) return _mm512_mask_fmsub_round_pd (v, m, w, x, dir); + else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fmsub_round_ps (v, m, w, x, dir); + else if constexpr ( cx == category::float64x4 || cx == category::float64x2 || + cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2) { - auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; - if constexpr ( cx == category::float64x8 ) return _mm512_mask_fmsub_round_pd (v, m, w, x, dir); - else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fmsub_round_ps (v, m, w, x, dir); - else if constexpr ( cx == category::float64x4 || cx == category::float64x2 || - cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2) - { - auto aa = eve::combine(v, v); - auto bb = eve::combine(w, w); - auto cc = eve::combine(x, x); - auto aabbcc = fms[opts.drop(condition_key)](aa, bb, cc); - auto s = slice(aabbcc, eve::upper_); - return if_else(mask,s,src); - } - else return fms.behavior(cpu_{}, opts, v, w, x); + auto aa = eve::combine(v, v); + auto bb = eve::combine(w, w); + auto cc = eve::combine(x, x); + auto aabbcc = fms[opts.drop(condition_key)](aa, bb, cc); + auto s = slice(aabbcc, eve::upper_); + return if_else(mask,s,src); } - else return fms.behavior(cpu_{}, opts, v, w, x); + else return fms.behavior(cpu_{}, opts, v, w, x); } else return fms.behavior(cpu_{}, opts, v, w, x); } diff --git a/include/eve/module/core/regular/impl/simd/x86/fnma.hpp b/include/eve/module/core/regular/impl/simd/x86/fnma.hpp index 91e9785efb..89bf13374a 100644 --- a/include/eve/module/core/regular/impl/simd/x86/fnma.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/fnma.hpp @@ -108,26 +108,22 @@ namespace eve::detail { if constexpr(!O::contains(strict)) { - if constexpr(current_api >= avx512) + auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; + if constexpr ( cx == category::float64x8 ) return _mm512_mask_fnmadd_round_pd (a, m, b, c, dir); + else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fnmadd_round_ps (a, m, b, c, dir); + else if constexpr ( cx == category::float64x4 || cx == category::float64x2 || + cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2) { - auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; - if constexpr ( cx == category::float64x8 ) return _mm512_mask_fnmadd_round_pd (a, m, b, c, dir); - else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fnmadd_round_ps (a, m, b, c, dir); - else if constexpr ( cx == category::float64x4 || cx == category::float64x2 || - cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2) - { - auto aa = eve::combine(a, a); - auto bb = eve::combine(b, b); - auto cc = eve::combine(c, c); - auto aabbcc = fnma[opts.drop(condition_key)](aa, bb, cc); - auto s = slice(aabbcc, eve::upper_); - return if_else(mask,s,src); - } - else return fnma.behavior(cpu_{}, opts, a, b, c); + auto aa = eve::combine(a, a); + auto bb = eve::combine(b, b); + auto cc = eve::combine(c, c); + auto aabbcc = fnma[opts.drop(condition_key)](aa, bb, cc); + auto s = slice(aabbcc, eve::upper_); + return if_else(mask,s,src); } - else return fnma.behavior(cpu_{}, opts, a, b, c); + else return fnma.behavior(cpu_{}, opts, a, b, c); } - else return fnma.behavior(cpu_{}, opts, a, b, c); + else return fnma.behavior(cpu_{}, opts, a, b, c); } if ((O::contains(lower) || O::contains(upper))&& floating_value) return if_else(mask, eve::fnma[opts.drop(condition_key)](a, b, c), a); else if constexpr( cx == category::float32x16 ) return _mm512_mask_fnmadd_ps(a, m, b, c); diff --git a/include/eve/module/core/regular/impl/simd/x86/fnms.hpp b/include/eve/module/core/regular/impl/simd/x86/fnms.hpp index 7ba679f138..c03e6bdf00 100644 --- a/include/eve/module/core/regular/impl/simd/x86/fnms.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/fnms.hpp @@ -105,26 +105,22 @@ namespace eve::detail { if constexpr(!O::contains(strict)) { - if constexpr(current_api >= avx512) + auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; + if constexpr ( cx == category::float64x8 ) return _mm512_mask_fnmsub_round_pd (a, m, b, c, dir); + else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fnmsub_round_ps (a, m, b, c, dir); + else if constexpr ( cx == category::float64x4 || cx == category::float64x2 || + cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2) { - auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; - if constexpr ( cx == category::float64x8 ) return _mm512_mask_fnmsub_round_pd (a, m, b, c, dir); - else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fnmsub_round_ps (a, m, b, c, dir); - else if constexpr ( cx == category::float64x4 || cx == category::float64x2 || - cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2) - { - auto aa = eve::combine(a, a); - auto bb = eve::combine(b, b); - auto cc = eve::combine(c, c); - auto aabbcc = fnms[opts.drop(condition_key)](aa, bb, cc); - auto s = slice(aabbcc, eve::upper_); - return if_else(mask,s,src); - } - else return fnms.behavior(cpu_{}, opts, a, b, c); + auto aa = eve::combine(a, a); + auto bb = eve::combine(b, b); + auto cc = eve::combine(c, c); + auto aabbcc = fnms[opts.drop(condition_key)](aa, bb, cc); + auto s = slice(aabbcc, eve::upper_); + return if_else(mask,s,src); } - else return fnms.behavior(cpu_{}, opts, a, b, c); + else return fnms.behavior(cpu_{}, opts, a, b, c); } - else return fnms.behavior(cpu_{}, opts, a, b, c); + else return fnms.behavior(cpu_{}, opts, a, b, c); } if ((O::contains(lower) || O::contains(upper))&& floating_value) return if_else(mask, eve::fnms[opts.drop(condition_key)](a, b, c), a); else if constexpr( cx == category::float32x16 ) return _mm512_mask_fnmsub_ps(a, m, b, c); diff --git a/include/eve/module/core/regular/impl/simd/x86/mul.hpp b/include/eve/module/core/regular/impl/simd/x86/mul.hpp index f28bea9e37..ad0187eb38 100644 --- a/include/eve/module/core/regular/impl/simd/x86/mul.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/mul.hpp @@ -135,23 +135,19 @@ namespace eve::detail if constexpr(floating_value &&( O::contains(lower) || O::contains(upper)) && !O::contains(strict)) { - if constexpr(current_api >= avx512) + auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; + if constexpr ( c == category::float64x8 ) return _mm512_mask_mul_round_pd (src, m, a, b, dir); + else if constexpr ( c == category::float32x16 ) return _mm512_mask_mul_round_ps (src, m, a, b, dir); + else if constexpr ( c == category::float64x4 || c == category::float64x2 || + c == category::float32x8 || c == category::float32x4 || c == category::float32x2) { - auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; - if constexpr ( c == category::float64x8 ) return _mm512_mask_mul_round_pd (src, m, a, b, dir); - else if constexpr ( c == category::float32x16 ) return _mm512_mask_mul_round_ps (src, m, a, b, dir); - else if constexpr ( c == category::float64x4 || c == category::float64x2 || - c == category::float32x8 || c == category::float32x4 || c == category::float32x2) - { - auto aa = eve::combine(a, a); - auto bb = eve::combine(b, b); - auto aapbb = mul[opts.drop(condition_key)](aa, bb); - auto s = slice(aapbb, eve::upper_); - return if_else(cx,s,src); - } - else return add.behavior(cpu_{}, opts, a, b); + auto aa = eve::combine(a, a); + auto bb = eve::combine(b, b); + auto aapbb = mul[opts.drop(condition_key)](aa, bb); + auto s = slice(aapbb, eve::upper_); + return if_else(cx,s,src); } - else return add.behavior(cpu_{}, opts, a, b); + else return add.behavior(cpu_{}, opts, a, b); } else if constexpr(O::contains(saturated)) { diff --git a/include/eve/module/core/regular/impl/simd/x86/rec.hpp b/include/eve/module/core/regular/impl/simd/x86/rec.hpp index 7de1c0d438..4f87367e45 100644 --- a/include/eve/module/core/regular/impl/simd/x86/rec.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/rec.hpp @@ -62,22 +62,21 @@ namespace eve::detail } else if constexpr(O::contains(pedantic) || current_api < avx512) { - if (current_api >= avx512) + if constexpr (current_api >= avx512) { if constexpr( c == category::float32x16) return _mm512_div_ps(one(eve::as(v)), v); else if constexpr( c == category::float64x8 ) return _mm512_div_pd(one(eve::as(v)), v); } - if (current_api >= avx) + else if constexpr (current_api >= avx) { if constexpr( c == category::float32x8 ) return _mm256_div_ps(one(eve::as(v)), v); else if constexpr( c == category::float64x4 ) return _mm256_div_pd(one(eve::as(v)), v); } - if (current_api >= sse2) + else { if constexpr( c == category::float32x4 ) return _mm_div_ps(one(eve::as(v)), v); else if constexpr( c == category::float64x2 ) return _mm_div_pd(one(eve::as(v)), v); } - return rec.behavior(cpu_{}, o, v); } else { diff --git a/include/eve/module/core/regular/impl/simd/x86/sub.hpp b/include/eve/module/core/regular/impl/simd/x86/sub.hpp index 15dd6a0103..c391d2fd37 100644 --- a/include/eve/module/core/regular/impl/simd/x86/sub.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/sub.hpp @@ -134,42 +134,36 @@ namespace eve::detail if constexpr(floating_value &&( O::contains(lower) || O::contains(upper)) && !O::contains(strict)) { - if constexpr(current_api >= avx512) + auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; + if constexpr ( c == category::float64x8 ) return _mm512_mask_sub_round_pd (src, m, v, w, dir); + else if constexpr ( c == category::float32x16 ) return _mm512_mask_sub_round_ps (src, m, v, w, dir); + else if constexpr ( c == category::float64x4 || c == category::float64x2 || + c == category::float32x8 || c == category::float32x4 || c == category::float32x2) { - auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC; - if constexpr ( c == category::float64x8 ) return _mm512_mask_sub_round_pd (src, m, v, w, dir); - else if constexpr ( c == category::float32x16 ) return _mm512_mask_sub_round_ps (src, m, v, w, dir); - else if constexpr ( c == category::float64x4 || c == category::float64x2 || - c == category::float32x8 || c == category::float32x4 || c == category::float32x2) - { - auto vv = eve::combine(v, w); - auto ww = eve::combine(w, v); - auto vvpww = sub[opts.drop(condition_key)](vv, ww); - auto s = slice(vvpww, eve::upper_); - return if_else(cx,s,src); - } - else return add.behavior(cpu_{}, opts, v, w); + auto vv = eve::combine(v, w); + auto ww = eve::combine(w, v); + auto vvpww = sub[opts.drop(condition_key)](vv, ww); + auto s = slice(vvpww, eve::upper_); + return if_else(cx,s,src); } - else return add.behavior(cpu_{}, opts, v, w); + else return add.behavior(cpu_{}, opts, v, w); } else if constexpr(O::contains(saturated)) { - constexpr auto sup_avx2 = current_api >= avx2; - - if constexpr( floating_value ) return sub[cx](v, w); - else if constexpr( c == category::int16x32 ) return _mm512_mask_subs_epi16(src, m, v, w); - else if constexpr( c == category::uint16x32 ) return _mm512_mask_subs_epu16(src, m, v, w); - else if constexpr( c == category::int8x64 ) return _mm512_mask_subs_epi8(src, m, v, w); - else if constexpr( c == category::uint8x64 ) return _mm512_mask_subs_epu8(src, m, v, w); - else if constexpr( sup_avx2 && c == category::int16x16 ) return _mm256_mask_subs_epi16(src, m, v, w); - else if constexpr( sup_avx2 && c == category::uint16x16 ) return _mm256_mask_subs_epu16(src, m, v, w); - else if constexpr( sup_avx2 && c == category::int8x32 ) return _mm256_mask_subs_epi8(src, m, v, w); - else if constexpr( sup_avx2 && c == category::uint8x32 ) return _mm256_mask_subs_epu8(src, m, v, w); - else if constexpr( c == category::int16x8 ) return _mm_mask_subs_epi16(src, m, v, w); - else if constexpr( c == category::uint16x8 ) return _mm_mask_subs_epu16(src, m, v, w); - else if constexpr( c == category::int8x16 ) return _mm_mask_subs_epi8(src, m, v, w); - else if constexpr( c == category::uint8x16 ) return _mm_mask_subs_epu8(src, m, v, w); - else return sub.behavior(cpu_{}, opts, v, w); + if constexpr( floating_value ) return sub[cx](v, w); + else if constexpr( c == category::int16x32 ) return _mm512_mask_subs_epi16(src, m, v, w); + else if constexpr( c == category::uint16x32 ) return _mm512_mask_subs_epu16(src, m, v, w); + else if constexpr( c == category::int8x64 ) return _mm512_mask_subs_epi8(src, m, v, w); + else if constexpr( c == category::uint8x64 ) return _mm512_mask_subs_epu8(src, m, v, w); + else if constexpr( c == category::int16x16 ) return _mm256_mask_subs_epi16(src, m, v, w); + else if constexpr( c == category::uint16x16 ) return _mm256_mask_subs_epu16(src, m, v, w); + else if constexpr( c == category::int8x32 ) return _mm256_mask_subs_epi8(src, m, v, w); + else if constexpr( c == category::uint8x32 ) return _mm256_mask_subs_epu8(src, m, v, w); + else if constexpr( c == category::int16x8 ) return _mm_mask_subs_epi16(src, m, v, w); + else if constexpr( c == category::uint16x8 ) return _mm_mask_subs_epu16(src, m, v, w); + else if constexpr( c == category::int8x16 ) return _mm_mask_subs_epi8(src, m, v, w); + else if constexpr( c == category::uint8x16 ) return _mm_mask_subs_epu8(src, m, v, w); + else return sub.behavior(cpu_{}, opts, v, w); } else { From 80fe8a2f55d9bfd2e4dcc634ec76b76cf3e91278 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 21 Nov 2024 13:46:47 +0100 Subject: [PATCH 2/7] more constexpr --- include/eve/arch/riscv/rvv_utils.hpp | 8 ++++---- .../module/core/regular/impl/simd/x86/negate.hpp | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/eve/arch/riscv/rvv_utils.hpp b/include/eve/arch/riscv/rvv_utils.hpp index e783ca92b8..24e191abe6 100644 --- a/include/eve/arch/riscv/rvv_utils.hpp +++ b/include/eve/arch/riscv/rvv_utils.hpp @@ -20,16 +20,16 @@ constexpr auto rvv_lmul_v = [] { constexpr std::ptrdiff_t m1_len = __riscv_v_fixed_vlen; constexpr std::ptrdiff_t min_len = m1_len * sizeof(scalar_type) / 8; - std::ptrdiff_t expected_len = sizeof(scalar_type) * 8 * cardinal::value; - std::ptrdiff_t reg_len = std::max(min_len, expected_len); - if( reg_len >= m1_len ) return static_cast(reg_len / m1_len); + constexpr std::ptrdiff_t expected_len = sizeof(scalar_type) * 8 * cardinal::value; + constexpr std::ptrdiff_t reg_len = std::max(min_len, expected_len); + if constexpr ( reg_len >= m1_len ) return static_cast(reg_len / m1_len); else return -static_cast(m1_len / reg_len); }(); template constexpr auto rvv_logical_ratio_v = [] { - auto lmul = rvv_lmul_v; + constexpr auto lmul = rvv_lmul_v; constexpr auto element_size = sizeof(scalar_type) * 8; return lmul > 0 ? element_size / lmul : element_size * (-lmul); }(); diff --git a/include/eve/module/core/regular/impl/simd/x86/negate.hpp b/include/eve/module/core/regular/impl/simd/x86/negate.hpp index cbe9fc387c..dd909812c3 100644 --- a/include/eve/module/core/regular/impl/simd/x86/negate.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/negate.hpp @@ -21,10 +21,10 @@ namespace eve::detail wide a1) noexcept requires std::same_as, x86_128_> { - if( sizeof(T) == 8 ) return negate.behavior(cpu_{}, opts, a0, a1); - else if( sizeof(T) == 4 ) return _mm_sign_epi32(a0, a1); - else if( sizeof(T) == 2 ) return _mm_sign_epi16(a0, a1); - else if( sizeof(T) == 1 ) return _mm_sign_epi8(a0, a1); + if constexpr ( sizeof(T) == 8 ) return negate.behavior(cpu_{}, opts, a0, a1); + else if constexpr ( sizeof(T) == 4 ) return _mm_sign_epi32(a0, a1); + else if constexpr ( sizeof(T) == 2 ) return _mm_sign_epi16(a0, a1); + else if constexpr ( sizeof(T) == 1 ) return _mm_sign_epi8(a0, a1); } // ----------------------------------------------------------------------------------------------- @@ -36,9 +36,9 @@ namespace eve::detail wide a1) noexcept requires std::same_as, x86_256_> { - if( sizeof(T) == 8 ) return negate.behavior(cpu_{}, opts, a0, a1); - else if( sizeof(T) == 4 ) return _mm256_sign_epi32(a0, a1); - else if( sizeof(T) == 2 ) return _mm256_sign_epi16(a0, a1); - else if( sizeof(T) == 1 ) return _mm256_sign_epi8(a0, a1); + if constexpr ( sizeof(T) == 8 ) return negate.behavior(cpu_{}, opts, a0, a1); + else if constexpr ( sizeof(T) == 4 ) return _mm256_sign_epi32(a0, a1); + else if constexpr ( sizeof(T) == 2 ) return _mm256_sign_epi16(a0, a1); + else if constexpr ( sizeof(T) == 1 ) return _mm256_sign_epi8(a0, a1); } } From 90174f1712562c37bec8d10f47dbcc305fa713a3 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 21 Nov 2024 14:47:53 +0100 Subject: [PATCH 3/7] fix control reach of non-void fn --- include/eve/module/core/regular/impl/simd/x86/rec.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/eve/module/core/regular/impl/simd/x86/rec.hpp b/include/eve/module/core/regular/impl/simd/x86/rec.hpp index 4f87367e45..07023def12 100644 --- a/include/eve/module/core/regular/impl/simd/x86/rec.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/rec.hpp @@ -66,16 +66,19 @@ namespace eve::detail { if constexpr( c == category::float32x16) return _mm512_div_ps(one(eve::as(v)), v); else if constexpr( c == category::float64x8 ) return _mm512_div_pd(one(eve::as(v)), v); + else return rec.behavior(cpu_{}, o, v); } else if constexpr (current_api >= avx) { if constexpr( c == category::float32x8 ) return _mm256_div_ps(one(eve::as(v)), v); else if constexpr( c == category::float64x4 ) return _mm256_div_pd(one(eve::as(v)), v); + else return rec.behavior(cpu_{}, o, v); } else { if constexpr( c == category::float32x4 ) return _mm_div_ps(one(eve::as(v)), v); else if constexpr( c == category::float64x2 ) return _mm_div_pd(one(eve::as(v)), v); + else return rec.behavior(cpu_{}, o, v); } } else From b05416f44f3f0df62a40fa46fbf9b09cd2fed3c1 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Fri, 22 Nov 2024 14:07:47 +0100 Subject: [PATCH 4/7] fix --- include/eve/module/core/named_shuffles/blend.hpp | 2 +- include/eve/module/core/named_shuffles/slide.hpp | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/eve/module/core/named_shuffles/blend.hpp b/include/eve/module/core/named_shuffles/blend.hpp index cf9410821c..a7b81cd250 100644 --- a/include/eve/module/core/named_shuffles/blend.hpp +++ b/include/eve/module/core/named_shuffles/blend.hpp @@ -111,7 +111,7 @@ struct blend_t if constexpr ( current_api >= neon ) { if constexpr ( current_api >= asimd && (count_from_x == 1 || count_from_y == 1) ) return 2; - return 3; + else return 3; } else if constexpr ( current_api >= sse2 ) { diff --git a/include/eve/module/core/named_shuffles/slide.hpp b/include/eve/module/core/named_shuffles/slide.hpp index 289ca6aaae..44e6f9550e 100644 --- a/include/eve/module/core/named_shuffles/slide.hpp +++ b/include/eve/module/core/named_shuffles/slide.hpp @@ -119,13 +119,13 @@ struct slide_left_impl_t else if constexpr ( current_api >= avx512 ) { if constexpr ( is_shift_by_4 ) return 2; - if constexpr ( reg_size <= 16 ) return 2; - if constexpr ( is_shift_by_2 ) return 3; - if constexpr (reg_size == 64) return 5; // this is not yet done - else return 2; + else if constexpr ( reg_size <= 16 ) return 2; + else if constexpr ( is_shift_by_2 ) return 3; + else if constexpr (reg_size == 64) return 5; // this is not yet done } - else if constexpr ( reg_size == 32 && is_shift_by_16 ) return 2; - else if constexpr ( current_api >= avx2 && reg_size == 32 ) { return 4; } + + if constexpr ( reg_size == 32 && is_shift_by_16 ) return 2; + else if constexpr ( current_api >= avx2 && reg_size == 32 ) return 4; else return 2; } } @@ -157,7 +157,8 @@ struct slide_left_impl_t if constexpr ( is_shift_by_4 ) return 2; else if constexpr ( is_shift_by_2 ) return 3; } - else if constexpr ( is_shift_by_16 && reg_size == 32 ) return 2; + + if constexpr ( is_shift_by_16 && reg_size == 32 ) return 2; else if constexpr ( current_api >= avx2 && reg_size == 32 ) return 4; else if constexpr ( current_api >= sse4_2 ) return 2; From c8ed87f5fd669673a79fa03f95a607ddc88aae84 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Sun, 24 Nov 2024 20:23:23 +0100 Subject: [PATCH 5/7] marked constexpr-only function as consteval --- include/eve/arch/cpu/top_bits.hpp | 4 ++-- include/eve/module/core/named_shuffles/blend.hpp | 8 ++++---- include/eve/module/core/named_shuffles/broadcast_lane.hpp | 4 ++-- include/eve/module/core/named_shuffles/reverse.hpp | 4 ++-- .../module/core/named_shuffles/reverse_in_subgroups.hpp | 4 ++-- include/eve/module/core/named_shuffles/slide.hpp | 6 +++--- include/eve/module/core/named_shuffles/swap_adjacent.hpp | 4 ++-- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/include/eve/arch/cpu/top_bits.hpp b/include/eve/arch/cpu/top_bits.hpp index e7acd805f9..5df4477f6a 100644 --- a/include/eve/arch/cpu/top_bits.hpp +++ b/include/eve/arch/cpu/top_bits.hpp @@ -106,13 +106,13 @@ namespace detail else return movemask(logical_type{}).first; } - EVE_FORCEINLINE static constexpr std::ptrdiff_t bits_per_element_impl() + EVE_FORCEINLINE static consteval std::ptrdiff_t bits_per_element_impl() { if constexpr ( is_aggregated ) return top_bits::bits_per_element; else return decltype(movemask(logical_type{}).second){}(); } - static constexpr bool is_cheap_impl() + static consteval bool is_cheap_impl() { if constexpr ( has_emulated_abi_v ) return true; else if constexpr ( is_aggregated ) return top_bits::is_cheap; diff --git a/include/eve/module/core/named_shuffles/blend.hpp b/include/eve/module/core/named_shuffles/blend.hpp index a7b81cd250..345ddac8b3 100644 --- a/include/eve/module/core/named_shuffles/blend.hpp +++ b/include/eve/module/core/named_shuffles/blend.hpp @@ -68,7 +68,7 @@ namespace eve struct blend_t { template - static constexpr auto pattern(eve::as, eve::as, eve::fixed, pattern_t) + static consteval auto pattern(eve::as, eve::as, eve::fixed, pattern_t) { static_assert(((0 <= I && I <= 1) && ...), "pattern for blend has to only contain 0 and 1"); static_assert(pattern_t::size() * G == T::size(), "pattern has wrong number of elements"); @@ -82,7 +82,7 @@ struct blend_t } template - static constexpr std::ptrdiff_t level(eve::as, eve::as, eve::fixed g, pattern_t p) + static consteval std::ptrdiff_t level(eve::as, eve::as, eve::fixed g, pattern_t p) { if constexpr( sizeof...(I) == 1 ) return 0; else if constexpr( eve::has_aggregated_abi_v ) @@ -142,14 +142,14 @@ struct blend_t } template - static constexpr auto + static consteval auto pattern(eve::as tgt, eve::as, eve::fixed g, pattern_formula auto gen) { return pattern(tgt, tgt, g, fix_pattern(gen)); } template - static constexpr auto level(eve::as tgt, eve::as, eve::fixed g, pattern_formula auto gen) + static consteval auto level(eve::as tgt, eve::as, eve::fixed g, pattern_formula auto gen) { return level(tgt, tgt, g, fix_pattern(gen)); } diff --git a/include/eve/module/core/named_shuffles/broadcast_lane.hpp b/include/eve/module/core/named_shuffles/broadcast_lane.hpp index bcdc341c7c..71f677a14a 100644 --- a/include/eve/module/core/named_shuffles/broadcast_lane.hpp +++ b/include/eve/module/core/named_shuffles/broadcast_lane.hpp @@ -59,14 +59,14 @@ namespace eve struct broadcast_lane_t { template - static constexpr auto pattern(eve::as, eve::fixed, eve::index_t) + static consteval auto pattern(eve::as, eve::fixed, eve::index_t) { static_assert(I < T::size() / G); return eve::fix_pattern([](int, int) { return I; }); } template - static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::index_t i) + static consteval std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::index_t i) { constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; diff --git a/include/eve/module/core/named_shuffles/reverse.hpp b/include/eve/module/core/named_shuffles/reverse.hpp index ec89ef2264..ca2db55ecb 100644 --- a/include/eve/module/core/named_shuffles/reverse.hpp +++ b/include/eve/module/core/named_shuffles/reverse.hpp @@ -55,13 +55,13 @@ namespace eve //================================================================================================ struct reverse_t { - template static constexpr auto pattern(eve::as, eve::fixed) + template static consteval auto pattern(eve::as, eve::fixed) { return eve::fix_pattern([](int i, int size) { return size - i - 1; }); } template - static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g) + static consteval std::ptrdiff_t level(eve::as tgt, eve::fixed g) { if constexpr( eve::has_aggregated_abi_v ) { diff --git a/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp b/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp index 856d664892..76b78a9191 100644 --- a/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp +++ b/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp @@ -62,7 +62,7 @@ namespace eve struct reverse_in_subgroups_t { template - static constexpr auto pattern(eve::as, eve::fixed, eve::fixed) + static consteval auto pattern(eve::as, eve::fixed, eve::fixed) { static_assert(SubG <= T::size() / G); static_assert(SubG >= 1); @@ -77,7 +77,7 @@ struct reverse_in_subgroups_t } template - static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::fixed sub_g) + static consteval std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::fixed sub_g) { constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; constexpr std::size_t sub_size = g_size * SubG; diff --git a/include/eve/module/core/named_shuffles/slide.hpp b/include/eve/module/core/named_shuffles/slide.hpp index 44e6f9550e..f3e981f049 100644 --- a/include/eve/module/core/named_shuffles/slide.hpp +++ b/include/eve/module/core/named_shuffles/slide.hpp @@ -68,14 +68,14 @@ struct slide_left_impl_t { // One agr template - static constexpr auto pattern(eve::as, eve::fixed, eve::index_t) + static consteval auto pattern(eve::as, eve::fixed, eve::index_t) { static_assert(G > 0 && 0 <= S && S <= T::size() / G); return eve::fix_pattern([](int i, int n) { return (i + S) < n ? i + S : na_; }); } template - static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::index_t s) + static consteval std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::index_t s) { using abi_t = typename T::abi_type; constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); @@ -133,7 +133,7 @@ struct slide_left_impl_t // Two args template - static constexpr std::ptrdiff_t + static consteval std::ptrdiff_t level(eve::as tgt, eve::as, eve::fixed g, eve::index_t s) { using abi_t = typename T::abi_type; diff --git a/include/eve/module/core/named_shuffles/swap_adjacent.hpp b/include/eve/module/core/named_shuffles/swap_adjacent.hpp index 0f3ae72b46..e46be290b5 100644 --- a/include/eve/module/core/named_shuffles/swap_adjacent.hpp +++ b/include/eve/module/core/named_shuffles/swap_adjacent.hpp @@ -53,7 +53,7 @@ namespace eve struct swap_adjacent_t { template - static constexpr auto pattern(eve::as, eve::fixed) + static consteval auto pattern(eve::as, eve::fixed) requires(G < T::size()) { return eve::fix_pattern( @@ -65,7 +65,7 @@ struct swap_adjacent_t } template - static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g) + static consteval std::ptrdiff_t level(eve::as tgt, eve::fixed g) { constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); From c6ff5b3f481d6682c8e044f6d4ebab14f72cb57d Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Mon, 25 Nov 2024 12:38:39 +0100 Subject: [PATCH 6/7] Revert "marked constexpr-only function as consteval" This reverts commit c8ed87f5fd669673a79fa03f95a607ddc88aae84. --- include/eve/arch/cpu/top_bits.hpp | 4 ++-- include/eve/module/core/named_shuffles/blend.hpp | 8 ++++---- include/eve/module/core/named_shuffles/broadcast_lane.hpp | 4 ++-- include/eve/module/core/named_shuffles/reverse.hpp | 4 ++-- .../module/core/named_shuffles/reverse_in_subgroups.hpp | 4 ++-- include/eve/module/core/named_shuffles/slide.hpp | 6 +++--- include/eve/module/core/named_shuffles/swap_adjacent.hpp | 4 ++-- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/include/eve/arch/cpu/top_bits.hpp b/include/eve/arch/cpu/top_bits.hpp index 5df4477f6a..e7acd805f9 100644 --- a/include/eve/arch/cpu/top_bits.hpp +++ b/include/eve/arch/cpu/top_bits.hpp @@ -106,13 +106,13 @@ namespace detail else return movemask(logical_type{}).first; } - EVE_FORCEINLINE static consteval std::ptrdiff_t bits_per_element_impl() + EVE_FORCEINLINE static constexpr std::ptrdiff_t bits_per_element_impl() { if constexpr ( is_aggregated ) return top_bits::bits_per_element; else return decltype(movemask(logical_type{}).second){}(); } - static consteval bool is_cheap_impl() + static constexpr bool is_cheap_impl() { if constexpr ( has_emulated_abi_v ) return true; else if constexpr ( is_aggregated ) return top_bits::is_cheap; diff --git a/include/eve/module/core/named_shuffles/blend.hpp b/include/eve/module/core/named_shuffles/blend.hpp index 345ddac8b3..a7b81cd250 100644 --- a/include/eve/module/core/named_shuffles/blend.hpp +++ b/include/eve/module/core/named_shuffles/blend.hpp @@ -68,7 +68,7 @@ namespace eve struct blend_t { template - static consteval auto pattern(eve::as, eve::as, eve::fixed, pattern_t) + static constexpr auto pattern(eve::as, eve::as, eve::fixed, pattern_t) { static_assert(((0 <= I && I <= 1) && ...), "pattern for blend has to only contain 0 and 1"); static_assert(pattern_t::size() * G == T::size(), "pattern has wrong number of elements"); @@ -82,7 +82,7 @@ struct blend_t } template - static consteval std::ptrdiff_t level(eve::as, eve::as, eve::fixed g, pattern_t p) + static constexpr std::ptrdiff_t level(eve::as, eve::as, eve::fixed g, pattern_t p) { if constexpr( sizeof...(I) == 1 ) return 0; else if constexpr( eve::has_aggregated_abi_v ) @@ -142,14 +142,14 @@ struct blend_t } template - static consteval auto + static constexpr auto pattern(eve::as tgt, eve::as, eve::fixed g, pattern_formula auto gen) { return pattern(tgt, tgt, g, fix_pattern(gen)); } template - static consteval auto level(eve::as tgt, eve::as, eve::fixed g, pattern_formula auto gen) + static constexpr auto level(eve::as tgt, eve::as, eve::fixed g, pattern_formula auto gen) { return level(tgt, tgt, g, fix_pattern(gen)); } diff --git a/include/eve/module/core/named_shuffles/broadcast_lane.hpp b/include/eve/module/core/named_shuffles/broadcast_lane.hpp index 71f677a14a..bcdc341c7c 100644 --- a/include/eve/module/core/named_shuffles/broadcast_lane.hpp +++ b/include/eve/module/core/named_shuffles/broadcast_lane.hpp @@ -59,14 +59,14 @@ namespace eve struct broadcast_lane_t { template - static consteval auto pattern(eve::as, eve::fixed, eve::index_t) + static constexpr auto pattern(eve::as, eve::fixed, eve::index_t) { static_assert(I < T::size() / G); return eve::fix_pattern([](int, int) { return I; }); } template - static consteval std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::index_t i) + static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::index_t i) { constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; diff --git a/include/eve/module/core/named_shuffles/reverse.hpp b/include/eve/module/core/named_shuffles/reverse.hpp index ca2db55ecb..ec89ef2264 100644 --- a/include/eve/module/core/named_shuffles/reverse.hpp +++ b/include/eve/module/core/named_shuffles/reverse.hpp @@ -55,13 +55,13 @@ namespace eve //================================================================================================ struct reverse_t { - template static consteval auto pattern(eve::as, eve::fixed) + template static constexpr auto pattern(eve::as, eve::fixed) { return eve::fix_pattern([](int i, int size) { return size - i - 1; }); } template - static consteval std::ptrdiff_t level(eve::as tgt, eve::fixed g) + static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g) { if constexpr( eve::has_aggregated_abi_v ) { diff --git a/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp b/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp index 76b78a9191..856d664892 100644 --- a/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp +++ b/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp @@ -62,7 +62,7 @@ namespace eve struct reverse_in_subgroups_t { template - static consteval auto pattern(eve::as, eve::fixed, eve::fixed) + static constexpr auto pattern(eve::as, eve::fixed, eve::fixed) { static_assert(SubG <= T::size() / G); static_assert(SubG >= 1); @@ -77,7 +77,7 @@ struct reverse_in_subgroups_t } template - static consteval std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::fixed sub_g) + static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::fixed sub_g) { constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; constexpr std::size_t sub_size = g_size * SubG; diff --git a/include/eve/module/core/named_shuffles/slide.hpp b/include/eve/module/core/named_shuffles/slide.hpp index f3e981f049..44e6f9550e 100644 --- a/include/eve/module/core/named_shuffles/slide.hpp +++ b/include/eve/module/core/named_shuffles/slide.hpp @@ -68,14 +68,14 @@ struct slide_left_impl_t { // One agr template - static consteval auto pattern(eve::as, eve::fixed, eve::index_t) + static constexpr auto pattern(eve::as, eve::fixed, eve::index_t) { static_assert(G > 0 && 0 <= S && S <= T::size() / G); return eve::fix_pattern([](int i, int n) { return (i + S) < n ? i + S : na_; }); } template - static consteval std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::index_t s) + static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::index_t s) { using abi_t = typename T::abi_type; constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); @@ -133,7 +133,7 @@ struct slide_left_impl_t // Two args template - static consteval std::ptrdiff_t + static constexpr std::ptrdiff_t level(eve::as tgt, eve::as, eve::fixed g, eve::index_t s) { using abi_t = typename T::abi_type; diff --git a/include/eve/module/core/named_shuffles/swap_adjacent.hpp b/include/eve/module/core/named_shuffles/swap_adjacent.hpp index e46be290b5..0f3ae72b46 100644 --- a/include/eve/module/core/named_shuffles/swap_adjacent.hpp +++ b/include/eve/module/core/named_shuffles/swap_adjacent.hpp @@ -53,7 +53,7 @@ namespace eve struct swap_adjacent_t { template - static consteval auto pattern(eve::as, eve::fixed) + static constexpr auto pattern(eve::as, eve::fixed) requires(G < T::size()) { return eve::fix_pattern( @@ -65,7 +65,7 @@ struct swap_adjacent_t } template - static consteval std::ptrdiff_t level(eve::as tgt, eve::fixed g) + static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g) { constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); From 93b76d9c7b53616f14eafefdbdde345d273ab024 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Mon, 25 Nov 2024 15:13:41 +0100 Subject: [PATCH 7/7] revert changes to named_shuffle and arch --- include/eve/arch/cpu/top_bits.hpp | 22 ++-- include/eve/arch/riscv/rvv_utils.hpp | 8 +- .../eve/module/core/named_shuffles/blend.hpp | 68 ++++++------ .../core/named_shuffles/broadcast_lane.hpp | 60 +++++------ .../module/core/named_shuffles/reverse.hpp | 101 +++++++++--------- .../named_shuffles/reverse_in_subgroups.hpp | 47 ++++---- .../eve/module/core/named_shuffles/slide.hpp | 46 ++++---- .../core/named_shuffles/swap_adjacent.hpp | 44 ++++---- 8 files changed, 192 insertions(+), 204 deletions(-) diff --git a/include/eve/arch/cpu/top_bits.hpp b/include/eve/arch/cpu/top_bits.hpp index e7acd805f9..6fe3743dcc 100644 --- a/include/eve/arch/cpu/top_bits.hpp +++ b/include/eve/arch/cpu/top_bits.hpp @@ -114,24 +114,24 @@ namespace detail static constexpr bool is_cheap_impl() { - if constexpr ( has_emulated_abi_v ) return true; - else if constexpr ( is_aggregated ) return top_bits::is_cheap; + if ( has_emulated_abi_v ) return true; + if constexpr ( is_aggregated ) return top_bits::is_cheap; - else if constexpr ( x86_abi ) return true; - else if constexpr ( ppc_abi ) return true; + if ( x86_abi ) return true; + if ( ppc_abi ) return true; - else if constexpr ( arm_abi ) + if ( arm_abi ) { - if constexpr ( static_size == 1 ) return true; - else if constexpr ( static_size * sizeof(scalar_type) <= 4 ) return true; - else if constexpr ( current_api >= eve::asimd ) + if ( static_size == 1 ) return true; + if ( static_size * sizeof(scalar_type) <= 4 ) return true; + if ( current_api >= eve::asimd ) { - if constexpr ( sizeof(scalar_type) >= 2 ) return true; + if ( sizeof(scalar_type) >= 2 ) return true; return static_size <= 8; // 16 chars is expensive } - else return false; + return false; } - else return false; + return false; } public: diff --git a/include/eve/arch/riscv/rvv_utils.hpp b/include/eve/arch/riscv/rvv_utils.hpp index 24e191abe6..e783ca92b8 100644 --- a/include/eve/arch/riscv/rvv_utils.hpp +++ b/include/eve/arch/riscv/rvv_utils.hpp @@ -20,16 +20,16 @@ constexpr auto rvv_lmul_v = [] { constexpr std::ptrdiff_t m1_len = __riscv_v_fixed_vlen; constexpr std::ptrdiff_t min_len = m1_len * sizeof(scalar_type) / 8; - constexpr std::ptrdiff_t expected_len = sizeof(scalar_type) * 8 * cardinal::value; - constexpr std::ptrdiff_t reg_len = std::max(min_len, expected_len); - if constexpr ( reg_len >= m1_len ) return static_cast(reg_len / m1_len); + std::ptrdiff_t expected_len = sizeof(scalar_type) * 8 * cardinal::value; + std::ptrdiff_t reg_len = std::max(min_len, expected_len); + if( reg_len >= m1_len ) return static_cast(reg_len / m1_len); else return -static_cast(m1_len / reg_len); }(); template constexpr auto rvv_logical_ratio_v = [] { - constexpr auto lmul = rvv_lmul_v; + auto lmul = rvv_lmul_v; constexpr auto element_size = sizeof(scalar_type) * 8; return lmul > 0 ? element_size / lmul : element_size * (-lmul); }(); diff --git a/include/eve/module/core/named_shuffles/blend.hpp b/include/eve/module/core/named_shuffles/blend.hpp index a7b81cd250..42ed4339c6 100644 --- a/include/eve/module/core/named_shuffles/blend.hpp +++ b/include/eve/module/core/named_shuffles/blend.hpp @@ -95,50 +95,42 @@ struct blend_t level(as {}, as {}, g, p1)); } - else if constexpr ( ((I == 0) && ...) ) return 0; - else if constexpr ( ((I == 1) && ...) ) return 0; + if( ((I == 0) && ...) ) return 0; + if( ((I == 1) && ...) ) return 0; - else if constexpr ( current_api >= sve ) return logical_simd_value ? 6 : 2; - else if constexpr ( current_api >= avx512 ) return logical_simd_value ? 6 : 2; - else if constexpr ( current_api >= vmx ) return 3; - else + if( current_api >= sve ) return logical_simd_value ? 6 : 2; + if( current_api >= avx512 ) return logical_simd_value ? 6 : 2; + if( current_api >= vmx ) return 3; + + const std::ptrdiff_t g_size = sizeof(element_type_t) * G; + const std::size_t reg_size = sizeof(element_type_t) * T::size(); + const std::size_t count_from_x = ((I == 0) + ...); + const std::size_t count_from_y = ((I == 1) + ...); + + if( current_api >= neon ) { - constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; - constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); - constexpr std::size_t count_from_x = ((I == 0) + ...); - constexpr std::size_t count_from_y = ((I == 1) + ...); + if( current_api >= asimd && (count_from_x == 1 || count_from_y == 1) ) return 2; + return 3; + } - if constexpr ( current_api >= neon ) - { - if constexpr ( current_api >= asimd && (count_from_x == 1 || count_from_y == 1) ) return 2; - else return 3; - } - else if constexpr ( current_api >= sse2 ) - { - if constexpr( current_api == avx && reg_size >= 32 && g_size <= 2 ) - { - using half_t = decltype(T {}.slice(lower_)); - auto [p0, p1] = detail::idxm::slice_pattern::size() / 2>(p); - auto l0 = level(as {}, as {}, g, p0); - auto l1 = level(as {}, as {}, g, p1); - return detail::idxm::add_shuffle_levels(std::array {l0, l1, 4}); - } - else if constexpr ( current_api >= sse4_1 ) - { - return g_size >= 4 ? 2 : 3; - } - else - { - if constexpr ( g_size >= 8 ) return 2; - else if constexpr ( g_size == 2 && reg_size == 4 ) return 6; - else return 7; - } - } - else + if( current_api >= sse2 ) + { + if constexpr( current_api == avx && reg_size >= 32 && g_size <= 2 ) { - return 2; + using half_t = decltype(T {}.slice(lower_)); + auto [p0, p1] = detail::idxm::slice_pattern::size() / 2>(p); + auto l0 = level(as {}, as {}, g, p0); + auto l1 = level(as {}, as {}, g, p1); + return detail::idxm::add_shuffle_levels(std::array {l0, l1, 4}); } + if( current_api >= sse4_1 ) return g_size >= 4 ? 2 : 3; + + if( g_size >= 8 ) return 2; + if( g_size == 2 && reg_size == 4 ) return 6; + return 7; } + + return 2; } template diff --git a/include/eve/module/core/named_shuffles/broadcast_lane.hpp b/include/eve/module/core/named_shuffles/broadcast_lane.hpp index bcdc341c7c..fbbf0b1ea2 100644 --- a/include/eve/module/core/named_shuffles/broadcast_lane.hpp +++ b/include/eve/module/core/named_shuffles/broadcast_lane.hpp @@ -68,8 +68,8 @@ struct broadcast_lane_t template static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::index_t i) { - constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); - constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; + const std::size_t reg_size = sizeof(element_type_t) * T::size(); + const std::ptrdiff_t g_size = sizeof(element_type_t) * G; if constexpr( eve::has_aggregated_abi_v ) { @@ -80,47 +80,47 @@ struct broadcast_lane_t else if constexpr( current_api >= vmx ) return 2; else if constexpr( current_api >= sve ) { - if constexpr ( !logical_value ) return g_size > 8 ? 3 : 2; - else if constexpr ( G == 1 ) return 4; - else if constexpr ( g_size <= 8 ) return 6; - else return 7; + if( !logical_value ) return g_size > 8 ? 3 : 2; + if( G == 1 ) return 4; + if( g_size <= 8 ) return 6; + return 7; } else if constexpr( current_api >= neon ) { - if constexpr ( current_api >= asimd ) return 2; - else if constexpr ( reg_size <= 8 ) return 2; - else return 4; + if( current_api >= asimd ) return 2; + if( reg_size <= 8 ) return 2; + return 4; } // x86 - if constexpr (current_api == avx512 && logical_value) + if (current_api == avx512 && logical_value) { - if constexpr (G == 1) return 4; - else return level(detail::mask_type(tgt), g, i) + 4; + if (G == 1) return 4; + return level(detail::mask_type(tgt), g, i) + 4; } - else if constexpr (reg_size == 64) - { - if constexpr (g_size >= 16) return 2; - else if constexpr (g_size >= 2) return 3; - else return 4; - } - else if constexpr (reg_size == 32) + + if (reg_size == 64) { - if constexpr (g_size >= 16) return 2; - if constexpr (current_api == avx) return 4; - if constexpr (g_size >= 8) return 2; - if constexpr (g_size >= 4) return 3; - if constexpr (g_size >= 2 && current_api == avx512) return 3; - else return 4; + if (g_size >= 16) return 2; + if (g_size >= 2) return 3; + return 4; } - else + + if (reg_size == 32) { - if constexpr ( g_size >= 4 ) return 2; - else if constexpr ( g_size == 2 && reg_size <= 8 ) return 2; - else if constexpr ( current_api >= ssse3 ) return 3; - else return 4; + if (g_size >= 16) return 2; + if (current_api == avx) return 4; + if (g_size >= 8) return 2; + if (g_size >= 4) return 3; + if (g_size >= 2 && current_api == avx512) return 3; + return 4; } + + if ( g_size >= 4 ) return 2; + if ( g_size == 2 && reg_size <= 8 ) return 2; + if ( current_api >= ssse3 ) return 3; + return 4; } }; diff --git a/include/eve/module/core/named_shuffles/reverse.hpp b/include/eve/module/core/named_shuffles/reverse.hpp index ec89ef2264..44612b86ec 100644 --- a/include/eve/module/core/named_shuffles/reverse.hpp +++ b/include/eve/module/core/named_shuffles/reverse.hpp @@ -66,73 +66,68 @@ struct reverse_t if constexpr( eve::has_aggregated_abi_v ) { if constexpr( G == T::size() / 2 ) return 0; - else - { - using half_t = decltype(T {}.slice(lower_)); - return level(as {}, g); - } + using half_t = decltype(T {}.slice(lower_)); + return level(as {}, g); } - else - { - constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; - constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); - constexpr bool is_expected_cardinal = T::size() == eve::expected_cardinal_v>; - if constexpr ( current_api >= sve ) + const std::ptrdiff_t g_size = sizeof(element_type_t) * G; + const std::size_t reg_size = sizeof(element_type_t) * T::size(); + const bool is_expected_cardinal = T::size() == eve::expected_cardinal_v>; + + if( current_api >= sve ) + { + if( !logical_value ) { - if constexpr ( !logical_value ) - { - if constexpr ( reg_size <= 8 ) return 2; - else if constexpr ( is_expected_cardinal && g_size <= 8 ) return 2; - else if constexpr ( is_expected_cardinal && g_size == reg_size / 2 ) return 2; - else return 3; - } - else if constexpr ( is_expected_cardinal && g_size <= 8 ) return 2; - else return level(detail::mask_type(tgt), g) + 4; + if( reg_size <= 8 ) return 2; + if( is_expected_cardinal && g_size <= 8 ) return 2; + if( is_expected_cardinal && g_size == reg_size / 2 ) return 2; + return 3; } + if( is_expected_cardinal && g_size <= 8 ) return 2; + return level(detail::mask_type(tgt), g) + 4; + } - else if constexpr (current_api >= neon) { - if constexpr ( reg_size <= 8 ) return 2; - if constexpr ( g_size == 8 ) return 2; - if constexpr ( current_api >= asimd ) return 3; - else return 4; - } + if (current_api >= neon) { + if ( reg_size <= 8 ) return 2; + if ( g_size == 8 ) return 2; + if ( current_api >= asimd ) return 3; + return 4; + } - else if constexpr ( current_api >= vmx ) return 3; + if( current_api >= vmx ) return 3; - else if constexpr ( current_api == avx512 && logical_value ) { return level(detail::mask_type(tgt), g) + 4; } + if( current_api == avx512 && logical_value ) { return level(detail::mask_type(tgt), g) + 4; } - else if constexpr ( current_api >= avx2 && reg_size >= 32 ) - { - if constexpr ( g_size >= 16 ) return 2; - else if constexpr ( g_size >= 8 ) return reg_size == 64 ? 3 : 2; - else if constexpr ( g_size >= 4 ) return 3; - else if constexpr ( g_size == 2 && current_api >= avx512 ) return 3; - else return 5; - } + if( current_api >= avx2 && reg_size >= 32 ) + { + if( g_size >= 16 ) return 2; + if( g_size >= 8 ) return reg_size == 64 ? 3 : 2; + if( g_size >= 4 ) return 3; + if( g_size == 2 && current_api >= avx512 ) return 3; + return 5; + } - else if constexpr ( current_api == avx && reg_size >= 32 ) - { - if constexpr ( g_size >= 16 ) return 2; - else if constexpr ( g_size >= 4 ) return 4; - else if constexpr ( g_size == 2 && current_api >= avx512 ) return 3; - else return 9; - } + if( current_api == avx && reg_size >= 32 ) + { + if( g_size >= 16 ) return 2; + if( g_size >= 4 ) return 4; + if( g_size == 2 && current_api >= avx512 ) return 3; + return 9; + } - else if constexpr ( g_size >= 4 ) return 2; - else if constexpr ( g_size == 2 && reg_size <= 8 ) return 2; + if( g_size >= 4 ) return 2; + if( g_size == 2 && reg_size <= 8 ) return 2; - else if constexpr ( current_api >= ssse3 ) return 3; + if( current_api >= ssse3 ) return 3; - else if constexpr ( g_size == 2 ) return 6; + if( g_size == 2 ) return 6; - // chars on sse2 - else if constexpr ( reg_size == 2 ) return 6; + // chars on sse2 + if( reg_size == 2 ) return 6; - // swap chars + reverse shorts - else if constexpr ( reg_size <= 8 ) return 8; - else return 12; - } + // swap chars + reverse shorts + if( reg_size <= 8 ) return 8; + return 12; } }; diff --git a/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp b/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp index 856d664892..e1cfcc91f8 100644 --- a/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp +++ b/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp @@ -79,9 +79,9 @@ struct reverse_in_subgroups_t template static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::fixed sub_g) { - constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; - constexpr std::size_t sub_size = g_size * SubG; - constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); + const std::ptrdiff_t g_size = sizeof(element_type_t) * G; + const std::size_t sub_size = g_size * SubG; + const std::size_t reg_size = sizeof(element_type_t) * T::size(); if constexpr( SubG == 1 ) return 0; else if constexpr( SubG == 2 ) return swap_adjacent.level(tgt, g); @@ -93,37 +93,40 @@ struct reverse_in_subgroups_t return level(as {}, g, sub_g); } - else if constexpr ( current_api >= sve ) + if( current_api >= sve ) { - if constexpr ( !logical_value ) + if( !logical_value ) { - if constexpr ( sub_size <= 8 ) return 2; - else return 3; + if( sub_size <= 8 ) return 2; + return 3; } - else return level(detail::mask_type(tgt), g, sub_g) + 4; + return level(detail::mask_type(tgt), g, sub_g) + 4; } - else if constexpr ( current_api >= vmx ) return 3; - else if constexpr ( current_api >= neon ) return 2; + if( current_api >= vmx ) return 3; + if( current_api >= neon ) return 2; - else if constexpr ( current_api == avx512 && logical_value ) + if( current_api == avx512 && logical_value ) { return level(detail::mask_type(tgt), g, sub_g) + 4; } - else if constexpr (sub_size == 32) { - if constexpr (g_size == 8) return 2; - else if constexpr (g_size == 4) return 3; - else if constexpr (g_size == 2 && current_api >= avx512) return 3; - else return 5; + if (sub_size == 32) { + if (g_size == 8) return 2; + if (g_size == 4) return 3; + if (g_size == 2 && current_api >= avx512) return 3; + return 5; } - else if constexpr ( g_size >= 4 ) return 2; - else if constexpr (current_api == avx && reg_size == 32) return 9; - else if constexpr ( current_api >= ssse3 ) return 3; - else if constexpr ( g_size == 2 ) return 4; - else if constexpr (reg_size <= 8) return 8; - else return 10; + if( g_size >= 4 ) return 2; + + if (current_api == avx && reg_size == 32) return 9; + + if( current_api >= ssse3 ) return 3; + + if( g_size == 2 ) return 4; + if (reg_size <= 8) return 8; + return 10; } }; diff --git a/include/eve/module/core/named_shuffles/slide.hpp b/include/eve/module/core/named_shuffles/slide.hpp index 44e6f9550e..8ff91303ee 100644 --- a/include/eve/module/core/named_shuffles/slide.hpp +++ b/include/eve/module/core/named_shuffles/slide.hpp @@ -78,7 +78,7 @@ struct slide_left_impl_t static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g, eve::index_t s) { using abi_t = typename T::abi_type; - constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); + const std::size_t reg_size = sizeof(element_type_t) * T::size(); constexpr std::ptrdiff_t S = G * S_; constexpr bool is_shift_by_16 = (S * sizeof(element_type_t) % 16) == 0; constexpr bool is_shift_by_4 = (S * sizeof(element_type_t) % 4) == 0; @@ -110,23 +110,22 @@ struct slide_left_impl_t } else if constexpr( current_api >= neon || current_api >= sve ) { - if constexpr ( reg_size <= 8 ) return 2; - else return 3; + if( reg_size <= 8 ) return 2; + return 3; } else { - if constexpr ( reg_size <= 8 ) return 2; - else if constexpr ( current_api >= avx512 ) + if( reg_size <= 8 ) return 2; + if( current_api >= avx512 ) { - if constexpr ( is_shift_by_4 ) return 2; - else if constexpr ( reg_size <= 16 ) return 2; - else if constexpr ( is_shift_by_2 ) return 3; - else if constexpr (reg_size == 64) return 5; // this is not yet done + if( is_shift_by_4 ) return 2; + if( reg_size <= 16 ) return 2; + if( is_shift_by_2 ) return 3; + if (reg_size == 64) return 5; // this is not yet done } - - if constexpr ( reg_size == 32 && is_shift_by_16 ) return 2; - else if constexpr ( current_api >= avx2 && reg_size == 32 ) return 4; - else return 2; + if( reg_size == 32 && is_shift_by_16 ) return 2; + if( current_api >= avx2 && reg_size == 32 ) { return 4; } + return 2; } } @@ -138,7 +137,7 @@ struct slide_left_impl_t { using abi_t = typename T::abi_type; constexpr std::ptrdiff_t S = S_ * G; - constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); + const std::size_t reg_size = sizeof(element_type_t) * T::size(); constexpr bool is_shift_by_16 = (S * sizeof(element_type_t) % 16) == 0; constexpr bool is_shift_by_8 = (S * sizeof(element_type_t) % 8) == 0; @@ -146,24 +145,23 @@ struct slide_left_impl_t constexpr bool is_shift_by_2 = (S * sizeof(element_type_t) % 2) == 0; if constexpr( S == 0 || S == T::size() ) return 0; - else if constexpr( logical_simd_value && !abi_t::is_wide_logical ) + if constexpr( logical_simd_value && !abi_t::is_wide_logical ) { auto mask = detail::mask_type(tgt); return level(mask, mask, g, s) + 6; } - else if constexpr( current_api >= neon || current_api >= sve ) return 2; - else if constexpr ( current_api >= avx512 ) + if constexpr( current_api >= neon || current_api >= sve ) return 2; + if( current_api >= avx512 ) { - if constexpr ( is_shift_by_4 ) return 2; - else if constexpr ( is_shift_by_2 ) return 3; + if( is_shift_by_4 ) return 2; + if( is_shift_by_2 ) return 3; } + if( is_shift_by_16 && reg_size == 32 ) return 2; + if( current_api >= avx2 && reg_size == 32 ) return 4; - if constexpr ( is_shift_by_16 && reg_size == 32 ) return 2; - else if constexpr ( current_api >= avx2 && reg_size == 32 ) return 4; - - else if constexpr ( current_api >= sse4_2 ) return 2; + if( current_api >= sse4_2 ) return 2; // sse2 - else return is_shift_by_8 ? 2 : 6; + return is_shift_by_8 ? 2 : 6; } }; diff --git a/include/eve/module/core/named_shuffles/swap_adjacent.hpp b/include/eve/module/core/named_shuffles/swap_adjacent.hpp index 0f3ae72b46..16ef7ae98c 100644 --- a/include/eve/module/core/named_shuffles/swap_adjacent.hpp +++ b/include/eve/module/core/named_shuffles/swap_adjacent.hpp @@ -67,50 +67,50 @@ struct swap_adjacent_t template static constexpr std::ptrdiff_t level(eve::as tgt, eve::fixed g) { - constexpr std::ptrdiff_t g_size = sizeof(element_type_t) * G; - constexpr std::size_t reg_size = sizeof(element_type_t) * T::size(); - constexpr std::size_t fund_size = eve::fundamental_cardinal_v; + const std::ptrdiff_t g_size = sizeof(element_type_t) * G; + const std::size_t reg_size = sizeof(element_type_t) * T::size(); + const std::size_t fund_size = eve::fundamental_cardinal_v; - if constexpr ( current_api >= sve ) + if( current_api >= sve ) { - if constexpr( arithmetic_simd_value ) + if( arithmetic_simd_value ) { - if constexpr (g_size <= 4 || g_size == fund_size / 2) return 2; - else return 3; + if (g_size <= 4 || g_size == fund_size / 2) return 2; + return 3; } else { - if constexpr ( g_size == 8 && fund_size == 16 ) return 2; - else return level(detail::mask_type(tgt), g) + 4; + if( g_size == 8 && fund_size == 16 ) return 2; + return level(detail::mask_type(tgt), g) + 4; } } - else if constexpr ( current_api >= vmx ) return 3; - else if constexpr ( current_api >= neon ) return 2; + if( current_api >= vmx ) return 3; + if( current_api >= neon ) return 2; - else if constexpr ( current_api == avx512 && logical_simd_value ) + if( eve::current_api == avx512 && logical_simd_value ) { return level(detail::mask_type(tgt), g) + 4; } - else if constexpr( current_api == avx && reg_size >= 32 && g_size <= 2 ) + if constexpr( current_api == avx && reg_size >= 32 && g_size <= 2 ) { using half_t = decltype(T {}.slice(lower_)); std::ptrdiff_t half_l = level(eve::as {}, g); // since we are adding, we need to deal with aggregation - if constexpr ( reg_size > 32 ) return half_l; - else return detail::idxm::add_shuffle_levels({half_l, half_l, 4}); + if( reg_size > 32 ) return half_l; + return detail::idxm::add_shuffle_levels({half_l, half_l, 4}); } - if constexpr ( current_api >= sse2 ) + if( current_api >= sse2 ) { - if constexpr ( g_size >= 4 ) return 2; - else if constexpr ( g_size == 2 && reg_size <= 8 ) return 2; - else if constexpr ( current_api >= ssse3 ) return 3; - else if constexpr ( g_size == 2 ) return 4; - else return 6; + if( g_size >= 4 ) return 2; + if( g_size == 2 && reg_size <= 8 ) return 2; + if( current_api >= ssse3 ) return 3; + if( g_size == 2 ) return 4; + return 6; } - else return 2; + return 2; } };