diff --git a/include/eve/detail/shuffle_v2/idxm.hpp b/include/eve/detail/shuffle_v2/idxm.hpp index e9df439d9b..9cceabb84b 100644 --- a/include/eve/detail/shuffle_v2/idxm.hpp +++ b/include/eve/detail/shuffle_v2/idxm.hpp @@ -133,6 +133,57 @@ is_just_setting_one_lane(std::span idxs) return std::array {pos - f, *pos}; } +template +constexpr auto +upscale_pattern(std::span idxs) +{ + if constexpr( N == 1 ) return std::optional> {}; + else + { + using res_t = std::optional>; + std::array res {}; + + for( int i = 0; i != N / 2; i += 1 ) + { + int i2 = i + i; + std::ptrdiff_t i0 = idxs[i2]; + std::ptrdiff_t i1 = idxs[i2 + 1]; + + if( i0 == na_ || i1 == na_ ) + { + if( i0 == i1 || i0 == we_ || i1 == we_ ) + { + res[i] = na_; + continue; + } + return res_t{}; + } + + if( i0 == we_ && i1 == we_ ) + { + res[i] = we_; + continue; + } + + if( i0 == we_ ) i0 = i1 - 1; + if( i1 == we_ ) i1 = i0 + 1; + + if( i0 + 1 != i1 || i0 % 2 != 0 ) return res_t{}; + + res[i] = i0 / 2; + } + + return res_t{res}; + } +} + +template +constexpr auto +upscale_pattern(const std::array& idxs) +{ + return upscale_pattern(std::span(idxs)); +} + template constexpr auto to_pattern() @@ -237,7 +288,14 @@ shuffle_halves_independetly(const std::array& p) } // works for 2 registers too +constexpr std::optional> +is_repeating_pattern(const std::array) +{ + return std::nullopt; +} + template +requires(N > 1) constexpr std::optional> is_repeating_pattern(const std::array& p) { @@ -250,8 +308,8 @@ is_repeating_pattern(const std::array& p) std::ptrdiff_t y = p[i + (std::size_t)half]; // even havles for x, odd for y - if (0 <= x && (x / half) % 2) return std::nullopt; - if (0 <= y && (y / half) % 2 == 0) return std::nullopt; + if( 0 <= x && (x / half) % 2 ) return std::nullopt; + if( 0 <= y && (y / half) % 2 == 0 ) return std::nullopt; if( y > 0 ) y -= half; @@ -791,6 +849,39 @@ is_slide_right(std::span idxs) return (m - idxs.data()) - *m; } +template +constexpr std::optional> +slide_as_slide2_with_0(std::span idxs) +{ + if( const auto s = is_slide_left(idxs) ) + { + std::array res = {}; + + for( std::ptrdiff_t i = 0; i != N; ++i ) { res[i] = i + *s; } + + return res; + } + + if( const auto s = is_slide_right(idxs) ) + { + std::array res = {}; + + for( std::ptrdiff_t i = 0; i != *s; ++i ) { res[i] = i + 2 * N - *s; } + for( std::ptrdiff_t i = *s; i != N; ++i ) { res[i] = i - *s; } + + return res; + } + + return std::nullopt; +} + +template +constexpr auto +slide_as_slide2_with_0(const std::array& idxs) +{ + return slide_as_slide2_with_0(std::span(idxs)); +} + constexpr bool is_reverse(std::span idxs) { @@ -866,16 +957,9 @@ split_to_groups(const std::array& idxs) return split_to_groups(std::span(idxs)); } -/* - * First shuffle big groups, then shuffle withing groups - * - * Big big questions about the order of small/big shuffle - * and when to do zeroes. - * Case by case we'll see. - */ template constexpr auto -put_bigger_groups_in_position(std::span idxs) +first_groups_then_in_groups(std::span idxs) { if constexpr( G == 0 || G > N ) { @@ -946,9 +1030,112 @@ put_bigger_groups_in_position(std::span idxs) template constexpr auto -put_bigger_groups_in_position(const std::array& idxs) +first_in_groups_then_groups(std::span idxs) +{ + constexpr std::size_t group_count = N / G; + + using group_pattern_t = std::array; + using withing_pattern_t = std::array; + + std::optional> res; + + group_pattern_t groups_pattern = {}; + + // computing group pattern + for( std::size_t i = 0; i != group_count; ++i ) + { + const std::size_t group_start = i * G; + const std::size_t group_end = (i + 1) * G; + + std::ptrdiff_t group_index = we_; + + for( std::size_t j = group_start; j != group_end; ++j ) + { + // group can still be entierly we_ or na_ + if( idxs[j] < 0 ) + { + group_index = std::max(idxs[j], group_index); + continue; + } + std::ptrdiff_t cur = idxs[j] / G; + if( cur == group_index || group_index < 0 ) + { + group_index = cur; + continue; + } + // group has elements from 2 different groups + return res; + } + groups_pattern[i] = group_index; + } + + withing_pattern_t within_groups_pattern = {}; + within_groups_pattern.fill(we_); + + for( std::ptrdiff_t i = 0; i != group_count; ++i ) + { + // group shuffle will do it + if( groups_pattern[i] < 0 ) continue; + + const std::ptrdiff_t group_start = i * G; + const std::ptrdiff_t original_group_start = groups_pattern[i] * G; + + for( std::size_t within_group = 0; within_group != G; ++within_group ) + { + std::ptrdiff_t new_idx = group_start + within_group; + std::ptrdiff_t old_idx = original_group_start + within_group; + if( idxs[new_idx] == we_ ) continue; + + if( within_groups_pattern[old_idx] == we_ || within_groups_pattern[old_idx] == idxs[new_idx] ) + { + within_groups_pattern[old_idx] = idxs[new_idx]; + continue; + } + // 2 values for one position + return res; + } + } + res = kumi::tuple {within_groups_pattern, groups_pattern}; + return res; +} + +template +constexpr auto +group_within_group(std::span idxs) +{ + using shuff_t = std::array; + using res_t = std::optional>; + if constexpr( G == 0 || G > N ) return res_t {}; + else + { + // not supported so far + if( !are_below_ignoring_specials(idxs, N) ) return res_t {}; + + // groups/in groups is strictly more powerful + // but in groups/groups can result in cheaper 0s. + // originally doing in_groups/groups was motivated + // by shift on avx2. + // + // This whole funciton a bit questionable, + // and is likely to need revisions at some point. + if( auto r = first_in_groups_then_groups(idxs) ) + { + return res_t {kumi::tuple {get<0>(*r), expand_group(get<1>(*r))}}; + } + if( auto r = first_groups_then_in_groups(idxs) ) + { + return res_t {kumi::tuple {expand_group(get<0>(*r)), get<1>(*r)}}; + } + + return res_t {}; + } +} + +template +constexpr auto +group_within_group(const std::array& idxs) { - return put_bigger_groups_in_position(std::span(idxs)); + return group_within_group(std::span(idxs)); } constexpr auto diff --git a/include/eve/detail/shuffle_v2/native_shuffle_helpers.hpp b/include/eve/detail/shuffle_v2/native_shuffle_helpers.hpp index 29e64b9b9c..5b2bf79400 100644 --- a/include/eve/detail/shuffle_v2/native_shuffle_helpers.hpp +++ b/include/eve/detail/shuffle_v2/native_shuffle_helpers.hpp @@ -66,18 +66,18 @@ struct expanded_pattern_t : pattern_t static constexpr auto repeated_16 = idxm::repeated_pattern_of_size<16 / g_size, I...>; static constexpr auto repeated_32 = idxm::repeated_pattern_of_size<32 / g_size, I...>; - static constexpr auto shuffle_16_first = idxm::put_bigger_groups_in_position<16 / g_size>(idxs); - static constexpr auto shuffle_8_first = idxm::put_bigger_groups_in_position<8 / g_size>(idxs); - static constexpr auto shuffle_4_first = idxm::put_bigger_groups_in_position<4 / g_size>(idxs); - static constexpr auto shuffle_2_first = idxm::put_bigger_groups_in_position<2 / g_size>(idxs); + static constexpr auto shuffle_16in16 = idxm::group_within_group<16 / g_size>(idxs); + static constexpr auto shuffle_8in8 = idxm::group_within_group<8 / g_size>(idxs); + static constexpr auto shuffle_4in4 = idxm::group_within_group<4 / g_size>(idxs); + static constexpr auto shuffle_2in2 = idxm::group_within_group<2 / g_size>(idxs); - template - static constexpr auto shuffle_n_first(eve::fixed) + template + static constexpr auto shuffle_NinN(eve::fixed) { - if constexpr (FirstSize == 16) return shuffle_16_first; - if constexpr (FirstSize == 8) return shuffle_8_first; - if constexpr (FirstSize == 4) return shuffle_4_first; - if constexpr (FirstSize == 2) return shuffle_2_first; + if constexpr (N == 16) return shuffle_16in16; + if constexpr (N == 8) return shuffle_8in8; + if constexpr (N == 4) return shuffle_4in4; + if constexpr (N == 2) return shuffle_2in2; } diff --git a/include/eve/detail/shuffle_v2/simd/arm/neon/shuffle_l3.hpp b/include/eve/detail/shuffle_v2/simd/arm/neon/shuffle_l3.hpp index 80a858bb6a..fb8e8c73fc 100644 --- a/include/eve/detail/shuffle_v2/simd/arm/neon/shuffle_l3.hpp +++ b/include/eve/detail/shuffle_v2/simd/arm/neon/shuffle_l3.hpp @@ -77,6 +77,7 @@ shuffle_l3_(EVE_SUPPORTS(neon128_), P p, fixed g, wide x) requires(P::out_reg_size == P::reg_size) { if constexpr( auto r = shuffle_l3_and_0(p, g, x); matched_shuffle ) return r; + else if constexpr ( auto r = shuffle_l3_slide_with_0(p, g, x); matched_shuffle ) return r; else if constexpr( auto r = shuffle_l3_neon_tbl(p, g, x); matched_shuffle ) return r; else return no_matching_shuffle_t {}; } diff --git a/include/eve/detail/shuffle_v2/simd/arm/neon/shuffle_l4_l5.hpp b/include/eve/detail/shuffle_v2/simd/arm/neon/shuffle_l4_l5.hpp index 128891294c..85b5e4e7dc 100644 --- a/include/eve/detail/shuffle_v2/simd/arm/neon/shuffle_l4_l5.hpp +++ b/include/eve/detail/shuffle_v2/simd/arm/neon/shuffle_l4_l5.hpp @@ -19,13 +19,13 @@ shuffle_l4_l5_neon_reverse(P, fixed g, wide x) return kumi::tuple {no_matching_shuffle, eve::index<-1>}; else { - // swap havles and reverse halves - x = shuffle_l<2>(x, eve::lane<8 / sizeof(T)>, eve::pattern<1, 0>); + // swap havles + reverse halves is already computed + constexpr auto p0 = get<0>(*P::shuffle_8in8); + constexpr auto p1 = get<1>(*P::shuffle_8in8); + auto [r0, l0] = shuffle_v2_core(x, eve::lane, idxm::to_pattern()); + auto [r1, l1] = shuffle_v2_core(r0, eve::lane, idxm::to_pattern()); - // halve reverse is already computed - constexpr auto within8 = get<1>(*P::shuffle_8_first); - x = shuffle_l<2>(x, eve::lane, idxm::to_pattern()); - return kumi::tuple {x, eve::index<4>}; + return kumi::tuple {r1, idxm::add_shuffle_levels(l0, l1)}; } } diff --git a/include/eve/detail/shuffle_v2/simd/common/shuffle_l3.hpp b/include/eve/detail/shuffle_v2/simd/common/shuffle_l3.hpp index 60e5b235e2..a5271249ae 100644 --- a/include/eve/detail/shuffle_v2/simd/common/shuffle_l3.hpp +++ b/include/eve/detail/shuffle_v2/simd/common/shuffle_l3.hpp @@ -21,4 +21,15 @@ shuffle_l3_and_0(P p, fixed g, wide x) } } +template +EVE_FORCEINLINE auto +shuffle_l3_slide_with_0(P, fixed g, wide x) +{ + if constexpr ( constexpr auto p = idxm::slide_as_slide2_with_0(P::idxs); !p ) return no_matching_shuffle; + else + { + return shuffle_l<2>(x, wide{0}, g, idxm::to_pattern<*p>()); + } +} + } diff --git a/include/eve/detail/shuffle_v2/simd/common/simplify_plain_shuffle.hpp b/include/eve/detail/shuffle_v2/simd/common/simplify_plain_shuffle.hpp index 64b9d18777..c5623229a5 100644 --- a/include/eve/detail/shuffle_v2/simd/common/simplify_plain_shuffle.hpp +++ b/include/eve/detail/shuffle_v2/simd/common/simplify_plain_shuffle.hpp @@ -8,57 +8,16 @@ #pragma once #include +#include namespace eve::detail { -template -constexpr auto -upscale_pattern_impl(std::array p) - -> std::optional> -{ - if( N == 1 ) return std::nullopt; - - std::array res {}; - - for( int i = 0; i != N / 2; i += 1 ) - { - int i2 = i + i; - std::ptrdiff_t i0 = p[i2]; - std::ptrdiff_t i1 = p[i2 + 1]; - - if( i0 == na_ || i1 == na_ ) - { - if( i0 == i1 || i0 == we_ || i1 == we_ ) - { - res[i] = na_; - continue; - } - return std::nullopt; - } - - if( i0 == we_ && i1 == we_ ) - { - res[i] = we_; - continue; - } - - if( i0 == we_ ) i0 = i1 - 1; - if( i1 == we_ ) i1 = i0 + 1; - - if( i0 + 1 != i1 || i0 % 2 != 0 ) { return std::nullopt; } - - res[i] = i0 / 2; - } - - return res; -} template constexpr auto upscale_pattern(pattern_t p) { - constexpr std::array p_arr {I...}; - constexpr std::optional attempt = upscale_pattern_impl(p_arr); + constexpr std::optional attempt = idxm::upscale_pattern(std::array{I...}); if constexpr( !attempt ) return p; else return idxm::to_pattern<*attempt>(); } diff --git a/include/eve/detail/shuffle_v2/simd/x86/shuffle_l4_l5.hpp b/include/eve/detail/shuffle_v2/simd/x86/shuffle_l4_l5.hpp index f2b2726575..c4815d53f1 100644 --- a/include/eve/detail/shuffle_v2/simd/x86/shuffle_l4_l5.hpp +++ b/include/eve/detail/shuffle_v2/simd/x86/shuffle_l4_l5.hpp @@ -43,15 +43,15 @@ shuffle_l4_l5_x86_put_u64x2_in_position(P, fixed, wide x) // there is nothing we can do for shorts on avx else if constexpr( P::reg_size == 32 && P::g_size <= 2 && current_api == avx ) return no; else if constexpr( P::has_zeroes && current_api < avx2 ) return no; - else if constexpr( !P::shuffle_16_first ) return no; + else if constexpr( !P::shuffle_16in16 ) return no; else { - constexpr auto shuffle16x2 = get<0>(*P::shuffle_16_first); - constexpr auto within16 = get<1>(*P::shuffle_16_first); + constexpr auto p0 = get<0>(*P::shuffle_16in16); + constexpr auto p1 = get<1>(*P::shuffle_16in16); + auto [r0, l0] = shuffle_v2_core(x, eve::lane, idxm::to_pattern()); + auto [r1, l1] = shuffle_v2_core(r0, eve::lane, idxm::to_pattern()); - x = shuffle_l<2>(x, eve::lane<16 / sizeof(T)>, idxm::to_pattern()); - auto [x_, l] = shuffle_v2_core(x, eve::lane, idxm::to_pattern()); - return kumi::tuple {x_, idxm::add_shuffle_levels(l, eve::index<2>)}; + return kumi::tuple {r1, idxm::add_shuffle_levels(l0, l1)}; } } diff --git a/include/eve/detail/shuffle_v2/simd/x86/shuffle_l6_l7.hpp b/include/eve/detail/shuffle_v2/simd/x86/shuffle_l6_l7.hpp index ad62f37a31..50d91879c5 100644 --- a/include/eve/detail/shuffle_v2/simd/x86/shuffle_l6_l7.hpp +++ b/include/eve/detail/shuffle_v2/simd/x86/shuffle_l6_l7.hpp @@ -35,18 +35,14 @@ shuffle_x86_l6_l7_u32_then_u16(P, fixed, T x) // this is only for sse2 if constexpr( current_api >= ssse3 ) return no; else if constexpr( P::g_size != 2 || P::has_zeroes ) return no; - else if constexpr( !P::shuffle_4_first ) return no; + else if constexpr( !P::shuffle_4in4 ) return no; else { - constexpr auto shuffle4x4 = get<0>(*P::shuffle_4_first); - constexpr auto within4 = get<1>(*P::shuffle_4_first); - - x = shuffle_l<2>(x, eve::lane<4 / P::e_t_size>, idxm::to_pattern()); - - // this might be actually 2 and not 4 but should not be a problem, which'd lower - // the total level but it's ok - we will still compute correctly - auto [x_, l] = shuffle_v2_core(x, eve::lane, idxm::to_pattern()); - return kumi::tuple {x_, idxm::add_shuffle_levels(l, eve::index<2>)}; + constexpr auto p0 = get<0>(*P::shuffle_4in4); + constexpr auto p1 = get<1>(*P::shuffle_4in4); + auto [r0, l0] = shuffle_v2_core(x, eve::lane, idxm::to_pattern()); + auto [r1, l1] = shuffle_v2_core(r0, eve::lane, idxm::to_pattern()); + return kumi::tuple {r1, idxm::add_shuffle_levels(l0, l1)}; } } diff --git a/include/eve/detail/shuffle_v2/simd/x86/shuffle_l_fallback.hpp b/include/eve/detail/shuffle_v2/simd/x86/shuffle_l_fallback.hpp index d3343b4e2f..b86b03feaa 100644 --- a/include/eve/detail/shuffle_v2/simd/x86/shuffle_l_fallback.hpp +++ b/include/eve/detail/shuffle_v2/simd/x86/shuffle_l_fallback.hpp @@ -14,15 +14,20 @@ template constexpr bool shuffle_l_fallback_try_sse2_group_plus_u8(P) { - constexpr auto group_within = P::shuffle_n_first(eve::lane); + constexpr auto p0p1 = P::shuffle_NinN(eve::lane); - if constexpr( !group_within ) return false; + if constexpr( !p0p1 ) return false; else { - constexpr auto most_repeated = idxm::most_repeated_pattern_a(*group_within)>; - - if (idxm::is_rotate(most_repeated)) return true; - if (idxm::is_slide_left(most_repeated)) return true; + // We split into shuffle big registers and then shuffle u8s internally. + // This is limited by what u8 shuffles we can do: rotate and slide. + constexpr auto p0 = get<0>(*p0p1); + constexpr auto p1 = get<1>(*p0p1); + constexpr auto u8pattern = idxm::upscale_pattern(p0) ? p1 : p0; // u8 pattern can't be upscaled + constexpr auto most_repeated = idxm::most_repeated_pattern_a; + + if( idxm::is_rotate(most_repeated) ) return true; + if( idxm::is_slide_left(most_repeated) ) return true; return false; } } @@ -31,24 +36,31 @@ template EVE_FORCEINLINE auto shuffle_l_fallback_sse2_uN_u8(P, fixed g, wide x) { - constexpr auto match = []() - { - if ( shuffle_l_fallback_try_sse2_group_plus_u8<8>(P {}) ) return 8; - if ( shuffle_l_fallback_try_sse2_group_plus_u8<4>(P{}) ) return 4; - if ( shuffle_l_fallback_try_sse2_group_plus_u8<2>(P{}) ) return 2; - return -1; - }(); + constexpr auto no = kumi::tuple {no_matching_shuffle, eve::index<-1>}; - if constexpr( match == -1 ) return kumi::tuple {no_matching_shuffle, eve::index<-1>}; + // There should be code for sizeof(T) == 2 but we don't have it yet. + if constexpr( sizeof(T) > 1 ) return no; else { - constexpr auto groups_within = *P::shuffle_n_first(eve::lane); - constexpr auto groups_p = idxm::to_pattern(groups_within)>(); - constexpr auto within_p = idxm::to_pattern(groups_within)>(); - - auto [groups, groups_l] = shuffle_v2_core(x, eve::lane, groups_p); - auto [res, within_l] = shuffle_v2_core(groups, g, within_p); - return kumi::tuple {res, idxm::add_shuffle_levels(groups_l, within_l)}; + constexpr auto match = []() + { + if( shuffle_l_fallback_try_sse2_group_plus_u8<8>(P {}) ) return 8; + if( shuffle_l_fallback_try_sse2_group_plus_u8<4>(P {}) ) return 4; + if( shuffle_l_fallback_try_sse2_group_plus_u8<2>(P {}) ) return 2; + return -1; + }(); + + if constexpr( match == -1 ) return kumi::tuple {no_matching_shuffle, eve::index<-1>}; + else + { + constexpr auto p0p1 = *P::shuffle_NinN(eve::lane); + constexpr auto p0 = get<0>(p0p1); + constexpr auto p1 = get<1>(p0p1); + auto [r0, l0] = shuffle_v2_core(x, g, idxm::to_pattern()); + auto [r1, l1] = shuffle_v2_core(r0, g, idxm::to_pattern()); + + return kumi::tuple {r1, idxm::add_shuffle_levels(l0, l1)}; + } } } diff --git a/include/eve/module/core/named_shuffles/slide.hpp b/include/eve/module/core/named_shuffles/slide.hpp index dbb319959b..8c1bcad471 100644 --- a/include/eve/module/core/named_shuffles/slide.hpp +++ b/include/eve/module/core/named_shuffles/slide.hpp @@ -78,10 +78,11 @@ struct slide_left_impl_t template static constexpr auto level(eve::as, eve::fixed g, eve::index_t s) { + const std::size_t reg_size = sizeof(element_type_t) * T::size(); constexpr std::ptrdiff_t S = G * S_; + if constexpr( S == 0 ) return 0; else if constexpr ( S == T::size() ) return 1; - else if constexpr ( current_api >= neon ) return 3; else if constexpr( eve::has_aggregated_abi_v ) { using half_t = decltype(T {}.slice(lower_)); @@ -94,8 +95,21 @@ struct slide_left_impl_t auto just_second = level(eve::as{}, g, s); return std::max(halves_together, just_second); } + } else if constexpr ( current_api >= neon ) { + if (reg_size <= 8) return 2; + return 3; + } else { + if (current_api >= avx512 && reg_size == 64) { + if (S % 4 == 0) return 3; + return 5; + } + if (current_api >= avx2 && reg_size == 32) { + if (S % 8 == 0) return 2; + if (S % 4 == 0) return 3; + return 4; + } + return 2; } - else { return 2; } } // Two args @@ -105,6 +119,7 @@ struct slide_left_impl_t { constexpr std::ptrdiff_t S = S_ * G; constexpr bool is_shift_by_8 = (S * sizeof(element_type_t) % 8) == 0; + if constexpr( S == 0 || S == T::size() ) return 0; if constexpr ( current_api >= neon ) return 2; diff --git a/test/unit/api/regular/shuffle_v2/idxm.cpp b/test/unit/api/regular/shuffle_v2/idxm.cpp index f270172e40..7cd11b736d 100644 --- a/test/unit/api/regular/shuffle_v2/idxm.cpp +++ b/test/unit/api/regular/shuffle_v2/idxm.cpp @@ -39,6 +39,39 @@ TTS_CASE("are_below_ignoring_specials") test(std::array {we_, we_}, 1, true); }; +TTS_CASE("upscale_pattern") { + auto yes_test = [](auto _in, auto _expected) + { + auto in = to_idxs(_in); + auto expected = to_idxs(_expected); + + auto actual = eve::detail::idxm::upscale_pattern(in); + TTS_EXPECT(actual); + + TTS_EQUAL(expected, *actual) << tts::as_string(in); + }; + + auto no_test = [](auto _in) + { + auto in = to_idxs(_in); + auto actual = eve::detail::idxm::upscale_pattern(in); + + TTS_EXPECT_NOT(actual); + }; + + yes_test(std::array{0, 1, 2, 3}, std::array{0, 1}); + yes_test(std::array{2, 3, 0, 1}, std::array{1, 0}); + yes_test(std::array{2, 3, 2, 3}, std::array{1, 1}); + yes_test(std::array{2, 3, we_, 1}, std::array{1, 0}); + yes_test(std::array{2, 3, na_, na_}, std::array{1, na_}); + yes_test(std::array{2, 3, we_, we_}, std::array{1, we_}); + + no_test(std::array{na_, 1, 2, 3}); + no_test(std::array{1, 0, 2, 3}); + no_test(std::array{0, 1, 3, 2}); +}; + + TTS_CASE("to_pattern") { constexpr auto arr = to_idxs(std::array {0, 1, we_, na_}); @@ -747,6 +780,34 @@ TTS_CASE("is_slide_right") test(std::array {na_, na_, 1, 2}, -1); }; +TTS_CASE("slide_as_slide2_with_0") +{ + auto yes_test = [](auto _in, auto _expected) + { + auto in = to_idxs(_in); + auto expected = to_idxs(_expected); + auto actual = eve::detail::idxm::slide_as_slide2_with_0(in); + TTS_EXPECT(actual) << tts::as_string(in); + TTS_EQUAL(expected, *actual) << tts::as_string(in); + }; + + auto no_test = [](auto _in) + { + auto in = to_idxs(_in); + auto actual = eve::detail::idxm::slide_as_slide2_with_0(in); + + TTS_EXPECT_NOT(actual); + }; + + yes_test(std::array {0, 1, 2, 3}, std::array{0, 1, 2, 3}); + yes_test(std::array {1, 2, 3, na_}, std::array{1, 2, 3, 4}); + yes_test(std::array {na_, 0, 1, 2}, std::array{7, 0, 1, 2}); + yes_test(std::array {na_, na_, 0, 1}, std::array{6, 7, 0, 1}); + + no_test(std::array {na_, na_, 1, 2}); + no_test(std::array {1, 2, na_, na_}); +}; + TTS_CASE("is_reverse") { auto test = [](auto _in, bool expected) @@ -812,44 +873,58 @@ TTS_CASE("add shuffle levels") TTS_EQUAL(7, add(eve::index<4>, eve::index<3>, eve::index<1>)); }; -TTS_CASE("put bigger group in position") -{ - auto yes_test = [](auto _in, eve::fixed, auto _groups, auto _within) +TTS_CASE("group_within_group") { + auto yes_test = [](auto _in, eve::fixed, auto _s0, auto _s1) { auto in = to_idxs(_in); - auto groups = to_idxs(_groups); - auto within = to_idxs(_within); + auto s0 = to_idxs(_s0); + auto s1 = to_idxs(_s1); - auto [actual_groups, actual_within] = *eve::detail::idxm::put_bigger_groups_in_position(in); + auto [actual_s0, actual_s1] = *eve::detail::idxm::group_within_group(in); - TTS_EQUAL(groups, actual_groups); - TTS_EQUAL(within, actual_within); + TTS_EQUAL(actual_s0, s0); + TTS_EQUAL(actual_s1, s1); }; auto no_test = [](auto _in, eve::fixed) { auto in = to_idxs(_in); - auto actual = eve::detail::idxm::put_bigger_groups_in_position(in); + auto actual = eve::detail::idxm::group_within_group(in); TTS_EXPECT_NOT(actual); }; - yes_test(std::array {3, 2, 0, 1}, eve::lane<2>, std::array {1, 0}, std::array {1, 0, 2, 3}); - yes_test(std::array {3, 2, na_, 1}, eve::lane<2>, std::array {1, 0}, std::array {1, 0, na_, 3}); - yes_test(std::array {3, 2, 3, 2}, eve::lane<2>, std::array {1, 1}, std::array {1, 0, 3, 2}); - yes_test( - std::array {3, 2, na_, na_}, eve::lane<2>, std::array {1, we_}, std::array {1, 0, na_, na_}); - yes_test(std::array {3, 2, 0, 1}, eve::lane<4>, std::array {0}, std::array {3, 2, 0, 1}); - yes_test(std::array {3, 2, 0, 1}, eve::lane<4>, std::array {0}, std::array {3, 2, 0, 1}); - yes_test(std::array {3, 2, 0, 1}, eve::lane<1>, std::array {3, 2, 0, 1}, std::array {0, 1, 2, 3}); + // in group then group + yes_test(std::array {3, 2, 0, 1}, eve::lane<2>, std::array {0, 1, 3, 2}, std::array {2, 3, 0, 1}); + yes_test(std::array {3, 2, 0, 1}, eve::lane<4>, std::array {3, 2, 0, 1}, std::array {0, 1, 2, 3}); + yes_test(std::array {3, 2, 0, 1}, eve::lane<1>, std::array {0, 1, 2, 3}, std::array {3, 2, 0, 1}); + yes_test(std::array {3, 2, na_, 1}, eve::lane<2>, std::array {na_, 1, 3, 2}, std::array {2, 3, 0, 1}); + yes_test(std::array {3, 2, na_, na_}, eve::lane<2>, std::array {we_, we_, 3, 2}, std::array {2, 3, na_, na_}); + yes_test(std::array {3, 2, we_, we_}, eve::lane<2>, std::array {we_, we_, 3, 2}, std::array {2, 3, we_, we_}); + yes_test(std::array {3, 2, 3, 2}, eve::lane<2>, std::array {we_, we_, 3, 2}, std::array {2, 3, 2, 3}); yes_test(std::array {3, 2, 6, 7, 6, 7, 0, 1}, eve::lane<2>, - std::array {1, 3, 3, 0}, - std::array {1, 0, 2, 3, 4, 5, 6, 7}); + std::array {0, 1, 3, 2, we_, we_, 6, 7}, + std::array {2, 3, 6, 7, 6, 7, 0, 1}); yes_test(std::array {7, na_, na_, na_, na_, na_, na_, na_}, eve::lane<4>, - std::array {1, we_}, - std::array {3, na_, na_, na_, na_, na_, na_, na_}); + std::array {we_, we_, we_, we_, 7, na_, na_, na_}, + std::array {4, 5, 6, 7, na_, na_, na_, na_}); + yes_test( + std::array{1, 1, 2, 3, 4, 5, 6, 7, 8}, + eve::lane<1>, + std::array{-2, 1, 2, 3, 4, 5, 6, 7, 8}, + std::array{1, 1, 2, 3, 4, 5, 6, 7, 8} + ); + + // group then in group + yes_test(std::array {3, 2, 2, 3}, eve::lane<2>, std::array {2, 3, 2, 3}, std::array {1, 0, 2, 3}); + yes_test(std::array {3, we_, 2, 3}, eve::lane<2>, std::array {2, 3, 2, 3}, std::array {1, we_, 2, 3}); + yes_test(std::array {3, we_, na_, 3}, eve::lane<2>, std::array {2, 3, 2, 3}, std::array {1, we_, na_, 3}); + yes_test(std::array {6, na_, na_, na_, 7, na_, na_, na_}, + eve::lane<4>, + std::array {4, 5, 6, 7, 4, 5, 6, 7}, + std::array {2, na_, na_, na_, 7, na_, na_, na_}); no_test(std::array {3, 0, 0, 1}, eve::lane<2>); }; diff --git a/test/unit/api/regular/shuffle_v2/slide_left_1.cpp b/test/unit/api/regular/shuffle_v2/slide_left_1.cpp index 3c05bc54db..73fc6f5ac5 100644 --- a/test/unit/api/regular/shuffle_v2/slide_left_1.cpp +++ b/test/unit/api/regular/shuffle_v2/slide_left_1.cpp @@ -34,17 +34,17 @@ TTS_CASE("Slide left 1, example") { TTS_CASE("Explicit") { using w_i = eve::wide>; w_i x{1, 2, 3, 4, 5, 6, 7, 8}; - //constexpr auto na_ = eve::na_; - auto y = eve::slide_left2(x, eve::index<7>); + constexpr auto na_ = eve::na_; + auto [y, l] = eve::shuffle_v2_core(x, eve::pattern<7, na_, na_, na_, na_, na_, na_, na_>); TTS_EQUAL(y, w_i({8, 0, 0, 0, 0, 0, 0, 0})); - //TTS_EQUAL(l(), 2); + TTS_EQUAL(l(), 4); }; #endif TTS_CASE_TPL("Check slide_left, 1 arg, generic", eve::test::simd::all_types) (tts::type) { - if constexpr( eve::current_api <= eve::sse4_2 ) + if constexpr( eve::current_api <= eve::sse4_2 || eve::current_api == eve::asimd ) { shuffle_test::named_shuffle1_test< /*supports_G_eq_T_Size*/ true>(eve::as {},