Skip to content

Commit

Permalink
debug avx2 slide left issue (#1734)
Browse files Browse the repository at this point in the history
  • Loading branch information
DenisYaroshevskiy authored Jan 24, 2024
1 parent 6fac6da commit 304fc64
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 39 deletions.
49 changes: 45 additions & 4 deletions include/eve/detail/shuffle_v2/simd/x86/idxm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,13 @@ x86_permute2f128_one_reg_mask(std::span<const std::ptrdiff_t, 2> _idxs)
constexpr int
x86_blend_immediate_mask(std::span<const std::ptrdiff_t> idxs, std::ptrdiff_t g)
{
int r = 0;
int s = std::ssize(idxs);
int r = 0;
int s = std::ssize(idxs);
int pos = 0;
for(auto i : idxs )
for( auto i : idxs )
{
for (int j = 0; j != g; ++j) {
for( int j = 0; j != g; ++j )
{
// we_ < s
if( i * g >= s ) { r |= 1 << pos; }
++pos;
Expand All @@ -67,4 +68,44 @@ x86_blend_immediate_mask(std::span<const std::ptrdiff_t> idxs, std::ptrdiff_t g)
return r;
}

template<std::ptrdiff_t G, std::size_t N>
constexpr auto
x86_pshuvb_pattern(const std::array<std::ptrdiff_t, N>& idxs);

template<std::ptrdiff_t G, std::size_t N>
constexpr auto
x86_pshuvb_pattern(std::span<const std::ptrdiff_t, N> idxs)
{
if constexpr( G != 1 ) return x86_pshuvb_pattern<1>(expand_group<G>(idxs));
else
{
static_assert(N == 16 || N == 32 || N == 64);
using arr_t = std::array<std::ptrdiff_t, N>;
using res_t = std::optional<arr_t>;

arr_t res = {};
for( std::size_t i = 0; i != N; ++i )
{
std::ptrdiff_t lb = i / 16 * 16;
std::ptrdiff_t ub = lb + 16;
if( idxs[i] < 0 )
{
res[i] = 0xff;
continue;
}
if( idxs[i] < lb || idxs[i] > ub ) return res_t {};
res[i] = idxs[i] - lb;
}

return res_t {res};
}
}

template<std::ptrdiff_t G, std::size_t N>
constexpr auto
x86_pshuvb_pattern(const std::array<std::ptrdiff_t, N>& idxs)
{
return x86_pshuvb_pattern<G>(std::span<const std::ptrdiff_t, N>(idxs));
}

}
34 changes: 11 additions & 23 deletions include/eve/detail/shuffle_v2/simd/x86/shuffle_l3.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,10 @@ template<typename N, std::ptrdiff_t... I>
EVE_FORCEINLINE auto
x86_pshuvb(pattern_t<I...>, wide<std::uint8_t, N> x)
{
if constexpr( N() == 16 )
{
wide<std::uint8_t, N> mask {I...};
return _mm_shuffle_epi8(x, mask);
}
else if constexpr( N() == 32 )
{
wide<std::uint8_t, N> mask {I..., I...};
return _mm256_shuffle_epi8(x, mask);
}
else
{
wide<std::uint8_t, N> mask {I..., I..., I..., I...};
return _mm512_shuffle_epi8(x, mask);
}
wide<std::uint8_t, N> mask {I...};
if constexpr( N() == 16 ) return _mm_shuffle_epi8(x, mask);
else if constexpr( N() == 32 ) return _mm256_shuffle_epi8(x, mask);
else return _mm512_shuffle_epi8(x, mask);
}

template<typename P, arithmetic_scalar_value T, typename N, std::ptrdiff_t G>
Expand All @@ -37,16 +26,15 @@ shuffle_l3_x86_pshuvb(P, fixed<G>, wide<T, N> x)
{
if constexpr( current_api < ssse3 ) return no_matching_shuffle;
else if constexpr( current_api == avx && P::reg_size == 32 ) return no_matching_shuffle;
else if constexpr( !P::repeated_16 ) return no_matching_shuffle;
else
{
constexpr auto no_we = idxm::replace_we(*P::repeated_16, 0xff);
constexpr auto no_na = idxm::replace_na(no_we, 0xff);
constexpr auto expanded = idxm::expand_group<P::g_size>(no_na);

using u8xN = wide<std::uint8_t, eve::fixed<P::reg_size>>;

return x86_pshuvb(idxm::to_pattern<expanded>(), eve::bit_cast(x, eve::as<u8xN> {}));
constexpr auto pshuvb_pattern = idxm::x86_pshuvb_pattern<G * sizeof(T)>(P::idxs);
if constexpr (!pshuvb_pattern) return no_matching_shuffle;
else
{
using u8xN = wide<std::uint8_t, eve::fixed<P::reg_size>>;
return x86_pshuvb(idxm::to_pattern<*pshuvb_pattern>(), eve::bit_cast(x, eve::as<u8xN> {}));
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion include/eve/detail/shuffle_v2/simd/x86/shuffle_l4_l5.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ shuffle_l4_l5_x86_put_u64x2_in_position(P, fixed<G>, wide<T, N> x)
if constexpr( P::reg_size < 32 ) return no;
// there is nothing we can do for shorts on avx
else if constexpr( P::reg_size == 32 && P::g_size <= 2 && current_api == avx ) return no;
else if constexpr( P::has_zeroes && current_api <= avx2 ) return no;
else if constexpr( P::has_zeroes && current_api < avx2 ) return no;
else if constexpr( !P::shuffle_16_first ) return no;
else
{
Expand Down
17 changes: 11 additions & 6 deletions test/unit/api/regular/shuffle_v2/idxm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,11 +192,11 @@ TTS_CASE("is_repeating_pattern")
no_test(std::array {na_, 0, we_, 3});

// 2 registers [0, 1, 2, 3] [4, 5, 6, 7]
yes_test(std::array{0, 4, 2, 6}, std::array{0, 4});
yes_test(std::array{1, 4, 3, 6}, std::array{1, 4});
yes_test(std::array{1, 4, 3, we_}, std::array{1, 4});
yes_test(std::array{1, we_, 3, 6}, std::array{1, 4});
yes_test(std::array{na_, 4, na_, 6}, std::array{na_, 4});
yes_test(std::array {0, 4, 2, 6}, std::array {0, 4});
yes_test(std::array {1, 4, 3, 6}, std::array {1, 4});
yes_test(std::array {1, 4, 3, we_}, std::array {1, 4});
yes_test(std::array {1, we_, 3, 6}, std::array {1, 4});
yes_test(std::array {na_, 4, na_, 6}, std::array {na_, 4});

no_test(std::array {0, 4, 2, 7});
no_test(std::array {0, 3});
Expand Down Expand Up @@ -837,14 +837,19 @@ TTS_CASE("put bigger group in position")
yes_test(std::array {3, 2, 0, 1}, eve::lane<2>, std::array {1, 0}, std::array {1, 0, 2, 3});
yes_test(std::array {3, 2, na_, 1}, eve::lane<2>, std::array {1, 0}, std::array {1, 0, na_, 3});
yes_test(std::array {3, 2, 3, 2}, eve::lane<2>, std::array {1, 1}, std::array {1, 0, 3, 2});
yes_test(std::array {3, 2, na_, na_}, eve::lane<2>, std::array {1, we_}, std::array {1, 0, na_, na_});
yes_test(
std::array {3, 2, na_, na_}, eve::lane<2>, std::array {1, we_}, std::array {1, 0, na_, na_});
yes_test(std::array {3, 2, 0, 1}, eve::lane<4>, std::array {0}, std::array {3, 2, 0, 1});
yes_test(std::array {3, 2, 0, 1}, eve::lane<4>, std::array {0}, std::array {3, 2, 0, 1});
yes_test(std::array {3, 2, 0, 1}, eve::lane<1>, std::array {3, 2, 0, 1}, std::array {0, 1, 2, 3});
yes_test(std::array {3, 2, 6, 7, 6, 7, 0, 1},
eve::lane<2>,
std::array {1, 3, 3, 0},
std::array {1, 0, 2, 3, 4, 5, 6, 7});
yes_test(std::array {7, na_, na_, na_, na_, na_, na_, na_},
eve::lane<4>,
std::array {1, we_},
std::array {3, na_, na_, na_, na_, na_, na_, na_});

no_test(std::array {3, 0, 0, 1}, eve::lane<2>);
};
Expand Down
9 changes: 4 additions & 5 deletions test/unit/api/regular/shuffle_v2/slide_left_1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,10 @@ TTS_CASE("Slide left 1, example") {
TTS_CASE("Explicit") {
using w_i = eve::wide<std::uint32_t, eve::fixed<8>>;
w_i x{1, 2, 3, 4, 5, 6, 7, 8};
constexpr auto na_ = eve::na_;
auto [y, l] = eve::shuffle_v2_core(x, eve::lane<4>, eve::pattern<0, na_>);
//auto y = eve::slide_left2(x, eve::index<4>);
//TTS_EQUAL(y, w_i({8, 0, 0, 0, 0, 0, 0, 0}));
TTS_EQUAL(l(), 2);
//constexpr auto na_ = eve::na_;
auto y = eve::slide_left2(x, eve::index<7>);
TTS_EQUAL(y, w_i({8, 0, 0, 0, 0, 0, 0, 0}));
//TTS_EQUAL(l(), 2);
};
#endif

Expand Down

0 comments on commit 304fc64

Please sign in to comment.