Skip to content

Commit

Permalink
naming some shuffles (#1642)
Browse files Browse the repository at this point in the history
  • Loading branch information
DenisYaroshevskiy authored Aug 18, 2023
1 parent 920ceaa commit 19c4348
Show file tree
Hide file tree
Showing 45 changed files with 865 additions and 705 deletions.
4 changes: 2 additions & 2 deletions include/eve/detail/function/reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#include <eve/concept/vectorized.hpp>
#include <eve/detail/abi.hpp>
#include <eve/module/core/regular/swap_adjacent_groups.hpp>
#include <eve/module/core/named_shuffles/swap_adjacent.hpp>
#include <bit>

namespace eve::detail
Expand All @@ -29,7 +29,7 @@ namespace eve::detail

return [&]<std::size_t... I>(std::index_sequence<I...>) mutable
{
((v = f(v,swap_adjacent_groups(v, fixed<(1<<I)>{} ))),...);
((v = f(v,swap_adjacent(v, fixed<(1<<I)>{} ))),...);
return v;
}(std::make_index_sequence<depth>{});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

#include <eve/traits/product_type.hpp>
#include <eve/module/core/regular/deinterleave_groups_shuffle.hpp>
#include <eve/module/core/regular/swap_adjacent_groups.hpp>
#include <eve/module/core/regular/shuffle.hpp>

namespace eve::detail
Expand Down
74 changes: 71 additions & 3 deletions include/eve/detail/shuffle_v2/idxm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,9 +309,7 @@ constexpr auto repeated_pattern_of_size = []
{
std::optional<std::array<std::ptrdiff_t, target>> res;
constexpr auto repeated = idxm::reduce_repeated_pattern_until_impl<target, std::array {I...}>();
if constexpr (repeated.size() == target) {
res = repeated;
}
if constexpr( repeated.size() == target ) { res = repeated; }
return res;
}
}();
Expand Down Expand Up @@ -617,6 +615,56 @@ replace_na(const std::array<std::ptrdiff_t, N>& idxs, std::ptrdiff_t with)
return replace_na(std::span<const std::ptrdiff_t, N>(idxs), with);
}

// Rewrites a two-input index pattern so that it only describes what is taken
// from the second input.
//
// For every position of `idxs` (N entries):
//   * negative entries are copied unchanged;
//   * entries in [0, N) are replaced by `with`;
//   * entries >= N are rebased by subtracting N.
//
// Returns the rewritten pattern as a std::array of the same length.
template<std::size_t N>
constexpr auto
just_second_shuffle(std::span<const std::ptrdiff_t, N> idxs, std::ptrdiff_t with)
{
  constexpr auto first_size = static_cast<std::ptrdiff_t>(N);

  std::array<std::ptrdiff_t, N> out = {};

  for( std::size_t pos = 0; pos < N; ++pos )
  {
    const std::ptrdiff_t idx = idxs[pos];

    if( idx < 0 ) out[pos] = idx;                   // negative values pass through untouched
    else if( idx < first_size ) out[pos] = with;    // index into the first input: masked out
    else out[pos] = idx - first_size;               // index into the second input: rebased to 0
  }

  return out;
}

template<std::size_t N>
constexpr auto
just_second_shuffle(const std::array<std::ptrdiff_t, N>& idxs, std::ptrdiff_t with)
{
return just_second_shuffle(std::span<const std::ptrdiff_t, N>(idxs), with);
}

// Rewrites a two-input index pattern so that it only describes what is taken
// from the first input.
//
// Every entry of `idxs` that is >= N (N being the pattern length) is replaced
// by `with`; all other entries, negative ones included, are copied unchanged.
//
// Returns the rewritten pattern as a std::array of the same length.
template<std::size_t N>
constexpr auto
just_first_shuffle(std::span<const std::ptrdiff_t, N> idxs, std::ptrdiff_t with)
{
  constexpr auto first_size = static_cast<std::ptrdiff_t>(N);

  std::array<std::ptrdiff_t, N> out = {};

  for( std::size_t pos = 0; pos < N; ++pos )
  {
    const std::ptrdiff_t idx = idxs[pos];
    out[pos] = (idx < first_size) ? idx : with;
  }

  return out;
}

template<std::size_t N>
constexpr auto
just_first_shuffle(const std::array<std::ptrdiff_t, N>& idxs, std::ptrdiff_t with)
{
return just_first_shuffle(std::span<const std::ptrdiff_t, N>(idxs), with);
}

constexpr bool
is_blend(std::span<const std::ptrdiff_t> idxs, std::ptrdiff_t cardinal)
{
Expand Down Expand Up @@ -814,6 +862,26 @@ split_to_groups(const std::array<std::ptrdiff_t, N>& idxs)
return split_to_groups<G>(std::span<const std::ptrdiff_t, N>(idxs));
}

// Combines several shuffle levels into one.
//
// Each level is split into its lowest bit and the rest: the parts with the
// lowest bit cleared are summed, and the lowest bits are OR-ed together, so
// the result gains at most 1 no matter how many inputs had that bit set.
// An empty span yields 0.
constexpr auto add_shuffle_levels(std::span<const std::ptrdiff_t> ls) {
  std::ptrdiff_t even_total = 0;
  std::ptrdiff_t any_low_bit = 0;

  for (std::size_t i = 0; i != ls.size(); ++i) {
    const std::ptrdiff_t level = ls[i];
    any_low_bit |= level & 1;
    even_total += level & ~std::ptrdiff_t{1};
  }

  return even_total + any_low_bit;
}

constexpr auto add_shuffle_levels(std::array<std::ptrdiff_t, 3> ls) {
return add_shuffle_levels(std::span(ls));
}

// Compile-time flavour: takes the levels as eve::index_t<...> tags, combines
// their values with add_shuffle_levels and returns the result as an
// `index<...>` constant.
template <std::ptrdiff_t ... ls>
constexpr auto add_shuffle_levels(eve::index_t<ls>... ) {
  constexpr auto combined = add_shuffle_levels(std::array{ls...});
  return index<combined>;
}

} // namespace eve::detail::idxm

#if defined(EVE_INCLUDE_X86_HEADER)
Expand Down
12 changes: 12 additions & 0 deletions include/eve/detail/shuffle_v2/native_shuffle_helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,21 @@ struct expanded_pattern_t : pattern_t<I...>
static constexpr auto repeated_8 = idxm::repeated_pattern_of_size<8 / g_size, I...>;
static constexpr auto repeated_16 = idxm::repeated_pattern_of_size<16 / g_size, I...>;
static constexpr auto repeated_32 = idxm::repeated_pattern_of_size<32 / g_size, I...>;

static constexpr std::array xy_swapped = idxm::swap_xy(idxs, std::ssize(idxs));
};

// Variable template: one constexpr object of type expanded_pattern_t<T, G, I...>
// for each combination of template arguments.
template<simd_value T, std::ptrdiff_t G, std::ptrdiff_t... I>
constexpr expanded_pattern_t<T, G, I...> expanded_pattern;

// Implements a two-register shuffle as two one-register shuffles merged with
// a bitwise OR.
//
// The pattern is split in two: one keeping only what comes from `x` and one
// keeping only what comes from `y`; positions belonging to the other register
// are set to na_. Each half is shuffled through shuffle_v2_core and the two
// results are OR-ed together.
// NOTE(review): this relies on na_ positions producing zeros in each half -
// presumably guaranteed by shuffle_v2_core; confirm.
//
// Returns kumi::tuple{ shuffled value, level }, where the level is the
// combination (add_shuffle_levels) of both sub-shuffle levels and
// eve::index<2>.
template<simd_value T, std::ptrdiff_t G, std::ptrdiff_t... I>
EVE_FORCEINLINE auto
shuffle_2_using_or(pattern_t<I...>, fixed<G> g, T x, T y)
{
  constexpr std::array full_pattern{I...};
  constexpr auto       only_x = idxm::just_first_shuffle(full_pattern, na_);
  constexpr auto       only_y = idxm::just_second_shuffle(full_pattern, na_);

  auto [from_x, level_x] = shuffle_v2_core(x, g, idxm::to_pattern<only_x>());
  auto [from_y, level_y] = shuffle_v2_core(y, g, idxm::to_pattern<only_y>());

  return kumi::tuple{ from_x | from_y, idxm::add_shuffle_levels(level_x, level_y, eve::index<2>) };
}

}
4 changes: 4 additions & 0 deletions include/eve/detail/shuffle_v2/shuffle_l2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,7 @@ EVE_CALLABLE_API(shuffle_l2_, shuffle_l2)
#if defined(EVE_INCLUDE_ARM_HEADER)
# include <eve/detail/shuffle_v2/simd/arm/neon/shuffle_l2.hpp>
#endif

#if defined(EVE_INCLUDE_SVE_HEADER)
# include <eve/detail/shuffle_v2/simd/arm/sve/shuffle_l2.hpp>
#endif
2 changes: 2 additions & 0 deletions include/eve/detail/shuffle_v2/shuffle_l6_l7.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ namespace detail
EVE_CALLABLE_API(shuffle_l6_l7_, shuffle_l6_l7)
}

#include <eve/detail/shuffle_v2/simd/common/shuffle_l6_l7.hpp>

#if defined(EVE_INCLUDE_X86_HEADER)
# include <eve/detail/shuffle_v2/simd/x86/shuffle_l6_l7.hpp>
#endif
2 changes: 1 addition & 1 deletion include/eve/detail/shuffle_v2/shuffle_v2_fwd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ namespace eve
//! If to produce 4 results we need to apply it 4 times it doesn't become more complex,
//! you just have more work to do.
//! From a practical standpoint, we want to be able to say:
//! "swap_adjacent_groups is at most level 5", and not "5 * number of output registers"
//! "swap_adjacent is at most level 5", and not "5 * number of output registers"
//!
//! If it proves to be important to accumulate all shuffles, we will change it in the future.
//! @}
Expand Down
39 changes: 35 additions & 4 deletions include/eve/detail/shuffle_v2/simd/arm/neon/shuffle_l2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ vcopy_lane(eve::wide<T, N> x, eve::index_t<To>, eve::wide<T, N> y, eve::index_t<
}
else
{
if constexpr( sizeof(T) == 8 ) return vcopy_laneq_u64(x, To, y, From);
else if constexpr( sizeof(T) == 4 ) return vcopy_laneq_u32(x, To, y, From);
else if constexpr( sizeof(T) == 2 ) return vcopy_laneq_u16(x, To, y, From);
else return vcopy_laneq_u8(x, To, y, From);
if constexpr( sizeof(T) == 8 ) return vcopyq_laneq_u64(x, To, y, From);
else if constexpr( sizeof(T) == 4 ) return vcopyq_laneq_u32(x, To, y, From);
else if constexpr( sizeof(T) == 2 ) return vcopyq_laneq_u16(x, To, y, From);
else return vcopyq_laneq_u8(x, To, y, From);
}
}

Expand Down Expand Up @@ -215,4 +215,35 @@ requires(P::out_reg_size == P::reg_size)
else return no_matching_shuffle_t {};
}

// Tries to implement a two-register shuffle as a single lane copy
// (vcopy_lane) from one register into the other.
//
// Two candidates are examined with idxm::is_just_setting_one_lane: the pattern
// itself (copy one lane of `y` into `x`) and the pattern with x/y swapped
// (copy one lane of `x` into `y`). The first element of the reported pair is
// used as the destination lane, the second - after removing the offset of the
// other register - as the source lane.
//
// Returns no_matching_shuffle_t{} below asimd or when neither candidate fits.
//
// NOTE(review): the source lane is rebased with `N::value * G`, while other
// helpers in this file compare the same kind of index against `N::value / G`.
// Both agree only for G == 1 - confirm that larger groups cannot reach here.
template<typename P, arithmetic_scalar_value T, typename N, std::ptrdiff_t G>
EVE_FORCEINLINE auto
shuffle_l2_neon_copy_lane_other(P, fixed<G>, wide<T, N> x, wide<T, N> y)
{
  constexpr auto into_x = idxm::is_just_setting_one_lane(P::idxs);
  constexpr auto into_y = idxm::is_just_setting_one_lane(P::xy_swapped);

  if constexpr( current_api < asimd ) return no_matching_shuffle_t {};
  else if constexpr( into_x )
  {
    constexpr auto to   = (*into_x)[0];
    constexpr auto from = (*into_x)[1] - N::value * G;
    return vcopy_lane(x, eve::index<to>, y, eve::index<from>);
  }
  else if constexpr( into_y )
  {
    constexpr auto to   = (*into_y)[0];
    constexpr auto from = (*into_y)[1] - N::value * G;
    return vcopy_lane(y, eve::index<to>, x, eve::index<from>);
  }
  else return no_matching_shuffle_t {};
}

// NEON entry point for level-2 shuffles of two registers (only when the output
// register size equals the input register size).
// Currently the only strategy attempted is the single lane copy; anything else
// reports no_matching_shuffle_t{}.
template<typename P, arithmetic_scalar_value T, typename N, std::ptrdiff_t G>
EVE_FORCEINLINE auto
shuffle_l2_(EVE_SUPPORTS(neon128_), P p, fixed<G> g, wide<T, N> x, wide<T, N> y)
requires(P::out_reg_size == P::reg_size)
{
  if constexpr( auto copied = shuffle_l2_neon_copy_lane_other(p, g, x, y);
                !matched_shuffle<decltype(copied)> )
  {
    return no_matching_shuffle_t {};
  }
  else return copied;
}

}
24 changes: 24 additions & 0 deletions include/eve/detail/shuffle_v2/simd/arm/neon/shuffle_l3.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
//==================================================================================================
#pragma once

#include <eve/module/core/regular/bit_select.hpp>

namespace eve::detail
{

Expand Down Expand Up @@ -56,6 +58,19 @@ shuffle_l3_neon_tbl(P, fixed<G>, wide<T, N> x)
}
}


// Implements a blend of two registers with eve::bit_select.
//
// Applies only when idxm::is_blend accepts the pattern; otherwise returns
// no_matching_shuffle_t{}. The selection mask is true for every lane whose
// group index in P::idxs is at or past `size / G`, and those lanes are taken
// from `y`; the remaining lanes come from `x`.
template<typename P, arithmetic_scalar_value T, typename N, std::ptrdiff_t G>
EVE_FORCEINLINE auto
shuffle_l3_neon_bit_select(P, fixed<G>, wide<T, N> x, wide<T, N> y)
{
  if constexpr( idxm::is_blend(P::idxs, N::value / G) )
  {
    eve::logical<wide<T, N>> take_y(
        [](int lane, int lanes) { return P::idxs[lane / G] >= lanes / G; });
    return eve::bit_select(take_y, y, x);
  }
  else return no_matching_shuffle_t {};
}

template<typename P, arithmetic_scalar_value T, typename N, std::ptrdiff_t G>
EVE_FORCEINLINE auto
shuffle_l3_(EVE_SUPPORTS(neon128_), P p, fixed<G> g, wide<T, N> x)
Expand All @@ -66,4 +81,13 @@ shuffle_l3_(EVE_SUPPORTS(neon128_), P p, fixed<G> g, wide<T, N> x)
else return no_matching_shuffle_t {};
}

// NEON entry point for level-3 shuffles of two registers (only when the output
// register size equals the input register size).
// The single strategy attempted is the bit_select based blend.
template<typename P, arithmetic_scalar_value T, typename N, std::ptrdiff_t G>
EVE_FORCEINLINE auto
shuffle_l3_(EVE_SUPPORTS(neon128_), P p, fixed<G> g, wide<T, N> x, wide<T, N> y)
requires(P::out_reg_size == P::reg_size)
{
  if constexpr( auto blended = shuffle_l3_neon_bit_select(p, g, x, y);
                !matched_shuffle<decltype(blended)> )
  {
    return no_matching_shuffle_t {};
  }
  else return blended;
}

}
40 changes: 40 additions & 0 deletions include/eve/detail/shuffle_v2/simd/arm/sve/shuffle_l2.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
//==================================================================================================
/*
EVE - Expressive Vector Engine
Copyright : EVE Project Contributors
SPDX-License-Identifier: BSL-1.0
*/
//==================================================================================================
#pragma once

#include <eve/module/core/regular/if_else.hpp>

namespace eve::detail
{

// Implements a blend of two registers with svsel.
//
// Applies only when idxm::is_blend accepts the pattern; otherwise returns
// no_matching_shuffle. Lanes whose group index in P::idxs is at or past
// `size / G` are taken from `y`, the others from `x`.
template<typename P, arithmetic_scalar_value T, typename N, std::ptrdiff_t G>
EVE_FORCEINLINE auto
shuffle_l2_sve_blend(P, fixed<G>, wide<T, N> x, wide<T, N> y)
{
  // P::idxs is used on purpose rather than idxs2match: sve offers no zeroing blend.
  if constexpr( idxm::is_blend(P::idxs, N::value / G) )
  {
    eve::logical<wide<T, N>> take_y(
        [](int lane, int lanes) { return P::idxs[lane / G] >= lanes / G; });
    return wide<T, N> {svsel(take_y, y, x)};
  }
  else return no_matching_shuffle;
}

// SVE entry point for level-2 shuffles of two registers (only when the output
// register size equals the input register size).
// The single strategy attempted is the svsel based blend.
template<typename P, arithmetic_scalar_value T, typename N, std::ptrdiff_t G>
EVE_FORCEINLINE auto
shuffle_l2_(EVE_SUPPORTS(sve_), P p, fixed<G> g, wide<T, N> x, wide<T, N> y)
requires(P::out_reg_size == P::reg_size)
{
  if constexpr( auto blended = shuffle_l2_sve_blend(p, g, x, y);
                !matched_shuffle<decltype(blended)> )
  {
    return no_matching_shuffle_t {};
  }
  else return blended;
}

}
41 changes: 41 additions & 0 deletions include/eve/detail/shuffle_v2/simd/common/shuffle_l6_l7.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//==================================================================================================
/*
EVE - Expressive Vector Engine
Copyright : EVE Project Contributors
SPDX-License-Identifier: BSL-1.0
*/
//==================================================================================================
#pragma once

namespace eve::detail
{

// Blends two logical values using logical operations only.
//
// When idxm::is_blend accepts the pattern, a mask `m` is built that is true
// for lanes whose group index in P::idxs is at or past `size / G`; the result
// is (x && !m) || (y && m), reported with level eve::index<6>.
// Otherwise returns kumi::tuple{no_matching_shuffle, eve::index<-1>}.
template<typename P, logical_simd_value T, std::ptrdiff_t G>
EVE_FORCEINLINE auto
shuffle_l6_l7_blend_nonwide_logicals(P, fixed<G>, T x, T y)
{
  if constexpr( idxm::is_blend(P::idxs, T::size() / G) )
  {
    T    take_y([](int lane, int lanes) { return P::idxs[lane / G] >= lanes / G; });
    auto blended = (x && !take_y) || (y && take_y);
    return kumi::tuple {blended, eve::index<6>};
  }
  else
  {
    return kumi::tuple {no_matching_shuffle, eve::index<-1>};
  }
}

// Generic (cpu_) handling of two-input shuffles for logical values.
// The only strategy attempted is the blend built from logical operations; its
// {value, level} tuple is returned as is when the value part matched.
// NOTE(review): the fallthrough returns a bare no_matching_shuffle, whereas the
// helper reports failure as a {no_matching_shuffle, index<-1>} tuple - confirm
// callers expect the bare form here.
template<typename P, logical_simd_value T, std::ptrdiff_t G>
EVE_FORCEINLINE auto
shuffle_l6_l7_(EVE_SUPPORTS(cpu_), P p, fixed<G> g, T x, T y)
{
  if constexpr( auto attempt = shuffle_l6_l7_blend_nonwide_logicals(p, g, x, y);
                !matched_shuffle<decltype(get<0>(attempt))> )
  {
    return no_matching_shuffle;
  }
  else return attempt;
}

}
14 changes: 14 additions & 0 deletions include/eve/detail/shuffle_v2/simd/common/shuffle_l_fallback.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,18 @@ requires(!abi_t<T, N>::is_wide_logical) && requires { shuffle_v2_core(x.bits(),
// to mask and from mask + 2 each
return kumi::tuple {to_logical(shuffled), eve::index<decltype(l)::value + 4>};
}


// Last-resort handling of a two-input shuffle.
//
// Below sse4_1 the shuffle is built from two single-input shuffles merged
// with OR (shuffle_2_using_or). For every other API nothing is attempted here
// and kumi::tuple{no_matching_shuffle, eve::index<-1>} is returned.
template<simd_value T, std::ptrdiff_t G, std::ptrdiff_t... I>
EVE_FORCEINLINE auto
shuffle_l_fallback_(EVE_SUPPORTS(cpu_), pattern_t<I...> p, fixed<G> g, T x, T y)
{
  // sse2 has no blend instruction; the other APIs are expected to go through
  // their own blend / no-blend options instead of this fallback.
  if constexpr( !(eve::current_api < eve::sse4_1) )
  {
    return kumi::tuple {no_matching_shuffle, eve::index<-1>};
  }
  else return shuffle_2_using_or(p, g, x, y);
}

}
24 changes: 24 additions & 0 deletions include/eve/detail/shuffle_v2/simd/ppc/shuffle_l3.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,30 @@ shuffle_l3_(EVE_SUPPORTS(vmx_), P p, fixed<G> g, wide<T, N> x)
}
}

// Implements a blend of two registers with vec_sel.
//
// Applies only when idxm::is_blend accepts the pattern; otherwise returns
// no_matching_shuffle. The mask is true for lanes whose group index in
// P::idxs is at or past `size / G`. The value returned is whatever vec_sel
// produces for the raw storages of x, y and the mask.
template<typename P, arithmetic_scalar_value T, typename N, std::ptrdiff_t G>
EVE_FORCEINLINE auto
shuffle_l3_ppc_vec_sel(P, fixed<G>, wide<T, N> x, wide<T, N> y)
{
  if constexpr( idxm::is_blend(P::idxs, N::value / G) )
  {
    eve::logical<wide<T, N>> take_y(
        [](int lane, int lanes) { return P::idxs[lane / G] >= lanes / G; });
    return vec_sel(x.storage(), y.storage(), take_y.storage());
  }
  else return no_matching_shuffle;
}

// VMX entry point for level-3 shuffles of two registers (only when the output
// register size equals the input register size).
// The single strategy attempted is the vec_sel based blend.
template<typename P, arithmetic_scalar_value T, typename N, std::ptrdiff_t G>
EVE_FORCEINLINE auto
shuffle_l3_(EVE_SUPPORTS(vmx_), P p, fixed<G> g, wide<T, N> x, wide<T, N> y)
requires(P::out_reg_size == P::reg_size)
{
  if constexpr( auto blended = shuffle_l3_ppc_vec_sel(p, g, x, y);
                !matched_shuffle<decltype(blended)> )
  {
    return no_matching_shuffle_t {};
  }
  else return blended;
}

}

// return vec_perm(what.storage(), what.storage(), pattern.storage());
Loading

0 comments on commit 19c4348

Please sign in to comment.