Skip to content

Commit

Permalink
small clean up (#1639)
Browse files Browse the repository at this point in the history
  • Loading branch information
DenisYaroshevskiy authored Aug 6, 2023
1 parent cf482af commit 938ef12
Show file tree
Hide file tree
Showing 17 changed files with 491 additions and 446 deletions.
2 changes: 1 addition & 1 deletion cmake/toolchain/gcc.sve128.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ set(CMAKE_SYSTEM_PROCESSOR arm )
# Precompiled headers are switched off for this toolchain; the linked GCC bug is the stated reason.
set(EVE_USE_PCH OFF) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106491
# Cross compilers: GCC 12 targeting aarch64 Linux.
set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc-12 )
set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++-12 )
# NOTE(review): CMAKE_CXX_FLAGS is assigned twice in a row. Only the second assignment
# (-march=armv8-a+sve) takes effect; the first one (+sve2) is overwritten.
# Both fix the SVE vector length at 128 bits via -msve-vector-bits.
set(CMAKE_CXX_FLAGS "-Wno-psabi -DEVE_NO_FORCEINLINE ${EVE_OPTIONS} -march=armv8-a+sve2 -msve-vector-bits=128" )
set(CMAKE_CXX_FLAGS "-Wno-psabi -DEVE_NO_FORCEINLINE ${EVE_OPTIONS} -march=armv8-a+sve -msve-vector-bits=128" )

# NOTE(review): presumably the wrapper script used to launch cross-compiled binaries - confirm.
set(CMAKE_CROSSCOMPILING_CMD ${PROJECT_SOURCE_DIR}/cmake/toolchain/run_sve128.sh )
2 changes: 1 addition & 1 deletion cmake/toolchain/gcc.sve256.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ set(CMAKE_SYSTEM_PROCESSOR arm )
# Precompiled headers are switched off for this toolchain; the linked GCC bug is the stated reason.
set(EVE_USE_PCH OFF) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106491
# Cross compilers: GCC 12 targeting aarch64 Linux.
set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc-12 )
set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++-12 )
# NOTE(review): CMAKE_CXX_FLAGS is assigned twice in a row. Only the second assignment
# (-march=armv8-a+sve) takes effect; the first one (+sve2) is overwritten.
# Both fix the SVE vector length at 256 bits via -msve-vector-bits.
set(CMAKE_CXX_FLAGS "-Wno-psabi -DEVE_NO_FORCEINLINE ${EVE_OPTIONS} -march=armv8-a+sve2 -msve-vector-bits=256" )
set(CMAKE_CXX_FLAGS "-Wno-psabi -DEVE_NO_FORCEINLINE ${EVE_OPTIONS} -march=armv8-a+sve -msve-vector-bits=256" )

# NOTE(review): presumably the wrapper script used to launch cross-compiled binaries - confirm.
set(CMAKE_CROSSCOMPILING_CMD ${PROJECT_SOURCE_DIR}/cmake/toolchain/run_sve256.sh )
2 changes: 1 addition & 1 deletion cmake/toolchain/gcc.sve512.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ set(CMAKE_SYSTEM_PROCESSOR arm )
# Precompiled headers are switched off for this toolchain; the linked GCC bug is the stated reason.
set(EVE_USE_PCH OFF) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106491
# Cross compilers: GCC 12 targeting aarch64 Linux.
set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc-12 )
set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++-12 )
# NOTE(review): CMAKE_CXX_FLAGS is assigned twice in a row. Only the second assignment
# (-march=armv8-a+sve) takes effect; the first one (+sve2) is overwritten.
# Both fix the SVE vector length at 512 bits via -msve-vector-bits.
set(CMAKE_CXX_FLAGS "-Wno-psabi -DEVE_NO_FORCEINLINE ${EVE_OPTIONS} -march=armv8-a+sve2 -msve-vector-bits=512" )
set(CMAKE_CXX_FLAGS "-Wno-psabi -DEVE_NO_FORCEINLINE ${EVE_OPTIONS} -march=armv8-a+sve -msve-vector-bits=512" )

# Unlike the 128/256-bit toolchains, this one names qemu-aarch64 directly instead of a script.
set(CMAKE_CROSSCOMPILING_CMD qemu-aarch64)
3 changes: 1 addition & 2 deletions include/eve/detail/shuffle_v2/simd/arm/neon/shuffle_l2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,7 @@ template<typename P, arithmetic_scalar_value T, typename N, std::ptrdiff_t G>
EVE_FORCEINLINE auto
shuffle_l2_neon_dup_lane(P, fixed<G>, wide<T, N> x)
{
constexpr auto lane = idxm::is_lane_broadcast(P::idxs);
constexpr bool is_8_bytes = sizeof(T) * N::value == 8;
constexpr auto lane = idxm::is_lane_broadcast(P::idxs);

if constexpr( !lane ) return no_matching_shuffle_t {};
else
Expand Down
4 changes: 2 additions & 2 deletions include/eve/detail/shuffle_v2/simd/arm/neon/shuffle_l3.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ namespace eve::detail

// Single-register byte table lookup.
//
// Builds an index register from the compile-time indexes `I...` and uses it to look bytes up
// in `x`. The pattern argument only carries `I...` in its type, so it is left unnamed to avoid
// an unused-parameter warning.
//
// Dispatches on the register width:
//   * 8 lanes  -> vtbl1_u8
//   * otherwise -> vqtbl1q_u8
template<typename N, std::ptrdiff_t... I>
EVE_FORCEINLINE auto
neon_vtbl(wide<std::uint8_t, N> x, pattern_t<I...>)
{
  if constexpr( N::value == 8 ) return vtbl1_u8(x, wide<std::uint8_t, N> {I...});
  else return vqtbl1q_u8(x, wide<std::uint8_t, N> {I...});
}

// Byte table lookup with a fallback register (the "tbx" family): `x` is passed as the first
// operand, `y` as the table, and the index register is built from the compile-time `I...`.
// The pattern argument only carries `I...` in its type, so it is left unnamed.
//
// Dispatches on the register width:
//   * 8 lanes  -> vtbx1_u8
//   * otherwise -> vqtbx1q_u8. The `q`-suffixed form is required here: vqtbx1_u8 takes and
//     returns 64-bit vectors for the fallback/index operands, which does not match a
//     16-lane wide.
template<typename N, std::ptrdiff_t... I>
EVE_FORCEINLINE auto
neon_vtbl(wide<std::uint8_t, N> x, wide<std::uint8_t, N> y, pattern_t<I...>)
{
  if constexpr( N::value == 8 ) return vtbx1_u8(x, y, wide<std::uint8_t, N> {I...});
  else return vqtbx1q_u8(x, y, wide<std::uint8_t, N> {I...});
Expand Down
13 changes: 0 additions & 13 deletions include/eve/detail/shuffle_v2/simd/arm/sve/shuffle_l3.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,4 @@ shuffle_l3_(EVE_SUPPORTS(sve_), P, fixed<G>, wide<T, N> x)
return sve_tbl(x, table_idxs);
}

// SVE shuffle over two input registers, implemented with a two-register table lookup.
//
// The index table is derived from `P::idxs` in three steps: `idxm::replace_we` is applied with
// `eve::na_` as the substitute, the result goes through `idxm::expand_group<G>`, and the
// outcome is turned into a pattern type that `sve_tbl2` consumes.
template<typename P, arithmetic_scalar_value T, typename N, std::ptrdiff_t G>
EVE_FORCEINLINE auto
shuffle_l3_(EVE_SUPPORTS(sve_), P, fixed<G>, wide<T, N> x, wide<T, N> y)
{
  constexpr auto without_we   = idxm::replace_we(P::idxs, eve::na_);
  constexpr auto per_element  = idxm::expand_group<G>(without_we);
  constexpr auto lookup_table = idxm::to_pattern<per_element>();

  // Hard to say if this is truly l3: the registers have to be moved into the struct first.
  // I guess we'll see.
  return sve_tbl2(x, y, lookup_table);
}


}
155 changes: 1 addition & 154 deletions include/eve/detail/shuffle_v2/simd/common/shuffle_v2_driver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//==================================================================================================
#pragma once

#include <eve/arch/fundamental_cardinal.hpp>
#include <eve/detail/shuffle_v2/simplify_plain_shuffle.hpp>

namespace eve::detail
{
Expand Down Expand Up @@ -60,159 +60,6 @@ shuffle_emulated(pattern_t<I...>, fixed<G>, kumi::tuple<Ts...> xs)

// upscale pattern-------

// Tries to rewrite a pattern over N elements as a pattern over N / 2 elements that are twice
// as wide, by merging every adjacent pair of indexes (p[2k], p[2k + 1]) into one index.
//
// Per pair:
//   * if either index is `na_`: the pair becomes `na_` when both are equal or the other one is
//     `we_`; any other combination cannot be merged;
//   * if both are `we_`: the pair becomes `we_`;
//   * a single `we_` is filled in as the neighbour of the other index (i1 - 1 or i0 + 1);
//   * the pair must then be consecutive and start on an even index; it becomes i0 / 2.
//
// Returns std::nullopt as soon as one pair cannot be merged, or when N is odd (the indexes
// cannot be split into pairs at all - this also covers N == 1).
template<std::size_t N>
constexpr auto
upscale_pattern_impl(std::array<std::ptrdiff_t, N> p)
  -> std::optional<std::array<std::ptrdiff_t, N / 2>>
{
  // An odd count would silently drop the last index, so refuse instead.
  if( N % 2 != 0 ) return std::nullopt;

  std::array<std::ptrdiff_t, N / 2> res {};

  // std::size_t index: same type as N, no signed/unsigned comparison.
  for( std::size_t i = 0; i != N / 2; ++i )
  {
    std::ptrdiff_t i0 = p[2 * i];
    std::ptrdiff_t i1 = p[2 * i + 1];

    if( i0 == na_ || i1 == na_ )
    {
      if( i0 == i1 || i0 == we_ || i1 == we_ )
      {
        res[i] = na_;
        continue;
      }
      return std::nullopt;
    }

    if( i0 == we_ && i1 == we_ )
    {
      res[i] = we_;
      continue;
    }

    // Exactly one side may still be `we_` here: pretend it is the matching neighbour.
    if( i0 == we_ ) i0 = i1 - 1;
    if( i1 == we_ ) i1 = i0 + 1;

    if( i0 + 1 != i1 || i0 % 2 != 0 ) return std::nullopt;

    res[i] = i0 / 2;
  }

  return res;
}

// Pattern-type front end for upscale_pattern_impl.
//
// Runs upscale_pattern_impl on the indexes `I...` at compile time. When it yields a value, the
// result is returned as a pattern built with idxm::to_pattern; otherwise the input pattern `p`
// is returned unchanged.
template<std::ptrdiff_t... I>
constexpr auto
upscale_pattern(pattern_t<I...> p)
{
  constexpr std::array    indexes {I...};
  constexpr std::optional halved = upscale_pattern_impl(indexes);

  if constexpr( halved.has_value() ) return idxm::to_pattern<*halved>();
  else return p;
}

// Applies eve::bit_cast(x, eve::as<U>{}) to every element of the tuple `xs` and returns the
// mapped tuple (via kumi::map). The `eve::as<U>` argument is only a tag selecting the target
// type.
template<typename U, typename... Ts>
EVE_FORCEINLINE auto
bit_cast_tuple(kumi::tuple<Ts...> xs, eve::as<U>)
{
  return kumi::map([](auto x) { return eve::bit_cast(x, eve::as<U> {}); }, xs);
}

// Aggregate bundling the outcome of the simplify_plain_* steps: the input tuple, the group
// size tag and the pattern, in that order. It is brace-initialised as
// `simplified_pattern {xs, g, p}`, so the member order below is part of the contract.
template<typename G, typename P, typename... Ts> struct simplified_pattern
{
// Inputs of the shuffle.
kumi::tuple<Ts...> x;
// Group size tag (an eve::fixed<...> at the construction sites visible in this file).
G g;
// Index pattern (a pattern_t<...> at the construction sites visible in this file).
P p;
};

// Deduction guide: lets `simplified_pattern {xs, g, p}` deduce <G, P, Ts...> even though the
// template parameter order (G, P, Ts...) differs from the member order (tuple, G, P).
template<typename G, typename P, typename... Ts>
simplified_pattern(kumi::tuple<Ts...>, G, P) -> simplified_pattern<G, P, Ts...>;

// Normalises the element type of the inputs, recursing until nothing more can be changed, and
// returns a simplified_pattern {inputs, group size, pattern}.
//
// The pattern `p` is passed through untouched on every path; only the inputs (bit-cast to
// another type) and the group size `G` change. Decisions are taken from the first tuple
// element type `T`.
template<std::ptrdiff_t G, std::ptrdiff_t... I, typename T, typename... Ts>
EVE_FORCEINLINE auto
simplify_plain_up_the_type(pattern_t<I...> p, eve::fixed<G> g, kumi::tuple<T, Ts...> xs)
{
// non wide logicals
if constexpr( eve::logical_value<T> )
{
// Mask type is not unsigned: rebind T to the unsigned integer of the same size and retry.
if constexpr( !eve::unsigned_value<typename T::mask_type> )
{
using e_t = eve::element_type_t<typename T::mask_type>;
using N = eve::fixed<T::size()>;

using u_t = typename T::template rebind<detail::make_integer_t<sizeof(e_t), unsigned>, N>;
return simplify_plain_up_the_type(p, g, bit_cast_tuple(xs, as<u_t> {}));
}
// Mask type already unsigned: stop here, no widening is attempted on this path.
else return simplified_pattern {xs, g, p};
}
// Element type is not an unsigned integer: bit-cast to a wide of same-size unsigned integers
// with the same number of lanes, then retry.
else if constexpr( !std::unsigned_integral<eve::element_type_t<T>> )
{
using e_t = eve::element_type_t<T>;
using N = eve::fixed<T::size()>;
using u_t = eve::wide<detail::make_integer_t<sizeof(e_t), unsigned>, N>;
return simplify_plain_up_the_type(p, g, bit_cast_tuple(xs, as<u_t> {}));
}
// Groups of at least 2 elements narrower than 8 bytes: double the element size, halve the
// lane count and the group size, then retry.
else if constexpr( G >= 2 && sizeof(eve::element_type_t<T>) < 8 )
{
using e_t = eve::element_type_t<T>;
using N = eve::fixed<T::size() / 2>;
using up_t = eve::wide<detail::make_integer_t<sizeof(e_t) * 2, unsigned>, N>;
return simplify_plain_up_the_type(p, eve::lane<G / 2>, bit_cast_tuple(xs, as<up_t> {}));
}
// Nothing left to change.
else { return simplified_pattern {xs, g, p}; }
}

// For exactly two inputs, picks between the pattern as given and the pattern returned by
// idxm::swap_xy(idxs, T::size() / G) paired with the inputs in reverse order: the swapped form
// is used when it compares less than the original indexes. Any other number of inputs is
// forwarded as is. Either way the work continues in simplify_plain_up_the_type.
template<std::ptrdiff_t G, std::ptrdiff_t... I, typename T, typename... Ts>
EVE_FORCEINLINE auto
simplify_plain_swap_input_wides(pattern_t<I...> p, eve::fixed<G> g, kumi::tuple<T, Ts...> xs)
{
  if constexpr( sizeof...(Ts) == 1 )
  {
    constexpr std::array original {I...};
    constexpr auto       flipped = idxm::swap_xy(original, T::size() / G);

    if constexpr( flipped < original )
    {
      return simplify_plain_up_the_type(idxm::to_pattern<flipped>(), g, kumi::reverse(xs));
    }
    else return simplify_plain_up_the_type(p, g, xs);
  }
  else return simplify_plain_up_the_type(p, g, xs);
}

// Repeatedly applies upscale_pattern: as long as it returns a pattern different from the
// current one, recurse with that pattern and a doubled group size. Once the pattern stops
// changing, hand over to simplify_plain_swap_input_wides.
template<std::ptrdiff_t G, std::ptrdiff_t... I, typename... Ts>
EVE_FORCEINLINE auto
simplify_plain_shuffle_upscale_pattern(pattern_t<I...>, eve::fixed<G> g, kumi::tuple<Ts...> xs)
{
  constexpr auto current  = pattern<I...>;
  constexpr auto upscaled = upscale_pattern(current);

  if constexpr( upscaled != current )
  {
    // Progress was made: try to upscale once more with groups twice as large.
    return simplify_plain_shuffle_upscale_pattern(upscaled, eve::lane<G * 2>, xs);
  }
  else
  {
    return simplify_plain_swap_input_wides(current, g, xs);
  }
}

// Brings inputs and pattern up to the fundamental cardinal of the element type before the
// pattern is upscaled.
//
// Padding is needed when the fundamental cardinal exceeds the input size, or when the pattern
// has fewer entries than `fundamental cardinal / G`. In that case the indexes are rewritten
// with idxm::fix_indexes_to_fundamental and the inputs are bit-cast to T rescaled to
// max(T::size(), fundamental cardinal) lanes. Otherwise everything is forwarded unchanged.
template<std::ptrdiff_t G, std::ptrdiff_t... I, typename T, typename... Ts>
EVE_FORCEINLINE auto
simplify_plain_pad_fundamental(pattern_t<I...> p, eve::fixed<G> g, kumi::tuple<T, Ts...> xs)
{
  constexpr std::ptrdiff_t f_n = fundamental_cardinal_v<eve::element_type_t<T>>;
  constexpr bool must_pad      = f_n > T::size() || (f_n / G) > pattern_t<I...>::size();

  if constexpr( !must_pad ) return simplify_plain_shuffle_upscale_pattern(p, g, xs);
  else
  {
    using padded_cardinal = fixed<std::max(T::size(), f_n)>;
    using padded_wide     = typename T::template rescale<padded_cardinal>;

    constexpr auto padded_idxs =
        idxm::fix_indexes_to_fundamental<f_n / G>(std::array {I...}, T::size() / G);

    return simplify_plain_shuffle_upscale_pattern(
        idxm::to_pattern<padded_idxs>(), g, bit_cast_tuple(xs, as<padded_wide> {}));
  }
}

// Entry point of the plain-shuffle simplification: forwards the pattern, the group size tag and
// the inputs to simplify_plain_pad_fundamental and returns whatever it returns.
template<std::ptrdiff_t G, std::ptrdiff_t... I, typename... Ts>
EVE_FORCEINLINE auto
simplify_plain_shuffle(pattern_t<I...> pat, eve::fixed<G> group, kumi::tuple<Ts...> inputs)
{
  return simplify_plain_pad_fundamental(pat, group, inputs);
}

template<typename Tuple>
constexpr auto
shuffle_v2_combined_l()
Expand Down
Loading

0 comments on commit 938ef12

Please sign in to comment.