From 36ee952eaba93fddf98dc1ee044969e539e53030 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 24 Oct 2024 13:34:09 +0200 Subject: [PATCH] cleanup --- .../detail/function/simd/x86/interleave.hpp | 122 +++++++++--------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/include/eve/detail/function/simd/x86/interleave.hpp b/include/eve/detail/function/simd/x86/interleave.hpp index c503ae6c2f..7c51b33198 100644 --- a/include/eve/detail/function/simd/x86/interleave.hpp +++ b/include/eve/detail/function/simd/x86/interleave.hpp @@ -17,7 +17,7 @@ namespace eve::detail // Interleave pairs of wides //================================================================================================ template - EVE_FORCEINLINE kumi::tuple, wide> interleave_(EVE_REQUIRES(sse2_), O const& o, wide a, wide b) noexcept + EVE_FORCEINLINE kumi::tuple, wide> interleave_(EVE_REQUIRES(sse2_) , O const& o, wide v0, wide v1) noexcept requires (N::value > 1) && x86_abi> { using type = wide; @@ -25,16 +25,16 @@ namespace eve::detail if constexpr( c == category::float64x2 ) { - return kumi::make_tuple(type(_mm_unpacklo_pd(a, b)),type(_mm_unpackhi_pd(a, b))); + return kumi::make_tuple(type(_mm_unpacklo_pd(v0, v1)),type(_mm_unpackhi_pd(v0, v1))); } else if constexpr( match(c,category::int64x2, category::uint64x2) ) { - return kumi::make_tuple(type(_mm_unpacklo_epi64(a,b)), type(_mm_unpackhi_epi64(a,b))); + return kumi::make_tuple(type(_mm_unpacklo_epi64(v0,v1)), type(_mm_unpackhi_epi64(v0,v1))); } else if constexpr( c == category::float64x4 ) { - auto[a0,a1] = a.slice(); - auto[b0,b1] = b.slice(); + auto[a0,a1] = v0.slice(); + auto[b0,b1] = v1.slice(); auto[x0,x1] = eve::interleave(a0,b0); auto[y0,y1] = eve::interleave(a1,b1); @@ -44,14 +44,14 @@ namespace eve::detail { if constexpr(N::value == 2) { - auto xy = _mm_shuffle_ps(a,b,0b10011001); - return kumi::make_tuple ( type(_mm_unpacklo_ps(a,b)) + auto xy = _mm_shuffle_ps(v0,v1,0b10011001); + return kumi::make_tuple ( type(_mm_unpacklo_ps(v0,v1)) , type(_mm_shuffle_ps(xy,xy,0b11011000)) ); } else if constexpr(N::value == 4) { - return kumi::make_tuple(type(_mm_unpacklo_ps(a,b)), type(_mm_unpackhi_ps(a,b))); + return kumi::make_tuple(type(_mm_unpacklo_ps(v0,v1)), type(_mm_unpackhi_ps(v0,v1))); } } else if constexpr( match(c,category::int32x4, category::uint32x4) ) @@ -59,18 +59,18 @@ namespace eve::detail if constexpr(N::value == 2) { using ftype = as,N>>; - auto that = interleave(bit_cast(a,ftype()),bit_cast(b,ftype())); - return kumi::map( [](auto m){ return bit_cast(m,as{}); }, that); + auto that = interleave(bit_cast(v0,ftype()),bit_cast(v1,ftype())); + return kumi::map( [](auto m){ return bit_cast(m,as()); }, that); } else if constexpr(N::value == 4) { - return kumi::make_tuple(type(_mm_unpacklo_epi32(a,b)), type(_mm_unpackhi_epi32(a,b))); + return kumi::make_tuple(type(_mm_unpacklo_epi32(v0,v1)), type(_mm_unpackhi_epi32(v0,v1))); } } else if constexpr( c == category::float32x8 ) { - auto[a0,a1] = a.slice(); - auto[b0,b1] = b.slice(); + auto[a0,a1] = v0.slice(); + auto[b0,b1] = v1.slice(); auto[x0,x1] = eve::interleave(a0,b0); auto[y0,y1] = eve::interleave(a1,b1); @@ -80,11 +80,11 @@ namespace eve::detail { if constexpr(N::value == 8) { - return kumi::make_tuple(type(_mm_unpacklo_epi16(a,b)), type(_mm_unpackhi_epi16(a,b))); + return kumi::make_tuple(type(_mm_unpacklo_epi16(v0,v1)), type(_mm_unpackhi_epi16(v0,v1))); } else { - wide> v01 = _mm_unpacklo_epi16(a.storage(),b.storage()); + wide> v01 = _mm_unpacklo_epi16(v0.storage(),v1.storage()); if constexpr(N::value == 2 ) { auto[l,h] = v01.slice(lower_).slice(); @@ -101,11 +101,11 @@ namespace eve::detail { if constexpr(N::value == 16) { - return kumi::make_tuple(type(_mm_unpacklo_epi8(a,b)), type(_mm_unpackhi_epi8(a,b))); + return kumi::make_tuple(type(_mm_unpacklo_epi8(v0,v1)), type(_mm_unpackhi_epi8(v0,v1))); } else { - wide> v01 = _mm_unpacklo_epi8(a.storage(),b.storage()); + wide> v01 = _mm_unpacklo_epi8(v0.storage(),v1.storage()); if constexpr(N::value == 2) { @@ -126,20 +126,20 @@ namespace eve::detail } else if constexpr( match(c,category::int8x16, category::uint8x16) ) { - if constexpr (N() == 16) return kumi::make_tuple(type(_mm_unpacklo_epi8(a, b)), type(_mm_unpackhi_epi8(a, b))); + if constexpr (N() == 16) return kumi::make_tuple(type(_mm_unpacklo_epi8(v0, v1)), type(_mm_unpackhi_epi8(v0, v1))); else { - type combined = _mm_unpacklo_epi8(a, b); + type combined = _mm_unpacklo_epi8(v0, v1); auto [lo, hi] = combined.slice(); - return kumi::make_tuple(eve::bit_cast(lo, eve::as{a}), eve::bit_cast(lo, eve::as{b})); + return kumi::make_tuple(eve::bit_cast(lo, eve::as(v0)), eve::bit_cast(lo, eve::as(v1))); } } else if constexpr ( match(c,category::int8x32, category::uint8x32, category::int16x16, category::uint16x16) && current_api >= avx2 ) { - type ul_lanes = sizeof(T) == 2 ? _mm256_unpacklo_epi16(a, b) : _mm256_unpacklo_epi8(a, b); - type uh_lanes = sizeof(T) == 2 ? _mm256_unpackhi_epi16(a, b) : _mm256_unpackhi_epi8(a, b); + type ul_lanes = sizeof(T) == 2 ? _mm256_unpacklo_epi16(v0, v1) : _mm256_unpacklo_epi8(v0, v1); + type uh_lanes = sizeof(T) == 2 ? _mm256_unpackhi_epi16(v0, v1) : _mm256_unpackhi_epi8(v0, v1); type ul = _mm256_permute2f128_si256(ul_lanes, uh_lanes, 0x20); type uh = _mm256_permute2f128_si256(ul_lanes, uh_lanes, 0x31); return kumi::make_tuple(ul, uh); @@ -147,8 +147,8 @@ namespace eve::detail else if constexpr ( match(c, category::int8x32 , category::uint8x32, category::int16x16, category::uint16x16) ) { - auto [a0, a1] = a.slice(); - auto [b0, b1] = b.slice(); + auto [a0, a1] = v0.slice(); + auto [b0, b1] = v1.slice(); auto [ab00, ab01] = interleave(a0, b0); auto [ab10, ab11] = interleave(a1, b1); return kumi::make_tuple(type(ab00, ab01), type(ab10, ab11)); @@ -156,8 +156,8 @@ namespace eve::detail else if constexpr ( match(c, category::int8x64 , category::uint8x64, category::int16x32, category::uint16x32) ) { - type ul_lanes = sizeof(T) == 2 ? _mm512_unpacklo_epi16(a, b) : _mm512_unpacklo_epi8(a, b); - type uh_lanes = sizeof(T) == 2 ? _mm512_unpackhi_epi16(a, b) : _mm512_unpackhi_epi8(a, b); + type ul_lanes = sizeof(T) == 2 ? _mm512_unpacklo_epi16(v0, v1) : _mm512_unpacklo_epi8(v0, v1); + type uh_lanes = sizeof(T) == 2 ? _mm512_unpackhi_epi16(v0, v1) : _mm512_unpackhi_epi8(v0, v1); // Can't use shuffle_i32x4, only applies within lanes using idx_t = typename type::template rebind>; @@ -170,14 +170,14 @@ namespace eve::detail } else { - return interleave.behavior(as, wide>>{}, cpu_{}, o, a, b); + return interleave.behavior(as, wide>>{}, cpu_{}, o, v0, v1); } } template EVE_FORCEINLINE auto interleave_( EVE_REQUIRES(sse2_) , O const& o - , logical> a, logical> b + , logical> v0, logical> v1 ) noexcept requires (N::value > 1) && x86_abi> { @@ -198,8 +198,8 @@ namespace eve::detail else return _pdep_u64(v,0xAAAAAAAAAAAAAAAAULL); }; - auto x = a.storage().value; - auto y = b.storage().value; + auto x = v0.storage().value; + auto y = v1.storage().value; auto lx = deposit_low (x); auto ly = deposit_high(y); @@ -214,7 +214,7 @@ namespace eve::detail } else { - return interleave.behavior(as, wide>>{}, cpu_{}, o, a, b); + return interleave.behavior(as, wide>>{}, cpu_{}, o, v0, v1); } } @@ -224,48 +224,48 @@ namespace eve::detail template EVE_FORCEINLINE auto interleave_( EVE_REQUIRES(sse2_) , O const& o - , wide a, wide b, wide c + , wide v0, wide v1, wide v2 ) noexcept requires (N::value > 1) && x86_abi> { using type = wide; - constexpr auto cat = categorize(); + constexpr auto c = categorize(); - if constexpr( cat == category::float32x4 ) + if constexpr( c == category::float32x4 ) { if constexpr(N::value == 2) { - return interleave.behavior(as, wide, wide>>{}, cpu_{}, o, a, b, c); + return interleave.behavior(as, wide>>{}, cpu_{}, o, v0, v1, v2); } else if constexpr(N::value == 4) { - type xy = _mm_unpacklo_ps(a,b); - type xz = _mm_shuffle_ps(a,c,197); + type xy = _mm_unpacklo_ps(v0,v1); + type xz = _mm_shuffle_ps(v0,v2,197); xy = _mm_shuffle_ps(xy,xz,36); - xz = _mm_shuffle_ps(c,b,17); + xz = _mm_shuffle_ps(v2,v1,17); - auto dy = eve::bit_cast(b, as>>{}); - dy = _mm_unpackhi_pd(dy,eve::bit_cast(a, as>>{})); - auto yyx = eve::bit_cast(dy, as{}); + auto dy = eve::bit_cast(v1, eve::as>>()); + dy = _mm_unpackhi_pd(dy,eve::bit_cast(v0, eve::as>>())); + auto yyx = eve::bit_cast(dy, eve::as()); - xz = _mm_shuffle_ps(xz, yyx, 34); - a = _mm_shuffle_ps(a, c, 35); - c = _mm_shuffle_ps(c, b,255); - a = _mm_shuffle_ps(a, c, 34); + xz = _mm_shuffle_ps(xz,yyx, 34); + v0 = _mm_shuffle_ps(v0, v2, 35); + v2 = _mm_shuffle_ps(v2, v1,255); + v0 = _mm_shuffle_ps(v0, v2, 34); - return kumi::make_tuple(xy, xz, a); + return kumi::make_tuple(xy,xz,v0); } } - else if constexpr( match(cat,category::int32x4, category::uint32x4) ) + else if constexpr( match(c,category::int32x4, category::uint32x4) ) { using ftype = as,N>>; - auto that = interleave(bit_cast(a,ftype()),bit_cast(b,ftype()),bit_cast(c,ftype())); - return kumi::map( [](auto m){ return bit_cast(m,as{}); }, that); + auto that = interleave(bit_cast(v0,ftype()),bit_cast(v1,ftype()),bit_cast(v2,ftype())); + return kumi::map( [](auto m){ return bit_cast(m,as()); }, that); } else { - return interleave.behavior(as, wide, wide>>{}, cpu_{}, o, a, b, c); + return interleave.behavior(as, wide>>{}, cpu_{}, o, v0, v1, v2); } } @@ -275,23 +275,23 @@ namespace eve::detail template EVE_FORCEINLINE auto interleave_( EVE_REQUIRES(sse2_) , O const& o - , wide a, wide b, wide c, wide d + , wide v0, wide v1, wide v2, wide v3 ) noexcept requires (N::value > 1) && x86_abi> { using type = wide; - constexpr auto cat = categorize(); + constexpr auto c = categorize(); - if constexpr( cat == category::float32x4 ) + if constexpr( c == category::float32x4 ) { if constexpr(N::value == 2) { - return interleave.behavior(as, wide, wide, wide>>{}, cpu_{}, o, a, b, c, d); + return interleave.behavior(as, wide>>{}, cpu_{}, o, v0, v1, v2, v3); } else if constexpr(N::value == 4) { - auto[t0,t1] = interleave(a,c); - auto[t2,t3] = interleave(b,d); + auto[t0,t1] = interleave(v0,v2); + auto[t2,t3] = interleave(v1,v3); auto[r0,r1] = interleave(t0,t2); auto[r2,r3] = interleave(t1,t3); @@ -299,17 +299,17 @@ namespace eve::detail return kumi::tuple{r0,r1,r2,r3}; } } - else if constexpr( match(cat, category::int32x4, category::uint32x4) ) + else if constexpr( match(c,category::int32x4, category::uint32x4) ) { using ftype = as,N>>; - auto that = interleave( bit_cast(a, ftype()),bit_cast(b, ftype()) - , bit_cast(c, ftype()),bit_cast(d, ftype()) + auto that = interleave( bit_cast(v0,ftype()),bit_cast(v1,ftype()) + , bit_cast(v2,ftype()),bit_cast(v3,ftype()) ); - return kumi::map( [](auto m){ return bit_cast(m,as{}); }, that); + return kumi::map( [](auto m){ return bit_cast(m,as()); }, that); } else { - return interleave.behavior(as, wide, wide, wide>>{}, cpu_{}, o, a, b, c, d); + return interleave.behavior(as, wide>>{}, cpu_{}, o, v0, v1, v2, v3); } } }