Skip to content

Commit

Permalink
Clean up useless/non-constexpr branches (#2025)
Browse files Browse the repository at this point in the history
  • Loading branch information
SadiinsoSnowfall authored Nov 28, 2024
1 parent 603e963 commit d56b7d6
Show file tree
Hide file tree
Showing 11 changed files with 138 additions and 173 deletions.
56 changes: 25 additions & 31 deletions include/eve/module/core/regular/impl/simd/x86/add.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,42 +126,36 @@ namespace eve::detail

if constexpr(floating_value<T> &&( O::contains(lower) || O::contains(upper)) && !O::contains(strict))
{
if constexpr(current_api >= avx512)
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( c == category::float64x8 ) return _mm512_mask_add_round_pd (src, m, v, w, dir);
else if constexpr ( c == category::float32x16 ) return _mm512_mask_add_round_ps (src, m, v, w, dir);
else if constexpr ( c == category::float64x4 || c == category::float64x2 ||
c == category::float32x8 || c == category::float32x4 || c == category::float32x2)
{
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( c == category::float64x8 ) return _mm512_mask_add_round_pd (src, m, v, w, dir);
else if constexpr ( c == category::float32x16 ) return _mm512_mask_add_round_ps (src, m, v, w, dir);
else if constexpr ( c == category::float64x4 || c == category::float64x2 ||
c == category::float32x8 || c == category::float32x4 || c == category::float32x2)
{
auto vv = eve::combine(v, w);
auto ww = eve::combine(w, v);
auto vvpww = add[opts.drop(condition_key)](vv, ww);
auto s = slice(vvpww, eve::upper_);
return if_else(cx,s,src);
}
else return add.behavior(cpu_{}, opts, v, w);
auto vv = eve::combine(v, w);
auto ww = eve::combine(w, v);
auto vvpww = add[opts.drop(condition_key)](vv, ww);
auto s = slice(vvpww, eve::upper_);
return if_else(cx,s,src);
}
else return add.behavior(cpu_{}, opts, v, w);
else return add.behavior(cpu_{}, opts, v, w);
}
else if constexpr(O::contains(saturated))
{
constexpr auto sup_avx2 = current_api >= avx2;

if constexpr( floating_value<T> ) return add[cx](v, w);
else if constexpr( c == category::int16x32 ) return _mm512_mask_adds_epi16(src, m, v, w);
else if constexpr( c == category::uint16x32 ) return _mm512_mask_adds_epu16(src, m, v, w);
else if constexpr( c == category::int8x64 ) return _mm512_mask_adds_epi8(src, m, v, w);
else if constexpr( c == category::uint8x64 ) return _mm512_mask_adds_epu8(src, m, v, w);
else if constexpr( sup_avx2 && c == category::int16x16 ) return _mm256_mask_adds_epi16(src, m, v, w);
else if constexpr( sup_avx2 && c == category::uint16x16 ) return _mm256_mask_adds_epu16(src, m, v, w);
else if constexpr( sup_avx2 && c == category::int8x32 ) return _mm256_mask_adds_epi8(src, m, v, w);
else if constexpr( sup_avx2 && c == category::uint8x32 ) return _mm256_mask_adds_epu8(src, m, v, w);
else if constexpr( c == category::int16x8 ) return _mm_mask_adds_epi16(src, m, v, w);
else if constexpr( c == category::uint16x8 ) return _mm_mask_adds_epu16(src, m, v, w);
else if constexpr( c == category::int8x16 ) return _mm_mask_adds_epi8(src, m, v, w);
else if constexpr( c == category::uint8x16 ) return _mm_mask_adds_epu8(src, m, v, w);
else return add.behavior(cpu_{}, opts, v, w);
if constexpr( floating_value<T> ) return add[cx](v, w);
else if constexpr( c == category::int16x32 ) return _mm512_mask_adds_epi16(src, m, v, w);
else if constexpr( c == category::uint16x32 ) return _mm512_mask_adds_epu16(src, m, v, w);
else if constexpr( c == category::int8x64 ) return _mm512_mask_adds_epi8(src, m, v, w);
else if constexpr( c == category::uint8x64 ) return _mm512_mask_adds_epu8(src, m, v, w);
else if constexpr( c == category::int16x16 ) return _mm256_mask_adds_epi16(src, m, v, w);
else if constexpr( c == category::uint16x16 ) return _mm256_mask_adds_epu16(src, m, v, w);
else if constexpr( c == category::int8x32 ) return _mm256_mask_adds_epi8(src, m, v, w);
else if constexpr( c == category::uint8x32 ) return _mm256_mask_adds_epu8(src, m, v, w);
else if constexpr( c == category::int16x8 ) return _mm_mask_adds_epi16(src, m, v, w);
else if constexpr( c == category::uint16x8 ) return _mm_mask_adds_epu16(src, m, v, w);
else if constexpr( c == category::int8x16 ) return _mm_mask_adds_epi8(src, m, v, w);
else if constexpr( c == category::uint8x16 ) return _mm_mask_adds_epu8(src, m, v, w);
else return add.behavior(cpu_{}, opts, v, w);
}
else
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ namespace eve::detail
else if constexpr( c == category::uint64x2 ) return r_t(_mm_lzcnt_epi64(a0));
else if constexpr( c == category::uint32x4 ) return r_t(_mm_lzcnt_epi32(a0));
}
else if constexpr( current_api >= sse2 )
else
{
//Inspired from: https://stackoverflow.com/questions/58823140/count-leading-zero-bits-for-each-element-in-avx2-vector-emulate-mm256-lzcnt-ep
using ri_t = wide<std::int32_t,N>;
Expand All @@ -55,8 +55,6 @@ namespace eve::detail
}
else return countl_zero.behavior(cpu_{}, opts, a0);
}
else
return countl_zero.behavior(cpu_{}, opts, a0);
}
else
{
Expand Down
25 changes: 11 additions & 14 deletions include/eve/module/core/regular/impl/simd/x86/div.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,22 +96,19 @@ namespace eve::detail
}
else if constexpr (floating_value<T> && !O::contains(strict) && (O::contains(lower) || O::contains(upper)))
{
if constexpr(current_api >= avx512)
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;

if constexpr ( c == category::float64x8 ) return _mm512_add_round_pd (v, w, dir);
else if constexpr ( c == category::float32x16 ) return _mm512_add_round_ps (v, w, dir);
else if constexpr ( c == category::float64x4 || c == category::float64x2 ||
c == category::float32x8 || c == category::float32x4 || c == category::float32x2)
{
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( c == category::float64x8 ) return _mm512_add_round_pd (v, w, dir);
else if constexpr ( c == category::float32x16 ) return _mm512_add_round_ps (v, w, dir);
else if constexpr ( c == category::float64x4 || c == category::float64x2 ||
c == category::float32x8 || c == category::float32x4 || c == category::float32x2)
{
auto vv = combine(v, v);
auto ww = combine(w, w);
auto vvpww = div[o](vv, ww);
auto s = slice(vvpww, eve::upper_);
return if_else(cx,s,src);
}
auto vv = combine(v, v);
auto ww = combine(w, w);
auto vvpww = div[o](vv, ww);
auto s = slice(vvpww, eve::upper_);
return if_else(cx,s,src);
}
return div.behavior(cpu_{}, o, v, w);
}
else if constexpr (O::contains(toward_zero) || O::contains(upward) ||
O::contains(downward) || O::contains(to_nearest))
Expand Down
30 changes: 13 additions & 17 deletions include/eve/module/core/regular/impl/simd/x86/fma.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,26 +103,22 @@ namespace eve::detail
{
if constexpr(!O::contains(strict))
{
if constexpr(current_api >= avx512)
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( cx == category::float64x8 ) return _mm512_mask_fmadd_round_pd (a, m, b, c, dir);
else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fmadd_round_ps (a, m, b, c, dir);
else if constexpr ( cx == category::float64x4 || cx == category::float64x2 ||
cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2)
{
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( cx == category::float64x8 ) return _mm512_mask_fmadd_round_pd (a, m, b, c, dir);
else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fmadd_round_ps (a, m, b, c, dir);
else if constexpr ( cx == category::float64x4 || cx == category::float64x2 ||
cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2)
{
auto aa = eve::combine(a, a);
auto bb = eve::combine(b, b);
auto cc = eve::combine(c, c);
auto aabbcc = fma[opts.drop(condition_key)](aa, bb, cc);
auto s = slice(aabbcc, eve::upper_);
return if_else(mask,s,src);
}
else return fma.behavior(cpu_{}, opts, a, b, c);
auto aa = eve::combine(a, a);
auto bb = eve::combine(b, b);
auto cc = eve::combine(c, c);
auto aabbcc = fma[opts.drop(condition_key)](aa, bb, cc);
auto s = slice(aabbcc, eve::upper_);
return if_else(mask,s,src);
}
else return fma.behavior(cpu_{}, opts, a, b, c);
else return fma.behavior(cpu_{}, opts, a, b, c);
}
else return fma.behavior(cpu_{}, opts, a, b, c);
else return fma.behavior(cpu_{}, opts, a, b, c);
}
else if constexpr( cx == category::float32x16 ) return _mm512_mask_fmadd_ps(a, m, b, c);
else if constexpr( cx == category::float64x8 ) return _mm512_mask_fmadd_pd(a, m, b, c);
Expand Down
28 changes: 12 additions & 16 deletions include/eve/module/core/regular/impl/simd/x86/fms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,24 +107,20 @@ namespace eve::detail
{
if constexpr(!O::contains(strict))
{
if constexpr(current_api >= avx512)
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( cx == category::float64x8 ) return _mm512_mask_fmsub_round_pd (v, m, w, x, dir);
else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fmsub_round_ps (v, m, w, x, dir);
else if constexpr ( cx == category::float64x4 || cx == category::float64x2 ||
cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2)
{
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( cx == category::float64x8 ) return _mm512_mask_fmsub_round_pd (v, m, w, x, dir);
else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fmsub_round_ps (v, m, w, x, dir);
else if constexpr ( cx == category::float64x4 || cx == category::float64x2 ||
cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2)
{
auto aa = eve::combine(v, v);
auto bb = eve::combine(w, w);
auto cc = eve::combine(x, x);
auto aabbcc = fms[opts.drop(condition_key)](aa, bb, cc);
auto s = slice(aabbcc, eve::upper_);
return if_else(mask,s,src);
}
else return fms.behavior(cpu_{}, opts, v, w, x);
auto aa = eve::combine(v, v);
auto bb = eve::combine(w, w);
auto cc = eve::combine(x, x);
auto aabbcc = fms[opts.drop(condition_key)](aa, bb, cc);
auto s = slice(aabbcc, eve::upper_);
return if_else(mask,s,src);
}
else return fms.behavior(cpu_{}, opts, v, w, x);
else return fms.behavior(cpu_{}, opts, v, w, x);
}
else return fms.behavior(cpu_{}, opts, v, w, x);
}
Expand Down
30 changes: 13 additions & 17 deletions include/eve/module/core/regular/impl/simd/x86/fnma.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,26 +108,22 @@ namespace eve::detail
{
if constexpr(!O::contains(strict))
{
if constexpr(current_api >= avx512)
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( cx == category::float64x8 ) return _mm512_mask_fnmadd_round_pd (a, m, b, c, dir);
else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fnmadd_round_ps (a, m, b, c, dir);
else if constexpr ( cx == category::float64x4 || cx == category::float64x2 ||
cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2)
{
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( cx == category::float64x8 ) return _mm512_mask_fnmadd_round_pd (a, m, b, c, dir);
else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fnmadd_round_ps (a, m, b, c, dir);
else if constexpr ( cx == category::float64x4 || cx == category::float64x2 ||
cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2)
{
auto aa = eve::combine(a, a);
auto bb = eve::combine(b, b);
auto cc = eve::combine(c, c);
auto aabbcc = fnma[opts.drop(condition_key)](aa, bb, cc);
auto s = slice(aabbcc, eve::upper_);
return if_else(mask,s,src);
}
else return fnma.behavior(cpu_{}, opts, a, b, c);
auto aa = eve::combine(a, a);
auto bb = eve::combine(b, b);
auto cc = eve::combine(c, c);
auto aabbcc = fnma[opts.drop(condition_key)](aa, bb, cc);
auto s = slice(aabbcc, eve::upper_);
return if_else(mask,s,src);
}
else return fnma.behavior(cpu_{}, opts, a, b, c);
else return fnma.behavior(cpu_{}, opts, a, b, c);
}
else return fnma.behavior(cpu_{}, opts, a, b, c);
else return fnma.behavior(cpu_{}, opts, a, b, c);
}
if ((O::contains(lower) || O::contains(upper))&& floating_value<T>) return if_else(mask, eve::fnma[opts.drop(condition_key)](a, b, c), a);
else if constexpr( cx == category::float32x16 ) return _mm512_mask_fnmadd_ps(a, m, b, c);
Expand Down
30 changes: 13 additions & 17 deletions include/eve/module/core/regular/impl/simd/x86/fnms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,26 +105,22 @@ namespace eve::detail
{
if constexpr(!O::contains(strict))
{
if constexpr(current_api >= avx512)
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( cx == category::float64x8 ) return _mm512_mask_fnmsub_round_pd (a, m, b, c, dir);
else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fnmsub_round_ps (a, m, b, c, dir);
else if constexpr ( cx == category::float64x4 || cx == category::float64x2 ||
cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2)
{
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( cx == category::float64x8 ) return _mm512_mask_fnmsub_round_pd (a, m, b, c, dir);
else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fnmsub_round_ps (a, m, b, c, dir);
else if constexpr ( cx == category::float64x4 || cx == category::float64x2 ||
cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2)
{
auto aa = eve::combine(a, a);
auto bb = eve::combine(b, b);
auto cc = eve::combine(c, c);
auto aabbcc = fnms[opts.drop(condition_key)](aa, bb, cc);
auto s = slice(aabbcc, eve::upper_);
return if_else(mask,s,src);
}
else return fnms.behavior(cpu_{}, opts, a, b, c);
auto aa = eve::combine(a, a);
auto bb = eve::combine(b, b);
auto cc = eve::combine(c, c);
auto aabbcc = fnms[opts.drop(condition_key)](aa, bb, cc);
auto s = slice(aabbcc, eve::upper_);
return if_else(mask,s,src);
}
else return fnms.behavior(cpu_{}, opts, a, b, c);
else return fnms.behavior(cpu_{}, opts, a, b, c);
}
else return fnms.behavior(cpu_{}, opts, a, b, c);
else return fnms.behavior(cpu_{}, opts, a, b, c);
}
if ((O::contains(lower) || O::contains(upper))&& floating_value<T>) return if_else(mask, eve::fnms[opts.drop(condition_key)](a, b, c), a);
else if constexpr( cx == category::float32x16 ) return _mm512_mask_fnmsub_ps(a, m, b, c);
Expand Down
26 changes: 11 additions & 15 deletions include/eve/module/core/regular/impl/simd/x86/mul.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,23 +135,19 @@ namespace eve::detail

if constexpr(floating_value<T> &&( O::contains(lower) || O::contains(upper)) && !O::contains(strict))
{
if constexpr(current_api >= avx512)
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( c == category::float64x8 ) return _mm512_mask_mul_round_pd (src, m, a, b, dir);
else if constexpr ( c == category::float32x16 ) return _mm512_mask_mul_round_ps (src, m, a, b, dir);
else if constexpr ( c == category::float64x4 || c == category::float64x2 ||
c == category::float32x8 || c == category::float32x4 || c == category::float32x2)
{
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( c == category::float64x8 ) return _mm512_mask_mul_round_pd (src, m, a, b, dir);
else if constexpr ( c == category::float32x16 ) return _mm512_mask_mul_round_ps (src, m, a, b, dir);
else if constexpr ( c == category::float64x4 || c == category::float64x2 ||
c == category::float32x8 || c == category::float32x4 || c == category::float32x2)
{
auto aa = eve::combine(a, a);
auto bb = eve::combine(b, b);
auto aapbb = mul[opts.drop(condition_key)](aa, bb);
auto s = slice(aapbb, eve::upper_);
return if_else(cx,s,src);
}
else return add.behavior(cpu_{}, opts, a, b);
auto aa = eve::combine(a, a);
auto bb = eve::combine(b, b);
auto aapbb = mul[opts.drop(condition_key)](aa, bb);
auto s = slice(aapbb, eve::upper_);
return if_else(cx,s,src);
}
else return add.behavior(cpu_{}, opts, a, b);
else return add.behavior(cpu_{}, opts, a, b);
}
else if constexpr(O::contains(saturated))
{
Expand Down
Loading

0 comments on commit d56b7d6

Please sign in to comment.