Skip to content

Commit

Permalink
Replace zmmi_t with regi_t
Browse files (browse the repository at this point in the history)
  • Loading branch information
r-devulap committed Sep 7, 2023
1 parent 4767a4c commit 22c2f02
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 26 deletions.
32 changes: 16 additions & 16 deletions src/avx512-64bit-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ template <>
struct ymm_vector<float> {
using type_t = float;
using reg_t = __m256;
using zmmi_t = __m256i;
using regi_t = __m256i;
using opmask_t = __mmask8;
static const uint8_t numlanes = 8;

Expand All @@ -45,7 +45,7 @@ struct ymm_vector<float> {
{
return _mm256_set1_ps(type_max());
}
static zmmi_t
static regi_t
seti(int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8)
{
return _mm256_set_epi32(v1, v2, v3, v4, v5, v6, v7, v8);
Expand Down Expand Up @@ -189,7 +189,7 @@ template <>
struct ymm_vector<uint32_t> {
using type_t = uint32_t;
using reg_t = __m256i;
using zmmi_t = __m256i;
using regi_t = __m256i;
using opmask_t = __mmask8;
static const uint8_t numlanes = 8;

Expand All @@ -206,7 +206,7 @@ struct ymm_vector<uint32_t> {
return _mm256_set1_epi32(type_max());
}

static zmmi_t
static regi_t
seti(int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8)
{
return _mm256_set_epi32(v1, v2, v3, v4, v5, v6, v7, v8);
Expand Down Expand Up @@ -335,7 +335,7 @@ template <>
struct ymm_vector<int32_t> {
using type_t = int32_t;
using reg_t = __m256i;
using zmmi_t = __m256i;
using regi_t = __m256i;
using opmask_t = __mmask8;
static const uint8_t numlanes = 8;

Expand All @@ -352,7 +352,7 @@ struct ymm_vector<int32_t> {
return _mm256_set1_epi32(type_max());
} // TODO: this should broadcast bits as is?

static zmmi_t
static regi_t
seti(int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8)
{
return _mm256_set_epi32(v1, v2, v3, v4, v5, v6, v7, v8);
Expand Down Expand Up @@ -481,7 +481,7 @@ template <>
struct zmm_vector<int64_t> {
using type_t = int64_t;
using reg_t = __m512i;
using zmmi_t = __m512i;
using regi_t = __m512i;
using halfreg_t = __m512i;
using opmask_t = __mmask8;
static const uint8_t numlanes = 8;
Expand All @@ -501,7 +501,7 @@ struct zmm_vector<int64_t> {
return _mm512_set1_epi64(type_max());
} // TODO: this should broadcast bits as is?

static zmmi_t
static regi_t
seti(int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8)
{
return _mm512_set_epi64(v1, v2, v3, v4, v5, v6, v7, v8);
Expand Down Expand Up @@ -615,7 +615,7 @@ struct zmm_vector<int64_t> {
}
static reg_t reverse(reg_t zmm)
{
const zmmi_t rev_index = seti(NETWORK_64BIT_2);
const regi_t rev_index = seti(NETWORK_64BIT_2);
return permutexvar(rev_index, zmm);
}
static reg_t bitonic_merge(reg_t x)
Expand All @@ -631,7 +631,7 @@ template <>
struct zmm_vector<uint64_t> {
using type_t = uint64_t;
using reg_t = __m512i;
using zmmi_t = __m512i;
using regi_t = __m512i;
using halfreg_t = __m512i;
using opmask_t = __mmask8;
static const uint8_t numlanes = 8;
Expand All @@ -651,7 +651,7 @@ struct zmm_vector<uint64_t> {
return _mm512_set1_epi64(type_max());
}

static zmmi_t
static regi_t
seti(int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8)
{
return _mm512_set_epi64(v1, v2, v3, v4, v5, v6, v7, v8);
Expand Down Expand Up @@ -753,7 +753,7 @@ struct zmm_vector<uint64_t> {
}
static reg_t reverse(reg_t zmm)
{
const zmmi_t rev_index = seti(NETWORK_64BIT_2);
const regi_t rev_index = seti(NETWORK_64BIT_2);
return permutexvar(rev_index, zmm);
}
static reg_t bitonic_merge(reg_t x)
Expand All @@ -769,7 +769,7 @@ template <>
struct zmm_vector<double> {
using type_t = double;
using reg_t = __m512d;
using zmmi_t = __m512i;
using regi_t = __m512i;
using halfreg_t = __m512d;
using opmask_t = __mmask8;
static const uint8_t numlanes = 8;
Expand All @@ -788,7 +788,7 @@ struct zmm_vector<double> {
{
return _mm512_set1_pd(type_max());
}
static zmmi_t
static regi_t
seti(int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8)
{
return _mm512_set_epi64(v1, v2, v3, v4, v5, v6, v7, v8);
Expand Down Expand Up @@ -901,7 +901,7 @@ struct zmm_vector<double> {
}
static reg_t reverse(reg_t zmm)
{
const zmmi_t rev_index = seti(NETWORK_64BIT_2);
const regi_t rev_index = seti(NETWORK_64BIT_2);
return permutexvar(rev_index, zmm);
}
static reg_t bitonic_merge(reg_t x)
Expand All @@ -921,7 +921,7 @@ struct zmm_vector<double> {
template <typename vtype, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_INLINE reg_t sort_zmm_64bit(reg_t zmm)
{
const typename vtype::zmmi_t rev_index = vtype::seti(NETWORK_64BIT_2);
const typename vtype::regi_t rev_index = vtype::seti(NETWORK_64BIT_2);
zmm = cmp_merge<vtype>(
zmm, vtype::template shuffle<SHUFFLE_MASK(1, 1, 1, 1)>(zmm), 0xAA);
zmm = cmp_merge<vtype>(
Expand Down
20 changes: 10 additions & 10 deletions src/avx512-64bit-keyvalue-networks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ template <typename vtype1,
typename index_type = typename vtype2::reg_t>
X86_SIMD_SORT_INLINE reg_t sort_zmm_64bit(reg_t key_zmm, index_type &index_zmm)
{
const typename vtype1::zmmi_t rev_index1 = vtype1::seti(NETWORK_64BIT_2);
const typename vtype2::zmmi_t rev_index2 = vtype2::seti(NETWORK_64BIT_2);
const typename vtype1::regi_t rev_index1 = vtype1::seti(NETWORK_64BIT_2);
const typename vtype2::regi_t rev_index2 = vtype2::seti(NETWORK_64BIT_2);
key_zmm = cmp_merge<vtype1, vtype2>(
key_zmm,
vtype1::template shuffle<SHUFFLE_MASK(1, 1, 1, 1)>(key_zmm),
Expand Down Expand Up @@ -87,8 +87,8 @@ X86_SIMD_SORT_INLINE void bitonic_merge_two_zmm_64bit(reg_t &key_zmm1,
index_type &index_zmm1,
index_type &index_zmm2)
{
const typename vtype1::zmmi_t rev_index1 = vtype1::seti(NETWORK_64BIT_2);
const typename vtype2::zmmi_t rev_index2 = vtype2::seti(NETWORK_64BIT_2);
const typename vtype1::regi_t rev_index1 = vtype1::seti(NETWORK_64BIT_2);
const typename vtype2::regi_t rev_index2 = vtype2::seti(NETWORK_64BIT_2);
// 1) First step of a merging network: coex of zmm1 and zmm2 reversed
key_zmm2 = vtype1::permutexvar(rev_index1, key_zmm2);
index_zmm2 = vtype2::permutexvar(rev_index2, index_zmm2);
Expand Down Expand Up @@ -120,8 +120,8 @@ template <typename vtype1,
X86_SIMD_SORT_INLINE void bitonic_merge_four_zmm_64bit(reg_t *key_zmm,
index_type *index_zmm)
{
const typename vtype1::zmmi_t rev_index1 = vtype1::seti(NETWORK_64BIT_2);
const typename vtype2::zmmi_t rev_index2 = vtype2::seti(NETWORK_64BIT_2);
const typename vtype1::regi_t rev_index1 = vtype1::seti(NETWORK_64BIT_2);
const typename vtype2::regi_t rev_index2 = vtype2::seti(NETWORK_64BIT_2);
// 1) First step of a merging network
reg_t key_zmm2r = vtype1::permutexvar(rev_index1, key_zmm[2]);
reg_t key_zmm3r = vtype1::permutexvar(rev_index1, key_zmm[3]);
Expand Down Expand Up @@ -186,8 +186,8 @@ template <typename vtype1,
X86_SIMD_SORT_INLINE void bitonic_merge_eight_zmm_64bit(reg_t *key_zmm,
index_type *index_zmm)
{
const typename vtype1::zmmi_t rev_index1 = vtype1::seti(NETWORK_64BIT_2);
const typename vtype2::zmmi_t rev_index2 = vtype2::seti(NETWORK_64BIT_2);
const typename vtype1::regi_t rev_index1 = vtype1::seti(NETWORK_64BIT_2);
const typename vtype2::regi_t rev_index2 = vtype2::seti(NETWORK_64BIT_2);
reg_t key_zmm4r = vtype1::permutexvar(rev_index1, key_zmm[4]);
reg_t key_zmm5r = vtype1::permutexvar(rev_index1, key_zmm[5]);
reg_t key_zmm6r = vtype1::permutexvar(rev_index1, key_zmm[6]);
Expand Down Expand Up @@ -280,8 +280,8 @@ template <typename vtype1,
X86_SIMD_SORT_INLINE void bitonic_merge_sixteen_zmm_64bit(reg_t *key_zmm,
index_type *index_zmm)
{
const typename vtype1::zmmi_t rev_index1 = vtype1::seti(NETWORK_64BIT_2);
const typename vtype2::zmmi_t rev_index2 = vtype2::seti(NETWORK_64BIT_2);
const typename vtype1::regi_t rev_index1 = vtype1::seti(NETWORK_64BIT_2);
const typename vtype2::regi_t rev_index2 = vtype2::seti(NETWORK_64BIT_2);
reg_t key_zmm8r = vtype1::permutexvar(rev_index1, key_zmm[8]);
reg_t key_zmm9r = vtype1::permutexvar(rev_index1, key_zmm[9]);
reg_t key_zmm10r = vtype1::permutexvar(rev_index1, key_zmm[10]);
Expand Down

0 comments on commit 22c2f02

Please sign in to comment.