Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

have activelaneid_u32 call ockl_activelane_u32 #1018

Merged
merged 1 commit into from
Feb 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion include/hc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2817,7 +2817,10 @@ extern "C" inline __attribute((always_inline)) std::uint64_t __cycle_u64() __HC_
*
* @return The result will be in the range 0 to WAVESIZE - 1.
*/
// Body-less declaration of the builtin; the inline definition further down supplies its body.
// NOTE(review): the change summary for this file lists "4 additions & 1 deletion", so this
// line looks like the declaration that the inline definition replaces — confirm before keeping both.
extern "C" unsigned int __activelaneid_u32() __HC__;
// Routine the wrapper below forwards to. It is only declared here; its definition is not
// visible in this section.
extern "C" unsigned int __ockl_activelane_u32(void);
// Thin inline wrapper: returns the value of __ockl_activelane_u32() unchanged.
extern "C" inline unsigned int __activelaneid_u32() __HC__ {
return __ockl_activelane_u32();
}

/**
* Return a bit mask that shows which active work-items in the
Expand Down
107 changes: 107 additions & 0 deletions tests/Unit/AMDGPU/activelaneid.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@

// RUN: %hc %s -o %t.out && %t.out

#include <hc.hpp>

#include <iostream>
#include <random>
#include <utility>
#include <vector>

// Wavefront width assumed by this test (as of now, all HSA agents have wavefront size of 64).
constexpr int WAVEFRONT_SIZE = 64;

// Number of entries in the test table: WAVEFRONT_SIZE rows of WAVEFRONT_SIZE entries each.
constexpr int GRID_SIZE = WAVEFRONT_SIZE * WAVEFRONT_SIZE;

// Set to 1 to print the input table and the per-entry results.
// Kept as a macro because it is evaluated by #if.
#define TEST_DEBUG (0)

// A test case to verify the builtin function
// - __activelaneid_u32

// test __activelaneid_u32
/**
 * Checks __activelaneid_u32 against a host-computed expectation.
 *
 * A table of WAVEFRONT_SIZE rows by WAVEFRONT_SIZE columns is filled with 0/1
 * flags so that row i holds exactly i ones at shuffled positions. One
 * work-item is launched per table entry; a work-item whose flag is 1 stores
 * the value of __activelaneid_u32(), every other work-item stores 99.
 * On the host, the flagged entries of each row are expected to hold
 * 0, 1, 2, ... in column order and the unflagged entries are expected to hold 99.
 * NOTE(review): this expectation presumes each row of WAVEFRONT_SIZE
 * consecutive work-items executes as one wavefront — confirm for the target.
 *
 * @return true when every output entry matches its expected value.
 */
bool test() {
  using namespace hc;
  bool ret = true;

  // Flag table of size WAVEFRONT_SIZE * WAVEFRONT_SIZE, stored row by row.
  std::vector<uint32_t> flags(GRID_SIZE);

  // random_device only provides the seed; the engine produces the draws.
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_int_distribution<int> int_dist(0, WAVEFRONT_SIZE - 1);

  // For each row of WAVEFRONT_SIZE entries, the number of 1s equals the row
  // index (row 0 has no 1s, row 1 has one 1, and so on).
  for (int i = 0; i < WAVEFRONT_SIZE; ++i) {
    const int row = i * WAVEFRONT_SIZE;

    // Start with the 1s packed at the front of the row ...
    for (int j = 0; j < WAVEFRONT_SIZE; ++j) {
      flags[row + j] = (j < i) ? 1 : 0;
    }

    // ... then scatter them with random pairwise swaps inside the row.
    // std::swap is well defined even when both indices are equal, so no
    // guard is needed (the former chained XOR swap required one).
    for (int j = 0; j < WAVEFRONT_SIZE * 10; ++j) {
      const int k1 = int_dist(gen);
      const int k2 = int_dist(gen);
      std::swap(flags[row + k1], flags[row + k2]);
    }
  }

#if TEST_DEBUG
  for (int i = 0; i < WAVEFRONT_SIZE; ++i) {
    for (int j = 0; j < WAVEFRONT_SIZE; ++j) {
      std::cout << flags[i * WAVEFRONT_SIZE + j] << " ";
    }
    std::cout << "\n";
  }
#endif

  // Upload the flag table.
  array<uint32_t, 1> test_GPU(GRID_SIZE);
  copy(flags.begin(), test_GPU);

  // One work-item per table entry: flagged entries record the builtin's
  // result, all others record the sentinel 99.
  array<uint32_t, 1> output_GPU(GRID_SIZE);
  extent<1> ex(GRID_SIZE);
  parallel_for_each(ex, [&](index<1>& idx) [[hc]] {
    if (test_GPU[idx] == 1)
      output_GPU(idx) = __activelaneid_u32();
    else
      output_GPU(idx) = 99;
  }).wait();

  // verify result
  std::vector<uint32_t> output = output_GPU;
  for (int i = 0; i < WAVEFRONT_SIZE; ++i) {
    // Running count of flagged entries seen so far in this row; it is the
    // value expected at the next flagged entry.
    uint32_t activeLaneID = 0;
    for (int j = 0; j < WAVEFRONT_SIZE; ++j) {
      const int pos = i * WAVEFRONT_SIZE + j;
      const uint32_t expected = (flags[pos] == 1) ? activeLaneID++ : 99u;
      const bool ok = (output[pos] == expected);
      // Accumulate without ever resetting, so a failure is reported to the
      // caller regardless of TEST_DEBUG.
      ret = ret && ok;

#if TEST_DEBUG
      if (!ok) {
        std::cout << "FAILED: laneid " << activeLaneID << " ";
      }
      std::cout << "i: " << i << " j: " << j << " input = " << flags[pos] << " j = " << j << " and output = " << output[pos] << "\n";
#endif
    }
#if TEST_DEBUG
    std::cout << "\n";
#endif
  }

  return ret;
}

/**
 * Program entry point: runs test() and turns its result into the exit status,
 * 0 when the test passed and 1 when it failed.
 */
int main() {
  const bool passed = test();
  return passed ? 0 : 1;
}