Skip to content

Commit

Permalink
done
Browse files Browse the repository at this point in the history
  • Loading branch information
AvvALlV committed Oct 7, 2023
1 parent 310eedf commit 1b7c83a
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 4 deletions.
28 changes: 28 additions & 0 deletions src/cl/merge.cl
Original file line number Diff line number Diff line change
@@ -1 +1,29 @@
#ifdef __CLION_IDE__

#include <libgpu/opencl/cl/clion_defines.cl>

#endif

/**
 * One pass of a bottom-up merge sort.
 *
 * The input `a` is treated as consecutive runs of length `k` (the last run may
 * be shorter), each already sorted in ascending order. Runs are paired up
 * (0 with 1, 2 with 3, ...) and every pair is merged into `b` at the same
 * offset the pair occupies in `a`.
 *
 * Each work-item owns exactly one element a[gid]: it binary-searches the
 * sibling run to learn how many sibling elements must precede it and writes
 * the element straight to its final slot in `b`.
 *
 * @param a  input buffer holding n floats, sorted within each run of length k
 * @param b  output buffer with room for n floats; must not alias `a`
 * @param n  number of valid elements in `a` and `b`
 * @param k  run length for this pass; must be greater than zero
 */
__kernel void merge(__global float* a, __global float* b, unsigned int n, unsigned int k) {
    const unsigned int gid = (unsigned int) get_global_id(0);

    // The launch may contain more work-items than elements (global size rounded
    // up to the work-group size); the surplus ones must not touch memory.
    if (gid >= n)
        return;

    const unsigned int local_id = gid % k;            // offset inside this work-item's run
    const unsigned int batch_start = gid - local_id;  // first index of this work-item's run
    const bool is_left = (gid / k) % 2 == 0;          // even runs are the left half of a pair

    // First index of the pair; the merged output occupies the same range in b.
    const unsigned int left_bound = is_left ? batch_start : batch_start - k;

    // Sibling run [other_begin, other_end), clamped to n so that a partial or
    // missing right-hand run is never read out of bounds. The clamping is
    // written with subtractions from n so that it cannot wrap around.
    const unsigned int other_begin = is_left ? batch_start + min(k, n - batch_start)
                                             : batch_start - k;
    const unsigned int other_end = other_begin + min(k, n - other_begin);

    const float value = a[gid];

    // Binary search for the number of sibling elements that go before `value`.
    // Ties are broken in favour of the left run: a left element is placed before
    // equal right elements, a right element after equal left elements. This keeps
    // the two ranks distinct so no two work-items write to the same slot.
    unsigned int lo = other_begin;
    unsigned int hi = other_end;
    while (lo < hi) {
        const unsigned int mid = lo + (hi - lo) / 2;  // overflow-safe midpoint
        if (value < a[mid] || (is_left && value == a[mid]))
            hi = mid;
        else
            lo = mid + 1;
    }

    // Final position = pair start + rank inside own run + rank inside sibling run.
    b[left_bound + local_id + (lo - other_begin)] = value;
}
11 changes: 7 additions & 4 deletions src/main_merge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@ int main(int argc, char **argv) {
std::cout << "CPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
std::cout << "CPU: " << (n / 1000 / 1000) / t.lapAvg() << " millions/s" << std::endl;
}
/*
gpu::gpu_mem_32f as_gpu;

gpu::gpu_mem_32f as_gpu, bs_gpu;
as_gpu.resizeN(n);
bs_gpu.resizeN(n);
{
ocl::Kernel merge(merge_kernel, merge_kernel_length, "merge");
merge.compile();
Expand All @@ -62,7 +63,10 @@ int main(int argc, char **argv) {
t.restart();// Запускаем секундомер после прогрузки данных, чтобы замерять время работы кернела, а не трансфера данных
unsigned int workGroupSize = 128;
unsigned int global_work_size = (n + workGroupSize - 1) / workGroupSize * workGroupSize;
merge.exec(gpu::WorkSize(workGroupSize, global_work_size), as_gpu, n);
for (int k = 1; k < n; k *= 2) {
merge.exec(gpu::WorkSize(workGroupSize, global_work_size), as_gpu, bs_gpu, n, k);
as_gpu.swap(bs_gpu);
}
t.nextLap();
}
std::cout << "GPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
Expand All @@ -73,6 +77,5 @@ int main(int argc, char **argv) {
for (int i = 0; i < n; ++i) {
EXPECT_THE_SAME(as[i], cpu_sorted[i], "GPU results should be equal to CPU results!");
}
*/
return 0;
}

0 comments on commit 1b7c83a

Please sign in to comment.