done

AvvALlV · Oct 7, 2023 · 1b7c83a · 1b7c83a
1 parent 310eedf
commit 1b7c83a
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 4 deletions.
diff --git a/src/cl/merge.cl b/src/cl/merge.cl
@@ -1 +1,29 @@
+#ifdef __CLION_IDE__
 
+#include <libgpu/opencl/cl/clion_defines.cl>
+
+#endif
+
+/**
+ * One pass of a bottom-up merge sort.
+ *
+ * The input `a` is viewed as consecutive runs of length `k` (the last run may
+ * be shorter). Runs are paired up (0 with 1, 2 with 3, ...), and each
+ * work-item places exactly one element of `a` at its final position inside
+ * the merged pair in `b`. The position is the element's index within its own
+ * run plus the number of partner-run elements that must precede it, which is
+ * found by binary search over the partner run.
+ *
+ * Each run is expected to be already sorted in ascending order.
+ *
+ * @param a input buffer of `n` floats
+ * @param b output buffer of `n` floats; must not alias `a`
+ * @param n number of elements in `a` and `b`
+ * @param k length of one sorted run; must be >= 1
+ */
+__kernel void merge(__global float* a, __global float* b, unsigned int n, unsigned int k) {
+    const unsigned int gid = get_global_id(0);
+
+    // The launch may contain more work-items than there are elements;
+    // the surplus ones must not touch memory.
+    if (gid >= n)
+        return;
+
+    const unsigned int batch_id = gid / k;
+    const unsigned int local_id = gid - batch_id * k;
+
+    // Even-numbered runs are the left half of a pair, odd-numbered the right.
+    const bool is_left = (batch_id % 2 == 0);
+    const unsigned int left_bound = (is_left ? batch_id : batch_id - 1) * k;
+    const unsigned int other_batch_bound = is_left ? left_bound + k : left_bound;
+
+    // Clamp the partner run to the array: when n is not a multiple of 2k the
+    // partner may be shorter than k or missing entirely (empty search range).
+    const unsigned int search_begin = min(other_batch_bound, n);
+    unsigned int start = search_begin;
+    unsigned int end = min(other_batch_bound + k, n);
+
+    const float value = a[gid];
+    while (start < end) {
+        const unsigned int mid = start + (end - start) / 2;
+        const float probe = a[mid];
+        // On ties the left-run element goes first, so equal keys from the two
+        // runs never map to the same output slot.
+        if (value < probe || (is_left && value == probe))
+            end = mid;
+        else
+            start = mid + 1;
+    }
+
+    // end - search_begin = number of partner elements that precede `value`.
+    b[left_bound + local_id + (end - search_begin)] = value;
+}
diff --git a/src/main_merge.cpp b/src/main_merge.cpp
@@ -50,9 +50,10 @@ int main(int argc, char **argv) {
         std::cout << "CPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
         std::cout << "CPU: " << (n / 1000 / 1000) / t.lapAvg() << " millions/s" << std::endl;
     }
-    /*
-    gpu::gpu_mem_32f as_gpu;
+
+    gpu::gpu_mem_32f as_gpu, bs_gpu;
     as_gpu.resizeN(n);
+    bs_gpu.resizeN(n);
     {
         ocl::Kernel merge(merge_kernel, merge_kernel_length, "merge");
         merge.compile();
@@ -62,7 +63,10 @@ int main(int argc, char **argv) {
             t.restart();// Запускаем секундомер после прогрузки данных, чтобы замерять время работы кернела, а не трансфера данных
             unsigned int workGroupSize = 128;
             unsigned int global_work_size = (n + workGroupSize - 1) / workGroupSize * workGroupSize;
-            merge.exec(gpu::WorkSize(workGroupSize, global_work_size), as_gpu, n);
+            for (int k = 1; k < n; k *= 2) {
+                merge.exec(gpu::WorkSize(workGroupSize, global_work_size), as_gpu, bs_gpu, n, k);
+                as_gpu.swap(bs_gpu);
+            }
             t.nextLap();
         }
         std::cout << "GPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
@@ -73,6 +77,5 @@ int main(int argc, char **argv) {
     for (int i = 0; i < n; ++i) {
         EXPECT_THE_SAME(as[i], cpu_sorted[i], "GPU results should be equal to CPU results!");
     }
-*/
     return 0;
 }