use formatter

AvvALlV committed on Oct 1, 2023 · commit 3e562f5 · 1 parent 2e15a2c
Show file tree

Hide file tree

Showing 4 changed files with 38 additions and 31 deletions.
diff --git a/src/cl/matrix_multiplication.cl b/src/cl/matrix_multiplication.cl
@@ -1,11 +1,15 @@
 #ifdef __CLION_IDE__
+
 #include <libgpu/opencl/cl/clion_defines.cl>
+
 #endif
 
 
 #define TS 16
 #define WPT 4
-__kernel void matrix_multiplication(__global float* a, __global float* b, __global float* c, unsigned int M, unsigned int K, unsigned N) {
+__kernel void
+matrix_multiplication(__global float *a, __global float *b, __global float *c, unsigned int M, unsigned int K,
+                      unsigned N) {
     int lx = get_local_id(0);
     int ly = get_local_id(1);
     int gx = get_global_id(0);
@@ -20,7 +24,7 @@ __kernel void matrix_multiplication(__global float* a, __global float* b, __glob
     for (int tile_k = 0; tile_k * TS < K; ++tile_k) {
         for (int w = 0; w < WPT; ++w) {
             tileA[ly + w * RTS][lx] = gx < N && (gy + w * RTS) < M ? a[(gy + w * RTS) * K + tile_k * TS + lx] : 0;
-            tileB[ly + w * RTS][lx] = gx < N && (gy + w * RTS) < M ? b[(ly +  tile_k * TS + w * RTS) * N + gx] : 0;
+            tileB[ly + w * RTS][lx] = gx < N && (gy + w * RTS) < M ? b[(ly + tile_k * TS + w * RTS) * N + gx] : 0;
         }
         barrier(CLK_LOCAL_MEM_FENCE);
         for (int k = 0; k < TS; ++k) {
@@ -36,7 +40,9 @@ __kernel void matrix_multiplication(__global float* a, __global float* b, __glob
     }
 }
 
-__kernel void matrix_multiplication_local(__global float* a, __global float* b, __global float* c, unsigned int M, unsigned int K, unsigned N) {
+__kernel void
+matrix_multiplication_local(__global float *a, __global float *b, __global float *c, unsigned int M, unsigned int K,
+                            unsigned N) {
     int gx = get_global_id(0);
     int gy = get_global_id(1);
     int lx = get_local_id(0);
@@ -62,7 +68,9 @@ __kernel void matrix_multiplication_local(__global float* a, __global float* b,
 }
 
 
-__kernel void matrix_multiplication_naive(__global float* a, __global float* b, __global float* c, unsigned int M, unsigned int K, unsigned N) {
+__kernel void
+matrix_multiplication_naive(__global float *a, __global float *b, __global float *c, unsigned int M, unsigned int K,
+                            unsigned N) {
     int i = get_global_id(0);
     int j = get_global_id(1);
 

diff --git a/src/cl/matrix_transpose.cl b/src/cl/matrix_transpose.cl
@@ -1,10 +1,11 @@
 #ifdef __CLION_IDE__
+
 #include <libgpu/opencl/cl/clion_defines.cl>
+
 #endif
 
 #define TS 16
-__kernel void matrix_transpose(__global float* a, __global float* at, unsigned int M, unsigned int K)
-{
+__kernel void matrix_transpose(__global float *a, __global float *at, unsigned int M, unsigned int K) {
     unsigned int gx = get_global_id(0);
     unsigned int gy = get_global_id(1);
     unsigned int lx = get_local_id(0);

diff --git a/src/main_matrix_multiplication.cpp b/src/main_matrix_multiplication.cpp
@@ -12,7 +12,7 @@
 #include <string>
 
 template<typename EXECUTOR_TYPE>
-void runner(const std::string& kernel_name, EXECUTOR_TYPE executor, int benchmarkingIters, int gflops) {
+void runner(const std::string &kernel_name, EXECUTOR_TYPE executor, int benchmarkingIters, int gflops) {
     std::cout << kernel_name << std::endl;
     ocl::Kernel matrix_multiplication_kernel(matrix_multiplication, matrix_multiplication_length, kernel_name);
     matrix_multiplication_kernel.compile();
@@ -31,8 +31,7 @@ void runner(const std::string& kernel_name, EXECUTOR_TYPE executor, int benchmar
 }
 
 
-int main(int argc, char **argv)
-{
+int main(int argc, char **argv) {
     gpu::Device device = gpu::chooseGPUDevice(argc, argv);
 
     gpu::Context context;
@@ -43,13 +42,14 @@ int main(int argc, char **argv)
     unsigned int M = 1024;
     unsigned int K = 1024;
     unsigned int N = 1024;
-    const size_t gflops = ((size_t) M * K * N * 2) / (1000 * 1000 * 1000); // умножить на два, т.к. операция сложения и умножения
+    const size_t gflops =
+            ((size_t) M * K * N * 2) / (1000 * 1000 * 1000); // умножить на два, т.к. операция сложения и умножения
 
-    std::vector<float> as(M*K, 0);
-    std::vector<float> bs(K*N, 0);
-    std::vector<float> cs(M*N, 0);
+    std::vector<float> as(M * K, 0);
+    std::vector<float> bs(K * N, 0);
+    std::vector<float> cs(M * N, 0);
 
-    FastRandom r(M+K+N);
+    FastRandom r(M + K + N);
     for (unsigned int i = 0; i < as.size(); ++i) {
         as[i] = r.nextf();
     }
@@ -79,14 +79,14 @@ int main(int argc, char **argv)
     const std::vector<float> cs_cpu_reference = cs;
 
     gpu::gpu_mem_32f as_gpu, bs_gpu, cs_gpu;
-    as_gpu.resizeN(M*K);
-    bs_gpu.resizeN(K*N);
-    cs_gpu.resizeN(M*N);
+    as_gpu.resizeN(M * K);
+    bs_gpu.resizeN(K * N);
+    cs_gpu.resizeN(M * N);
 
-    as_gpu.writeN(as.data(), M*K);
-    bs_gpu.writeN(bs.data(), K*N);
+    as_gpu.writeN(as.data(), M * K);
+    bs_gpu.writeN(bs.data(), K * N);
 
-    auto checker = [&] () {
+    auto checker = [&]() {
         // Проверяем корректность результатов
         double diff_sum = 0;
         for (int i = 0; i < M * N; ++i) {

diff --git a/src/main_matrix_transpose.cpp b/src/main_matrix_transpose.cpp
@@ -8,11 +8,9 @@
 
 #include <vector>
 #include <iostream>
-#include <stdexcept>
 
 
-int main(int argc, char **argv)
-{
+int main(int argc, char **argv) {
     gpu::Device device = gpu::chooseGPUDevice(argc, argv);
 
     gpu::Context context;
@@ -23,21 +21,21 @@ int main(int argc, char **argv)
     unsigned int M = 1024;
     unsigned int K = 1024;
 
-    std::vector<float> as(M*K, 0);
-    std::vector<float> as_t(M*K, 0);
+    std::vector<float> as(M * K, 0);
+    std::vector<float> as_t(M * K, 0);
 
-    FastRandom r(M+K);
+    FastRandom r(M + K);
     for (unsigned int i = 0; i < as.size(); ++i) {
         as[i] = r.nextf();
     }
     std::cout << "Data generated for M=" << M << ", K=" << K << std::endl;
 
 
     gpu::gpu_mem_32f as_gpu, as_t_gpu;
-    as_gpu.resizeN(M*K);
-    as_t_gpu.resizeN(K*M);
+    as_gpu.resizeN(M * K);
+    as_t_gpu.resizeN(K * M);
 
-    as_gpu.writeN(as.data(), M*K);
+    as_gpu.writeN(as.data(), M * K);
 
     ocl::Kernel matrix_transpose_kernel(matrix_transpose, matrix_transpose_length, "matrix_transpose");
     matrix_transpose_kernel.compile();
@@ -50,10 +48,10 @@ int main(int argc, char **argv)
             t.nextLap();
         }
         std::cout << "GPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
-        std::cout << "GPU: " << M*K/1000.0/1000.0 / t.lapAvg() << " millions/s" << std::endl;
+        std::cout << "GPU: " << M * K / 1000.0 / 1000.0 / t.lapAvg() << " millions/s" << std::endl;
     }
 
-    as_t_gpu.readN(as_t.data(), M*K);
+    as_t_gpu.readN(as_t.data(), M * K);
 
     // Проверяем корректность результатов
     for (int j = 0; j < M; ++j) {