OPM · multitalentloes · Dec 3, 2024
diff --git a/opm/simulators/linalg/gpuistl/GpuDILU.cpp b/opm/simulators/linalg/gpuistl/GpuDILU.cpp
@@ -270,12 +270,12 @@ GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
     auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize){
         this->update(moveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
     };
-    m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "Kernel moving data to reordered matrix");
+    m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "(in DILU update) Move data to reordered matrix");
 
     auto tuneFactorizationThreadBlockSizeInUpdate = [this](int factorizationThreadBlockSize){
         this->update(m_moveThreadBlockSize, factorizationThreadBlockSize);
     };
-    m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing DILU factorization");
+    m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "(in DILU update) DILU factorization");
 
     // tune the thread-block size of the apply
     GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
@@ -285,12 +285,12 @@ GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
     auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){
         this->apply(tmpV, tmpD, lowerSolveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
     };
-    m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply, "Kernel computing a lower triangular solve for a level set");
+    m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply, "(in DILU apply) Triangular lower solve");
 
     auto tuneUpperSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int upperSolveThreadBlockSize){
-        this->apply(tmpV, tmpD, m_moveThreadBlockSize, upperSolveThreadBlockSize);
+        this->apply(tmpV, tmpD, m_lowerSolveThreadBlockSize, upperSolveThreadBlockSize); // reuse the lower-solve size tuned just above; only the upper-solve size comes from the tuner
     };
-    m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
+    m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "(in DILU apply) Triangular upper solve");
 }
 
 } // namespace Opm::gpuistl

diff --git a/opm/simulators/linalg/gpuistl/OpmGpuILU0.cpp b/opm/simulators/linalg/gpuistl/OpmGpuILU0.cpp
@@ -331,13 +331,13 @@ OpmGpuILU0<M, X, Y, l>::tuneThreadBlockSizes()
     auto tuneMoveThreadBlockSizeInUpdate
         = [this](int moveThreadBlockSize) { this->update(moveThreadBlockSize, m_ILU0FactorizationThreadBlockSize); };
     m_moveThreadBlockSize
-        = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "Kernel moving data to reordered matrix");
+        = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "(in ILU update) Move data to reordered matrix");
 
     auto tuneFactorizationThreadBlockSizeInUpdate = [this](int factorizationThreadBlockSize) {
         this->update(m_moveThreadBlockSize, factorizationThreadBlockSize);
     };
     m_ILU0FactorizationThreadBlockSize
-        = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing ILU0 factorization");
+        = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "(in ILU update) ILU factorization");
 
     // tune the thread-block size of the apply
     GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
@@ -348,13 +348,13 @@ OpmGpuILU0<M, X, Y, l>::tuneThreadBlockSizes()
         this->apply(tmpV, tmpD, lowerSolveThreadBlockSize, m_ILU0FactorizationThreadBlockSize);
     };
     m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(
-        tuneLowerSolveThreadBlockSizeInApply, "Kernel computing a lower triangular solve for a level set");
+        tuneLowerSolveThreadBlockSizeInApply, "(in ILU apply) Triangular lower solve");
 
     auto tuneUpperSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int upperSolveThreadBlockSize) {
-        this->apply(tmpV, tmpD, m_moveThreadBlockSize, upperSolveThreadBlockSize);
+        this->apply(tmpV, tmpD, m_lowerSolveThreadBlockSize, upperSolveThreadBlockSize); // reuse the lower-solve size tuned just above; only the upper-solve size comes from the tuner
     };
     m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(
-        tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
+        tuneUpperSolveThreadBlockSizeInApply, "(in ILU apply) Triangular upper solve");
 }
 
 } // namespace Opm::gpuistl

diff --git a/opm/simulators/linalg/gpuistl/detail/autotuner.hpp b/opm/simulators/linalg/gpuistl/detail/autotuner.hpp
@@ -83,7 +83,7 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
     }
 
     OpmLog::info(
-        fmt::format("{}: Tuned Blocksize: {} (fastest runtime: {}).", descriptionOfFunction, bestBlockSize, bestTime));
+        fmt::format("[Kernel tuning completed] {}: Tuned Blocksize = {}, Fastest Runtime = {}ms.", descriptionOfFunction, bestBlockSize, bestTime));
 
     return bestBlockSize;
 }