diff --git a/opm/simulators/linalg/gpuistl/GpuDILU.cpp b/opm/simulators/linalg/gpuistl/GpuDILU.cpp index cba3be2c993..59eadbdd9aa 100644 --- a/opm/simulators/linalg/gpuistl/GpuDILU.cpp +++ b/opm/simulators/linalg/gpuistl/GpuDILU.cpp @@ -270,12 +270,12 @@ GpuDILU::tuneThreadBlockSizes() auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize){ this->update(moveThreadBlockSize, m_DILUFactorizationThreadBlockSize); }; - m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "Kernel moving data to reordered matrix"); + m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "(in DILU update) Move data to reordered matrix"); auto tuneFactorizationThreadBlockSizeInUpdate = [this](int factorizationThreadBlockSize){ this->update(m_moveThreadBlockSize, factorizationThreadBlockSize); }; - m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing DILU factorization"); + m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "(in DILU update) DILU factorization"); // tune the thread-block size of the apply GpuVector tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize()); @@ -285,12 +285,12 @@ GpuDILU::tuneThreadBlockSizes() auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){ this->apply(tmpV, tmpD, lowerSolveThreadBlockSize, m_DILUFactorizationThreadBlockSize); }; - m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply, "Kernel computing a lower triangular solve for a level set"); + m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply, "(in DILU apply) Triangular lower solve"); auto tuneUpperSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int upperSolveThreadBlockSize){ - this->apply(tmpV, tmpD, m_moveThreadBlockSize, upperSolveThreadBlockSize); + this->apply(tmpV, tmpD, m_lowerSolveThreadBlockSize, upperSolveThreadBlockSize); }; - m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set"); + m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "(in DILU apply) Triangular upper solve"); } } // namespace Opm::gpuistl diff --git a/opm/simulators/linalg/gpuistl/OpmGpuILU0.cpp b/opm/simulators/linalg/gpuistl/OpmGpuILU0.cpp index 24ba06ecb06..5bd663aed17 100644 --- a/opm/simulators/linalg/gpuistl/OpmGpuILU0.cpp +++ b/opm/simulators/linalg/gpuistl/OpmGpuILU0.cpp @@ -331,13 +331,13 @@ OpmGpuILU0::tuneThreadBlockSizes() auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize) { this->update(moveThreadBlockSize, m_ILU0FactorizationThreadBlockSize); }; m_moveThreadBlockSize - = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "Kernel moving data to reordered matrix"); + = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "(in ILU update) Move data to reordered matrix"); auto tuneFactorizationThreadBlockSizeInUpdate = [this](int factorizationThreadBlockSize) { this->update(m_moveThreadBlockSize, factorizationThreadBlockSize); }; m_ILU0FactorizationThreadBlockSize - = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing ILU0 factorization"); + = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "(in ILU update) ILU factorization"); // tune the thread-block size of the apply GpuVector tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize()); @@ -348,13 +348,13 @@ OpmGpuILU0::tuneThreadBlockSizes() this->apply(tmpV, tmpD, lowerSolveThreadBlockSize, m_ILU0FactorizationThreadBlockSize); }; m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize( - tuneLowerSolveThreadBlockSizeInApply, "Kernel computing a lower triangular solve for a level set"); + tuneLowerSolveThreadBlockSizeInApply, "(in ILU apply) Triangular lower solve"); auto tuneUpperSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int upperSolveThreadBlockSize) { - this->apply(tmpV, tmpD, m_moveThreadBlockSize, upperSolveThreadBlockSize); + this->apply(tmpV, tmpD, m_lowerSolveThreadBlockSize, upperSolveThreadBlockSize); }; m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize( - tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set"); + tuneUpperSolveThreadBlockSizeInApply, "(in ILU apply) Triangular upper solve"); } } // namespace Opm::gpuistl diff --git a/opm/simulators/linalg/gpuistl/detail/autotuner.hpp b/opm/simulators/linalg/gpuistl/detail/autotuner.hpp index 0de81e91156..1cc2718d0fe 100644 --- a/opm/simulators/linalg/gpuistl/detail/autotuner.hpp +++ b/opm/simulators/linalg/gpuistl/detail/autotuner.hpp @@ -83,7 +83,7 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction) } OpmLog::info( - fmt::format("{}: Tuned Blocksize: {} (fastest runtime: {}).", descriptionOfFunction, bestBlockSize, bestTime)); + fmt::format("[Kernel tuning completed] {}: Tuned Blocksize = {}, Fastest Runtime = {}ms.", descriptionOfFunction, bestBlockSize, bestTime)); return bestBlockSize; }