From dffdccefa74fad6b5bc89420a70e6ddd546983d0 Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Thu, 26 Oct 2023 14:02:05 +0200 Subject: [PATCH] more tuning --- lib/cgpt/lib/stencil/tensor.h | 1 + lib/gpt/core/local_stencil/tensor.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/cgpt/lib/stencil/tensor.h b/lib/cgpt/lib/stencil/tensor.h index 3fde4282..f8ad1164 100644 --- a/lib/cgpt/lib/stencil/tensor.h +++ b/lib/cgpt/lib/stencil/tensor.h @@ -263,6 +263,7 @@ class cgpt_stencil_tensor : public cgpt_stencil_tensor_base { } //std::cout << GridLogMessage<< "Group " << osites0 << " to " << osites1 << " has oblocks " << oblocks << " and extra " << osites_extra << " from " << osites_extra_start << " compare to " << osites << std::endl; + #ifdef GRID_HAS_ACCELERATOR #define MAP_INDEX(x,ss) ss int coffset = 0; diff --git a/lib/gpt/core/local_stencil/tensor.py b/lib/gpt/core/local_stencil/tensor.py index 43700183..1cebbcd0 100644 --- a/lib/gpt/core/local_stencil/tensor.py +++ b/lib/gpt/core/local_stencil/tensor.py @@ -41,7 +41,7 @@ def __init__(self, lat, points, code, segments, local=1): lat.v_obj[0], lat.grid.obj, points, self.code, self.segments, local ) self.osites_per_instruction = 4 - self.osites_per_cache_block = 4096 + self.osites_per_cache_block = lat.grid.gsites def __call__(self, *fields): cgpt.stencil_tensor_execute(self.obj, list(fields),