first cartesian stencil in gpt

lehner · Oct 7, 2023 · 7cbd82a · 7cbd82a
1 parent 174fd9f
commit 7cbd82a
Show file tree

Hide file tree

Showing 12 changed files with 284 additions and 82 deletions.
diff --git a/benchmarks/stencil.py b/benchmarks/stencil.py
@@ -43,7 +43,11 @@
                 }
             )
     st = g.stencil.matrix(
-        U[0], [(0, 0, 0, 0), (1, 0, 0, 0), (0, 1, 0, 0), (0, 0, 1, 0), (0, 0, 0, 1)], [0], [1,2,3,4], code
+        U[0],
+        [(0, 0, 0, 0), (1, 0, 0, 0), (0, 1, 0, 0), (0, 0, 1, 0), (0, 0, 0, 1)],
+        [0],
+        [1, 2, 3, 4],
+        code,
     )
     # test plaquette
     P = g.lattice(U[0])

diff --git a/lib/cgpt/lib/lattice/base.h b/lib/cgpt/lib/lattice/base.h
@@ -64,8 +64,8 @@ class cgpt_Lattice_base {
   virtual void invert_matrix(std::vector<cgpt_Lattice_base*>& matrix_inv, std::vector<cgpt_Lattice_base*>& matrix, long n_virtual) = 0;
   virtual void determinant(cgpt_Lattice_base* det, std::vector<cgpt_Lattice_base*>& matrix, long n_virtual) = 0; // this determines type of matrix[0]
   virtual GridBase* get_grid() = 0;
-  virtual cgpt_stencil_matrix_base* stencil_matrix(GridBase* grid, PyObject* shifts, PyObject* code, long code_parallel_block_size) = 0;
-  virtual cgpt_stencil_matrix_vector_base* stencil_matrix_vector(cgpt_Lattice_base* matrix, GridBase* grid, PyObject* shifts, PyObject* code, long code_parallel_block_size) = 0;
+  virtual cgpt_stencil_matrix_base* stencil_matrix(GridBase* grid, PyObject* shifts, PyObject* code, long code_parallel_block_size, long local) = 0;
+  virtual cgpt_stencil_matrix_vector_base* stencil_matrix_vector(cgpt_Lattice_base* matrix, GridBase* grid, PyObject* shifts, PyObject* code, long code_parallel_block_size, long local) = 0;
 
 };
 

diff --git a/lib/cgpt/lib/lattice/implementation.h b/lib/cgpt/lib/lattice/implementation.h
@@ -273,12 +273,12 @@ class cgpt_Lattice : public cgpt_Lattice_base {
     return l.Grid();
   }
 
-  virtual cgpt_stencil_matrix_base* stencil_matrix(GridBase* grid, PyObject* shifts, PyObject* code, long code_parallel_block_size) {
-    return cgpt_stencil_matrix_create<T>(grid, shifts, code, code_parallel_block_size);
+  virtual cgpt_stencil_matrix_base* stencil_matrix(GridBase* grid, PyObject* shifts, PyObject* code, long code_parallel_block_size, long local) {
+    return cgpt_stencil_matrix_create<T>(grid, shifts, code, code_parallel_block_size, local);
   }
 
-  virtual cgpt_stencil_matrix_vector_base* stencil_matrix_vector(cgpt_Lattice_base* matrix, GridBase* grid, PyObject* shifts, PyObject* code, long code_parallel_block_size) {
-    return cgpt_stencil_matrix_vector_create<T>(matrix, grid, shifts, code, code_parallel_block_size);
+  virtual cgpt_stencil_matrix_vector_base* stencil_matrix_vector(cgpt_Lattice_base* matrix, GridBase* grid, PyObject* shifts, PyObject* code, long code_parallel_block_size, long local) {
+    return cgpt_stencil_matrix_vector_create<T>(matrix, grid, shifts, code, code_parallel_block_size, local);
   }
 
 };
diff --git a/lib/cgpt/lib/lib.h b/lib/cgpt/lib/lib.h
@@ -25,6 +25,7 @@
 #include <numpy/arrayobject.h>
 #include <vector>
 #include <map>
+#include <set>
 #include <string>
 #include <iostream>
 #include <sstream>

diff --git a/lib/cgpt/lib/stencil.cc b/lib/cgpt/lib/stencil.cc
@@ -24,16 +24,18 @@ EXPORT(stencil_matrix_create,{
     void* _lattice;
     PyObject* _shifts, * _code;
     long _code_parallel_block_size;
-    if (!PyArg_ParseTuple(args, "llOOl", &_lattice, &_grid, &_shifts, &_code,
-			  &_code_parallel_block_size)) {
+    long _local;
+    if (!PyArg_ParseTuple(args, "llOOll", &_lattice, &_grid, &_shifts, &_code,
+			  &_code_parallel_block_size, &_local)) {
       return NULL;
     }
 
     GridBase* grid = (GridBase*)_grid;
     cgpt_Lattice_base* lattice = (cgpt_Lattice_base*)_lattice;
 
     return PyLong_FromVoidPtr(lattice->stencil_matrix(grid, _shifts, _code,
-						      _code_parallel_block_size));
+						      _code_parallel_block_size,
+						      _local));
   });
 
 EXPORT(stencil_matrix_vector_create,{
@@ -43,8 +45,9 @@ EXPORT(stencil_matrix_vector_create,{
     void* _lattice_vector;
     PyObject* _shifts, * _code;
     long _code_parallel_block_size;
-    if (!PyArg_ParseTuple(args, "lllOOl", &_lattice_matrix, &_lattice_vector, &_grid, &_shifts, &_code,
-			  &_code_parallel_block_size)) {
+    long _local;
+    if (!PyArg_ParseTuple(args, "lllOOll", &_lattice_matrix, &_lattice_vector, &_grid, &_shifts, &_code,
+			  &_code_parallel_block_size, &_local)) {
       return NULL;
     }
 
@@ -53,7 +56,8 @@ EXPORT(stencil_matrix_vector_create,{
     cgpt_Lattice_base* lattice_vector = (cgpt_Lattice_base*)_lattice_vector;
 
     return PyLong_FromVoidPtr(lattice_vector->stencil_matrix_vector(lattice_matrix, grid, _shifts, _code,
-								    _code_parallel_block_size));
+								    _code_parallel_block_size,
+								    _local));
   });
 
 EXPORT(stencil_matrix_execute,{

diff --git a/lib/cgpt/lib/stencil/common.h b/lib/cgpt/lib/stencil/common.h
@@ -17,6 +17,26 @@
     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
 
+// cartesian stencil fetch
+#define fetch_cs(sidx, obj, point, site, view, do_adj) {		\
+    if (point == -1) {							\
+      obj = coalescedRead(view[site]);					\
+    } else {								\
+      int ptype;							\
+      auto SE = sview[sidx].GetEntry(ptype, point, ss);		\
+      if (SE->_is_local ) {						\
+	int perm = SE->_permute;					\
+	obj = coalescedReadPermute(view[SE->_offset],ptype,perm,lane);	\
+      } else {								\
+	obj = coalescedRead(buf[sidx][SE->_offset],lane);		\
+      }									\
+      acceleratorSynchronise();						\
+    }									\
+    if (do_adj)								\
+      obj = adj(obj);							\
+  }
+
+// general local stencil fetch
 #ifndef GRID_HAS_ACCELERATOR
 
 // cpu fetch version
@@ -91,3 +111,80 @@ template<typename T, int N1, int N2, int M>    struct matrixFromTypeAtLevel2<iMa
 template<typename T, int N2>    struct matrixFromTypeAtLevel2<iScalar<T>,0,N2> { typedef iScalar<typename matrixFromTypeAtLevel<T,N2-1>::type> type; };
 template<typename T, int N2, int M>    struct matrixFromTypeAtLevel2<iVector<T,M>,0,N2> { typedef iMatrix<typename matrixFromTypeAtLevel<T,N2-1>::type,M> type; };
 template<typename T, int N2, int M>    struct matrixFromTypeAtLevel2<iMatrix<T,M>,0,N2> { typedef iMatrix<typename matrixFromTypeAtLevel<T,N2-1>::type,M> type; };
+
+// cartesian
+static void cgpt_shift_to_cartesian_dir_disp(const Coordinate& s, int& dir, int& disp) {
+  dir = -1;
+  for (int i=0;i<s.size();i++) {
+    if (s[i] != 0) {
+      if (dir != -1) {
+	ERR("Only cartesian stencil allowed!  s[%d] != 0 and s[%d] != 0", dir, i);
+      } else {
+	dir = i;
+	disp = s[i];
+      }
+    }
+  }
+}
+
+template<typename CartesianStencil_t>
+class CartesianStencilManager {
+public:
+  GridBase* grid;
+  const std::vector<Coordinate>& shifts;
+  std::map<int, std::set<int> > field_points;
+  std::vector<CartesianStencil_t> stencils;
+  Vector<int> stencil_map;
+  std::map<int, std::map<int, int> > field_point_map;
+
+  CartesianStencilManager(GridBase* _grid, const std::vector<Coordinate>& _shifts) : grid(_grid), shifts(_shifts) {
+  }
+
+  bool is_trivial(int point) {
+    for (auto s : shifts[point])
+      if (s != 0)
+	return false;
+    return true;
+  }
+
+  void register_point(int index, int point) {
+    // register non-trivial points
+    if (is_trivial(point))
+      return;
+
+    field_points[index].insert(point);
+
+    if ((int)stencil_map.size() < index + 1)
+      stencil_map.resize(index + 1, -1);
+  }
+
+  void create_stencils() {
+    // all fields in field_points now need a stencil
+    for (auto & fp : field_points) {
+
+      int index = fp.first;
+
+      std::vector<int> dirs, disps;
+      for (auto p : fp.second) {
+	int dir, disp;
+	cgpt_shift_to_cartesian_dir_disp(shifts[p], dir, disp);
+	ASSERT(dir != -1);
+
+	field_point_map[index][p] = (int)dirs.size();
+
+	dirs.push_back(dir);
+	disps.push_back(disp);
+
+	stencil_map[index] = (int)stencils.size();
+	stencils.push_back(CartesianStencil_t(grid,dirs.size(),Even,dirs,disps,SimpleStencilParams()));
+      }
+    }
+  }
+
+  int map_point(int index, int point) {
+    if (is_trivial(point))
+      return -1;
+    return field_point_map[index][point];
+  }
+
+};