Move SPMV to MatrixCSR from test

FEniCS · Jan 9, 2025 · 3fe5d00 · 3fe5d00
1 parent fb0b994
commit 3fe5d00
Show file tree

Hide file tree

Showing 4 changed files with 95 additions and 101 deletions.
diff --git a/cpp/dolfinx/la/MatrixCSR.h b/cpp/dolfinx/la/MatrixCSR.h
@@ -322,6 +322,12 @@ class MatrixCSR
   /// @note MPI Collective
   double squared_norm() const;
 
+  /// @brief Computes y += Ax for the local CSR matrix and local dense vectors
+  ///
+  /// @param[in] x Input Vector
+  /// @param[out] y Output vector
+  void spmv(Vector<value_type>& x, Vector<value_type>& y);
+
   /// @brief Index maps for the row and column space.
   ///
   /// The row IndexMap contains ghost entries for rows which may be
@@ -734,4 +740,60 @@ double MatrixCSR<U, V, W, X>::squared_norm() const
 }
 //-----------------------------------------------------------------------------
 
+// The matrix A is distributed across P  processes by blocks of rows:
+//  A = |   A_0  |
+//      |   A_1  |
+//      |   ...  |
+//      |  A_P-1 |
+//
+// Each submatrix A_i is owned by a single process "i" and can be further
+// decomposed into diagonal (Ai[0]) and off diagonal (Ai[1]) blocks:
+//  Ai = |Ai[0] Ai[1]|
+//
+// If A is square, the diagonal block Ai[0] is also square and contains
+// only owned columns and rows. The block Ai[1] contains ghost columns
+// (unowned dofs).
+
+// Likewise, a local vector x can be decomposed into owned and ghost blocks:
+// xi = |   x[0]  |
+//      |   x[1]  |
+//
+// So the product y = Ax can be computed into two separate steps:
+//  y[0] = |Ai[0] Ai[1]| |   x[0]  | = Ai[0] x[0] + Ai[1] x[1]
+//                       |   x[1]  |
+//
+/// Computes y += A*x for a parallel CSR matrix A and parallel dense vectors x,y
+/// @param[in] x Input vector
+/// @param[in, out] y Output vector
+template <typename U, typename V, typename W, typename X>
+void MatrixCSR<U, V, W, X>::spmv(la::Vector<U>& x, la::Vector<U>& y)
+{
+  // start communication (update ghosts)
+  x.scatter_fwd_begin();
+
+  const std::int32_t nrowslocal = num_owned_rows();
+  std::span<const std::int64_t> Arow_ptr(row_ptr().data(), nrowslocal + 1);
+  std::span<const std::int32_t> Acols(cols().data(), Arow_ptr[nrowslocal]);
+  std::span<const std::int64_t> Aoff_diag_offset(off_diag_offset().data(),
+                                                 nrowslocal);
+  std::span<const U> Avalues(values().data(), Arow_ptr[nrowslocal]);
+
+  std::span<const U> _x = x.array();
+  std::span<U> _y = y.mutable_array();
+
+  std::span<const std::int64_t> Arow_begin(Arow_ptr.data(), nrowslocal);
+  std::span<const std::int64_t> Arow_end(Arow_ptr.data() + 1, nrowslocal);
+
+  // First stage:  spmv - diagonal
+  // yi[0] += Ai[0] * xi[0]
+  impl::spmv<U>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y);
+
+  // finalize ghost update
+  x.scatter_fwd_end();
+
+  // Second stage:  spmv - off-diagonal
+  // yi[0] += Ai[1] * xi[1]
+  impl::spmv<U>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y);
+}
+
 } // namespace dolfinx::la
diff --git a/cpp/dolfinx/la/matrix_csr_impl.h b/cpp/dolfinx/la/matrix_csr_impl.h
@@ -100,14 +100,12 @@ void insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr,
                            const X& x, const Y& xrows, const Y& xcols, OP op,
                            typename Y::value_type num_rows, int bs0, int bs1);
 
-} // namespace impl
-
 //-----------------------------------------------------------------------------
 template <int BS0, int BS1, typename OP, typename U, typename V, typename W,
           typename X, typename Y>
-void impl::insert_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
-                      const Y& xrows, const Y& xcols, OP op,
-                      [[maybe_unused]] typename Y::value_type num_rows)
+void insert_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
+                const Y& xrows, const Y& xcols, OP op,
+                [[maybe_unused]] typename Y::value_type num_rows)
 {
   const std::size_t nc = xcols.size();
   assert(x.size() == xrows.size() * xcols.size() * BS0 * BS1);
@@ -151,9 +149,9 @@ void impl::insert_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
 // Insert with block insertion into a regular CSR (block size 1)
 template <int BS0, int BS1, typename OP, typename U, typename V, typename W,
           typename X, typename Y>
-void impl::insert_blocked_csr(U&& data, const V& cols, const W& row_ptr,
-                              const X& x, const Y& xrows, const Y& xcols, OP op,
-                              [[maybe_unused]] typename Y::value_type num_rows)
+void insert_blocked_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
+                        const Y& xrows, const Y& xcols, OP op,
+                        [[maybe_unused]] typename Y::value_type num_rows)
 {
   const std::size_t nc = xcols.size();
   assert(x.size() == xrows.size() * xcols.size() * BS0 * BS1);
@@ -195,12 +193,10 @@ void impl::insert_blocked_csr(U&& data, const V& cols, const W& row_ptr,
 // Add individual entries in block-CSR storage
 template <typename OP, typename U, typename V, typename W, typename X,
           typename Y>
-void impl::insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr,
-                                 const X& x, const Y& xrows, const Y& xcols,
-                                 OP op,
-                                 [[maybe_unused]]
-                                 typename Y::value_type num_rows,
-                                 int bs0, int bs1)
+void insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr,
+                           const X& x, const Y& xrows, const Y& xcols, OP op,
+                           [[maybe_unused]] typename Y::value_type num_rows,
+                           int bs0, int bs1)
 {
   const std::size_t nc = xcols.size();
   const int nbs = bs0 * bs1;
@@ -237,4 +233,23 @@ void impl::insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr,
   }
 }
 //-----------------------------------------------------------------------------
+
+template <typename T>
+void spmv(std::span<const T> values, std::span<const std::int64_t> row_begin,
+          std::span<const std::int64_t> row_end,
+          std::span<const std::int32_t> indices, std::span<const T> x,
+          std::span<T> y)
+{
+  assert(row_begin.size() == row_end.size());
+  for (std::size_t i = 0; i < row_begin.size(); i++)
+  {
+    y[i] += std::inner_product(
+        std::next(values.begin(), row_begin[i]),
+        std::next(values.begin(), row_end[i]),
+        std::next(indices.begin(), row_begin[i]), T(0.0), std::plus<T>(),
+        [&x](T val, std::int32_t i) { return val * x[i]; });
+  }
+}
+
+} // namespace impl
 } // namespace dolfinx::la
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
@@ -3,9 +3,9 @@ project(dolfinx-tests)
 
 project(${PROJECT_NAME} LANGUAGES C CXX)
 set(CMAKE_C_STANDARD 17) # For FFCx generated .c files.
-set(CMAKE_CXX_STANDARD 20)
-set(CMAKE_CXX_STANDARD_REQUIRED ON)
-set(CMAKE_CXX_EXTENSIONS OFF)
+# set(CMAKE_CXX_STANDARD 20)
+# set(CMAKE_CXX_STANDARD_REQUIRED ON)
+# set(CMAKE_CXX_EXTENSIONS OFF)
 
 # Find DOLFINx config file
 find_package(DOLFINX REQUIRED)
@@ -76,6 +76,5 @@ target_compile_options(
 
 # Enable testing
 enable_testing()
-
 # Test target
 add_test(unittests unittests)
diff --git a/cpp/test/matrix.cpp b/cpp/test/matrix.cpp
@@ -23,88 +23,6 @@ using namespace dolfinx;
 
 namespace
 {
-/// Computes y += A*x for a local CSR matrix A and local dense vectors x,y
-/// @param[in] values Nonzero values of A
-/// @param[in] row_begin First index of each row in the arrays values and
-/// indices.
-/// @param[in] row_end Last index of each row in the arrays values and indices.
-/// @param[in] indices Column indices for each non-zero element of the matrix A
-/// @param[in] x Input vector
-/// @param[in, out] x Output vector
-template <typename T>
-void spmv_impl(std::span<const T> values,
-               std::span<const std::int64_t> row_begin,
-               std::span<const std::int64_t> row_end,
-               std::span<const std::int32_t> indices, std::span<const T> x,
-               std::span<T> y)
-{
-  assert(row_begin.size() == row_end.size());
-  for (std::size_t i = 0; i < row_begin.size(); i++)
-  {
-    double vi{0};
-    for (std::int32_t j = row_begin[i]; j < row_end[i]; j++)
-      vi += values[j] * x[indices[j]];
-    y[i] += vi;
-  }
-}
-
-// The matrix A is distributed across P  processes by blocks of rows:
-//  A = |   A_0  |
-//      |   A_1  |
-//      |   ...  |
-//      |  A_P-1 |
-//
-// Each submatrix A_i is owned by a single process "i" and can be further
-// decomposed into diagonal (Ai[0]) and off diagonal (Ai[1]) blocks:
-//  Ai = |Ai[0] Ai[1]|
-//
-// If A is square, the diagonal block Ai[0] is also square and contains
-// only owned columns and rows. The block Ai[1] contains ghost columns
-// (unowned dofs).
-
-// Likewise, a local vector x can be decomposed into owned and ghost blocks:
-// xi = |   x[0]  |
-//      |   x[1]  |
-//
-// So the product y = Ax can be computed into two separate steps:
-//  y[0] = |Ai[0] Ai[1]| |   x[0]  | = Ai[0] x[0] + Ai[1] x[1]
-//                       |   x[1]  |
-//
-/// Computes y += A*x for a parallel CSR matrix A and parallel dense vectors x,y
-/// @param[in] A Parallel CSR matrix
-/// @param[in] x Input vector
-/// @param[in, out] y Output vector
-template <typename T>
-void spmv(la::MatrixCSR<T>& A, la::Vector<T>& x, la::Vector<T>& y)
-{
-  // start communication (update ghosts)
-  x.scatter_fwd_begin();
-
-  const std::int32_t nrowslocal = A.num_owned_rows();
-  std::span<const std::int64_t> row_ptr(A.row_ptr().data(), nrowslocal + 1);
-  std::span<const std::int32_t> cols(A.cols().data(), row_ptr[nrowslocal]);
-  std::span<const std::int64_t> off_diag_offset(A.off_diag_offset().data(),
-                                                nrowslocal);
-  std::span<const T> values(A.values().data(), row_ptr[nrowslocal]);
-
-  std::span<const T> _x = x.array();
-  std::span<T> _y = y.mutable_array();
-
-  std::span<const std::int64_t> row_begin(row_ptr.data(), nrowslocal);
-  std::span<const std::int64_t> row_end(row_ptr.data() + 1, nrowslocal);
-
-  // First stage:  spmv - diagonal
-  // yi[0] += Ai[0] * xi[0]
-  spmv_impl<T>(values, row_begin, off_diag_offset, cols, _x, _y);
-
-  // finalize ghost update
-  x.scatter_fwd_end();
-
-  // Second stage:  spmv - off-diagonal
-  // yi[0] += Ai[1] * xi[1]
-  spmv_impl<T>(values, off_diag_offset, row_end, cols, _x, _y);
-}
-
 /// @brief Create a matrix operator
 /// @param comm The communicator to builf the matrix on
 /// @return The assembled matrix
@@ -195,7 +113,7 @@ la::MatrixCSR<double> create_operator(MPI_Comm comm)
 
   // Matrix A represents the action of the Laplace operator, so when
   // applied to a constant vector the result should be zero
-  spmv(A, x, y);
+  A.spmv(x, y);
 
   std::ranges::for_each(y.array(),
                         [](auto a) { REQUIRE(std::abs(a) < 1e-13); });