Skip to content

Commit

Permalink
Move SPMV to MatrixCSR from test
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisrichardson committed Jan 9, 2025
1 parent fb0b994 commit 3fe5d00
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 101 deletions.
62 changes: 62 additions & 0 deletions cpp/dolfinx/la/MatrixCSR.h
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,12 @@ class MatrixCSR
/// @note MPI Collective
double squared_norm() const;

/// @brief Computes y += Ax for the local CSR matrix and local dense vectors
///
/// @param[in] x Input Vector
/// @param[out] y Output vector
void spmv(Vector<value_type>& x, Vector<value_type>& y);

/// @brief Index maps for the row and column space.
///
/// The row IndexMap contains ghost entries for rows which may be
Expand Down Expand Up @@ -734,4 +740,60 @@ double MatrixCSR<U, V, W, X>::squared_norm() const
}
//-----------------------------------------------------------------------------

// The matrix A is distributed across P processes by blocks of rows:
// A = | A_0 |
// | A_1 |
// | ... |
// | A_P-1 |
//
// Each submatrix A_i is owned by a single process "i" and can be further
// decomposed into diagonal (Ai[0]) and off diagonal (Ai[1]) blocks:
// Ai = |Ai[0] Ai[1]|
//
// If A is square, the diagonal block Ai[0] is also square and contains
// only owned columns and rows. The block Ai[1] contains ghost columns
// (unowned dofs).

// Likewise, a local vector x can be decomposed into owned and ghost blocks:
// xi = | x[0] |
// | x[1] |
//
// So the product y = Ax can be computed into two separate steps:
// y[0] = |Ai[0] Ai[1]| | x[0] | = Ai[0] x[0] + Ai[1] x[1]
// | x[1] |
//
/// Computes y += A*x for a parallel CSR matrix A and parallel dense vectors x,y
/// @param[in] x Input vector
/// @param[in, out] y Output vector
template <typename U, typename V, typename W, typename X>
void MatrixCSR<U, V, W, X>::spmv(la::Vector<U>& x, la::Vector<U>& y)
{
// start communication (update ghosts)
x.scatter_fwd_begin();

const std::int32_t nrowslocal = num_owned_rows();
std::span<const std::int64_t> Arow_ptr(row_ptr().data(), nrowslocal + 1);
std::span<const std::int32_t> Acols(cols().data(), Arow_ptr[nrowslocal]);
std::span<const std::int64_t> Aoff_diag_offset(off_diag_offset().data(),
nrowslocal);
std::span<const U> Avalues(values().data(), Arow_ptr[nrowslocal]);

std::span<const U> _x = x.array();
std::span<U> _y = y.mutable_array();

std::span<const std::int64_t> Arow_begin(Arow_ptr.data(), nrowslocal);
std::span<const std::int64_t> Arow_end(Arow_ptr.data() + 1, nrowslocal);

// First stage: spmv - diagonal
// yi[0] += Ai[0] * xi[0]
impl::spmv<U>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y);

// finalize ghost update
x.scatter_fwd_end();

// Second stage: spmv - off-diagonal
// yi[0] += Ai[1] * xi[1]
impl::spmv<U>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y);
}

} // namespace dolfinx::la
43 changes: 29 additions & 14 deletions cpp/dolfinx/la/matrix_csr_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,12 @@ void insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr,
const X& x, const Y& xrows, const Y& xcols, OP op,
typename Y::value_type num_rows, int bs0, int bs1);

} // namespace impl

//-----------------------------------------------------------------------------
template <int BS0, int BS1, typename OP, typename U, typename V, typename W,
typename X, typename Y>
void impl::insert_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
const Y& xrows, const Y& xcols, OP op,
[[maybe_unused]] typename Y::value_type num_rows)
void insert_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
const Y& xrows, const Y& xcols, OP op,
[[maybe_unused]] typename Y::value_type num_rows)
{
const std::size_t nc = xcols.size();
assert(x.size() == xrows.size() * xcols.size() * BS0 * BS1);
Expand Down Expand Up @@ -151,9 +149,9 @@ void impl::insert_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
// Insert with block insertion into a regular CSR (block size 1)
template <int BS0, int BS1, typename OP, typename U, typename V, typename W,
typename X, typename Y>
void impl::insert_blocked_csr(U&& data, const V& cols, const W& row_ptr,
const X& x, const Y& xrows, const Y& xcols, OP op,
[[maybe_unused]] typename Y::value_type num_rows)
void insert_blocked_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
const Y& xrows, const Y& xcols, OP op,
[[maybe_unused]] typename Y::value_type num_rows)
{
const std::size_t nc = xcols.size();
assert(x.size() == xrows.size() * xcols.size() * BS0 * BS1);
Expand Down Expand Up @@ -195,12 +193,10 @@ void impl::insert_blocked_csr(U&& data, const V& cols, const W& row_ptr,
// Add individual entries in block-CSR storage
template <typename OP, typename U, typename V, typename W, typename X,
typename Y>
void impl::insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr,
const X& x, const Y& xrows, const Y& xcols,
OP op,
[[maybe_unused]]
typename Y::value_type num_rows,
int bs0, int bs1)
void insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr,
const X& x, const Y& xrows, const Y& xcols, OP op,
[[maybe_unused]] typename Y::value_type num_rows,
int bs0, int bs1)
{
const std::size_t nc = xcols.size();
const int nbs = bs0 * bs1;
Expand Down Expand Up @@ -237,4 +233,23 @@ void impl::insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr,
}
}
//-----------------------------------------------------------------------------

template <typename T>
void spmv(std::span<const T> values, std::span<const std::int64_t> row_begin,
std::span<const std::int64_t> row_end,
std::span<const std::int32_t> indices, std::span<const T> x,
std::span<T> y)
{
assert(row_begin.size() == row_end.size());
for (std::size_t i = 0; i < row_begin.size(); i++)
{
y[i] += std::inner_product(
std::next(values.begin(), row_begin[i]),
std::next(values.begin(), row_end[i]),
std::next(indices.begin(), row_begin[i]), T(0.0), std::plus<T>(),
[&x](T val, std::int32_t i) { return val * x[i]; });
}
}

} // namespace impl
} // namespace dolfinx::la
7 changes: 3 additions & 4 deletions cpp/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ project(dolfinx-tests)

project(${PROJECT_NAME} LANGUAGES C CXX)
set(CMAKE_C_STANDARD 17) # For FFCx generated .c files.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
# set(CMAKE_CXX_STANDARD 20)
# set(CMAKE_CXX_STANDARD_REQUIRED ON)
# set(CMAKE_CXX_EXTENSIONS OFF)

# Find DOLFINx config file
find_package(DOLFINX REQUIRED)
Expand Down Expand Up @@ -76,6 +76,5 @@ target_compile_options(

# Enable testing
enable_testing()

# Test target
add_test(unittests unittests)
84 changes: 1 addition & 83 deletions cpp/test/matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,88 +23,6 @@ using namespace dolfinx;

namespace
{
/// Computes y += A*x for a local CSR matrix A and local dense vectors x,y
/// @param[in] values Nonzero values of A
/// @param[in] row_begin First index of each row in the arrays values and
/// indices.
/// @param[in] row_end Last index of each row in the arrays values and indices.
/// @param[in] indices Column indices for each non-zero element of the matrix A
/// @param[in] x Input vector
/// @param[in, out] x Output vector
template <typename T>
void spmv_impl(std::span<const T> values,
std::span<const std::int64_t> row_begin,
std::span<const std::int64_t> row_end,
std::span<const std::int32_t> indices, std::span<const T> x,
std::span<T> y)
{
assert(row_begin.size() == row_end.size());
for (std::size_t i = 0; i < row_begin.size(); i++)
{
double vi{0};
for (std::int32_t j = row_begin[i]; j < row_end[i]; j++)
vi += values[j] * x[indices[j]];
y[i] += vi;
}
}

// The matrix A is distributed across P processes by blocks of rows:
// A = | A_0 |
// | A_1 |
// | ... |
// | A_P-1 |
//
// Each submatrix A_i is owned by a single process "i" and can be further
// decomposed into diagonal (Ai[0]) and off diagonal (Ai[1]) blocks:
// Ai = |Ai[0] Ai[1]|
//
// If A is square, the diagonal block Ai[0] is also square and contains
// only owned columns and rows. The block Ai[1] contains ghost columns
// (unowned dofs).

// Likewise, a local vector x can be decomposed into owned and ghost blocks:
// xi = | x[0] |
// | x[1] |
//
// So the product y = Ax can be computed into two separate steps:
// y[0] = |Ai[0] Ai[1]| | x[0] | = Ai[0] x[0] + Ai[1] x[1]
// | x[1] |
//
/// Computes y += A*x for a parallel CSR matrix A and parallel dense vectors x,y
/// @param[in] A Parallel CSR matrix
/// @param[in] x Input vector
/// @param[in, out] y Output vector
template <typename T>
void spmv(la::MatrixCSR<T>& A, la::Vector<T>& x, la::Vector<T>& y)
{
// start communication (update ghosts)
x.scatter_fwd_begin();

const std::int32_t nrowslocal = A.num_owned_rows();
std::span<const std::int64_t> row_ptr(A.row_ptr().data(), nrowslocal + 1);
std::span<const std::int32_t> cols(A.cols().data(), row_ptr[nrowslocal]);
std::span<const std::int64_t> off_diag_offset(A.off_diag_offset().data(),
nrowslocal);
std::span<const T> values(A.values().data(), row_ptr[nrowslocal]);

std::span<const T> _x = x.array();
std::span<T> _y = y.mutable_array();

std::span<const std::int64_t> row_begin(row_ptr.data(), nrowslocal);
std::span<const std::int64_t> row_end(row_ptr.data() + 1, nrowslocal);

// First stage: spmv - diagonal
// yi[0] += Ai[0] * xi[0]
spmv_impl<T>(values, row_begin, off_diag_offset, cols, _x, _y);

// finalize ghost update
x.scatter_fwd_end();

// Second stage: spmv - off-diagonal
// yi[0] += Ai[1] * xi[1]
spmv_impl<T>(values, off_diag_offset, row_end, cols, _x, _y);
}

/// @brief Create a matrix operator
/// @param comm The communicator to builf the matrix on
/// @return The assembled matrix
Expand Down Expand Up @@ -195,7 +113,7 @@ la::MatrixCSR<double> create_operator(MPI_Comm comm)

// Matrix A represents the action of the Laplace operator, so when
// applied to a constant vector the result should be zero
spmv(A, x, y);
A.spmv(x, y);

std::ranges::for_each(y.array(),
[](auto a) { REQUIRE(std::abs(a) < 1e-13); });
Expand Down

0 comments on commit 3fe5d00

Please sign in to comment.