From d3563a1d07635bfba57b9b53ccc6befb233840af Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Sat, 21 Sep 2024 12:48:24 +0300 Subject: [PATCH] lumi-g tweaks --- lib/cgpt/lib/distribute.h | 4 +- .../lib/distribute/global_memory_transfer.h | 43 +++++++++++++------ lib/gpt/core/domain/two_grid_base.py | 4 +- lib/gpt/core/padding.py | 9 +++- lib/gpt/qcd/gauge/smear/local_stout.py | 40 ++++++++++++++--- lib/gpt/qcd/gauge/stencil/staple.py | 1 + 6 files changed, 75 insertions(+), 26 deletions(-) diff --git a/lib/cgpt/lib/distribute.h b/lib/cgpt/lib/distribute.h index e0d9fc6c..455bcaf5 100644 --- a/lib/cgpt/lib/distribute.h +++ b/lib/cgpt/lib/distribute.h @@ -245,7 +245,6 @@ class global_memory_transfer : public global_transfer { } else if (view.type == mt_host) { acceleratorFreeCpu(view.ptr); } - } }; @@ -263,7 +262,8 @@ class global_memory_transfer : public global_transfer { } // memory buffers - std::map send_buffers, recv_buffers; + std::vector buffers; + std::map send_buffers, recv_buffers; memory_type comm_buffers_type; std::map > send_blocks, recv_blocks; diff --git a/lib/cgpt/lib/distribute/global_memory_transfer.h b/lib/cgpt/lib/distribute/global_memory_transfer.h index db2a6aaa..7d7505a5 100644 --- a/lib/cgpt/lib/distribute/global_memory_transfer.h +++ b/lib/cgpt/lib/distribute/global_memory_transfer.h @@ -510,7 +510,7 @@ void global_memory_transfer::create(const view_t& _dst, if (!local_only) { Timer("create_com_buffers"); - // optionally create communication buffers + // optionally create communication buffers create_comm_buffers(use_comm_buffers_of_type); } @@ -612,17 +612,32 @@ void global_memory_transfer::create_comm_buffers(memory } // allocate buffers +#define BUFFER_ALIGN 4096 +#define BUFFER_ROUNDUP(a) (((size_t)((a + BUFFER_ALIGN - 1) / BUFFER_ALIGN)) * BUFFER_ALIGN) + + size_t sz_total = 0; + for (auto & s : send_size) + sz_total += BUFFER_ROUNDUP(s.second); + for (auto & s : recv_size) + sz_total += BUFFER_ROUNDUP(s.second); + + // std::cout
<< GridLogMessage << "Allocate memory buffer of size " << sz_total << std::endl; + ASSERT(buffers.size() == 0); + buffers.push_back(memory_buffer(sz_total, mt)); + + sz_total = 0; + char* base = (char*)buffers[0].view.ptr; for (auto & s : send_size) { - //printf("Rank %d has a send_buffer of size %d for rank %d\n", - // this->rank, (int)s.second, (int)s.first); - send_buffers.insert(std::make_pair(s.first,memory_buffer(s.second, mt))); + memory_view mv = {mt,(void*)(base + sz_total),s.second}; + send_buffers.insert(std::make_pair(s.first, mv)); + sz_total += BUFFER_ROUNDUP(s.second); } - for (auto & s : recv_size) { - //printf("Rank %d has a recv_buffer of size %d for rank %d\n", - // this->rank, (int)s.second, (int)s.first); - recv_buffers.insert(std::make_pair(s.first,memory_buffer(s.second, mt))); + memory_view mv = {mt,(void*)(base + sz_total),s.second}; + recv_buffers.insert(std::make_pair(s.first, mv)); + sz_total += BUFFER_ROUNDUP(s.second); } + } template @@ -775,7 +790,7 @@ void global_memory_transfer::execute(std::vector::execute(std::vectorisend(buf.first, buf.second.view.ptr, buf.second.view.sz); + this->isend(buf.first, buf.second.ptr, buf.second.sz); stats_isends += 1; - stats_send_bytes += buf.second.view.sz; + stats_send_bytes += buf.second.sz; } for (auto & buf : recv_buffers) { - this->irecv(buf.first, buf.second.view.ptr, buf.second.view.sz); + this->irecv(buf.first, buf.second.ptr, buf.second.sz); stats_irecvs += 1; - stats_recv_bytes += buf.second.view.sz; + stats_recv_bytes += buf.second.sz; } } @@ -835,7 +850,7 @@ void global_memory_transfer::execute(std::vector