From f67588fcfdaa3ce7788310c1b6993a6442c46543 Mon Sep 17 00:00:00 2001 From: Kengo Tomida Date: Sat, 31 Aug 2024 23:09:29 +0900 Subject: [PATCH 1/9] First implementation --- src/athena.hpp | 20 ++ src/mesh/amr_loadbalance.cpp | 350 +++++++++++++++++++++++++++++------ src/mesh/mesh.hpp | 15 +- src/mesh/mesh_refinement.cpp | 54 ++++++ src/mesh/mesh_refinement.hpp | 21 +++ src/multigrid/multigrid.hpp | 18 -- 6 files changed, 405 insertions(+), 73 deletions(-) diff --git a/src/athena.hpp b/src/athena.hpp index 90ca38a27f..bed1600b52 100644 --- a/src/athena.hpp +++ b/src/athena.hpp @@ -78,6 +78,26 @@ struct LogicalLocation { // aggregate and POD type //! prototype for overloading the comparison operator (defined in meshblock_tree.cpp) bool operator==(const LogicalLocation &l1, const LogicalLocation &l2); + +//! \fn inline std::int64_t rotl(std::int64_t i, int s) +// \brief left bit rotation function for 64bit integers (unsafe if s > 64) + +inline std::int64_t rotl(std::int64_t i, int s) { + return (i << s) | (i >> (64 - s)); +} + + +//! \struct LogicalLocationHash +// \brief Hash function object for LogicalLocation + +struct LogicalLocationHash { + public: + std::size_t operator()(const LogicalLocation &l) const { + return static_cast(l.lx1^rotl(l.lx2,21)^rotl(l.lx3,42)); + } +}; + + //---------------------------------------------------------------------------------------- //! \struct RegionSize //! \brief physical size and number of cells in a Mesh or a MeshBlock diff --git a/src/mesh/amr_loadbalance.cpp b/src/mesh/amr_loadbalance.cpp index 19364164e6..5f3440b782 100644 --- a/src/mesh/amr_loadbalance.cpp +++ b/src/mesh/amr_loadbalance.cpp @@ -378,6 +378,8 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, int ntot) { int on = newtoold[n]; if (newloc[n].level>current_level) // set the current max level current_level = newloc[n].level; + if (newloc[n].level > loclist[on].level) // refined + refined_.push_back(n); if (newloc[n].level >= loclist[on].level) { // same or refined newcost[n] = costlist[on]; } else { @@ -394,9 +396,9 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, int ntot) { int nbs = nslist[Globals::my_rank]; int nbe = nbs + nblist[Globals::my_rank] - 1; - int bnx1 = my_blocks(0)->block_size.nx1; - int bnx2 = my_blocks(0)->block_size.nx2; - int bnx3 = my_blocks(0)->block_size.nx3; + const int bnx1 = my_blocks(0)->block_size.nx1; + const int bnx2 = my_blocks(0)->block_size.nx2; + const int bnx3 = my_blocks(0)->block_size.nx3; #ifdef MPI_PARALLEL // Step 3. count the number of the blocks to be sent / received @@ -545,6 +547,9 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, int ntot) { } // if (nsend !=0) #endif // MPI_PARALLEL + if (my_blocks(0)->pmr->pvars_fc_.size() > 0) + PrepareAndSendFaceFieldCorrection(newloc, ranklist, newrank, nslist, nbtold); + // Step 7. construct a new MeshBlock list (moving the data within the MPI rank) AthenaArray newlist; newlist.NewAthenaArray(nblist[Globals::my_rank]); @@ -620,19 +625,36 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, int ntot) { } else { // c2f if (ranklist[on] == Globals::my_rank) continue; MPI_Wait(&(req_recv[rb_idx]), MPI_STATUS_IGNORE); - FinishRecvCoarseToFineAMR(pb, recvbuf[rb_idx]); + ReceiveCoarseToFineAMR(pb, recvbuf[rb_idx]); rb_idx++; } } } #endif + if (my_blocks(0)->pmr->pvars_fc_.size() > 0) + ReceiveAndSetFaceFieldCorrection(newrank); + + // Step 8b. Prolongate MeshBlocks + for (int n=nbs; n<=nbe; n++) { + int on = newtoold[n]; + LogicalLocation const &oloc = loclist[on]; + LogicalLocation const &nloc = newloc[n]; + if (oloc.level < nloc.level) { // c2f + MeshBlock *pb = FindMeshBlock(n); + ProlongateMeshBlock(pb); + } + } + // deallocate arrays delete [] loclist; delete [] ranklist; delete [] costlist; delete [] newtoold; delete [] oldtonew; + refined_.clear(); + ffc_send_.clear(); + ffc_recv_.clear(); #ifdef MPI_PARALLEL if (nsend != 0) { MPI_Waitall(nsend, req_send, MPI_STATUSES_IGNORE); @@ -949,8 +971,7 @@ void Mesh::FillSameRankFineToCoarseAMR(MeshBlock* pob, MeshBlock* pmb, //---------------------------------------------------------------------------------------- //! \fn void Mesh::FillSameRankCoarseToFineAMR(MeshBlock* pob, MeshBlock* pmb, //! LogicalLocation &newloc) -//! \brief step 7: c2f, same MPI rank, different level -//! (just copy+prolongate, no pack/send) +//! \brief step 7: c2f, same MPI rank, different level (just copy, no pack/send) void Mesh::FillSameRankCoarseToFineAMR(MeshBlock* pob, MeshBlock* pmb, LogicalLocation &newloc) { @@ -979,9 +1000,6 @@ void Mesh::FillSameRankCoarseToFineAMR(MeshBlock* pob, MeshBlock* pmb, } } } - pmr->ProlongateCellCenteredValues( - dst, *var_cc, 0, nu, - pob->cis, pob->cie, pob->cjs, pob->cje, pob->cks, pob->cke); pob_cc_it++; } @@ -1003,9 +1021,6 @@ void Mesh::FillSameRankCoarseToFineAMR(MeshBlock* pob, MeshBlock* pmb, } } } - pmr->ProlongateCellCenteredValues( - dst, var_cc, -1, 0, nu, - pob->cis, pob->cie, pob->cjs, pob->cje, pob->cks, pob->cke); } auto pob_fc_it = pob->pmr->pvars_fc_.begin(); @@ -1034,18 +1049,6 @@ void Mesh::FillSameRankCoarseToFineAMR(MeshBlock* pob, MeshBlock* pmb, dst_b.x3f(k, j, i) = src_b.x3f(ck, cj, ci); } } - pmr->ProlongateSharedFieldX1( - dst_b.x1f, (*var_fc).x1f, - pob->cis, pob->cie+1, pob->cjs, pob->cje, pob->cks, pob->cke); - pmr->ProlongateSharedFieldX2( - dst_b.x2f, (*var_fc).x2f, - pob->cis, pob->cie, pob->cjs, pob->cje+f2, pob->cks, pob->cke); - pmr->ProlongateSharedFieldX3( - dst_b.x3f, (*var_fc).x3f, - pob->cis, pob->cie, pob->cjs, pob->cje, pob->cks, pob->cke+f3); - pmr->ProlongateInternalField( - *var_fc, pob->cis, pob->cie, - pob->cjs, pob->cje, pob->cks, pob->cke); pob_fc_it++; } return; @@ -1055,6 +1058,7 @@ void Mesh::FillSameRankCoarseToFineAMR(MeshBlock* pob, MeshBlock* pmb, //! \fn void Mesh::FinishRecvSameLevel(MeshBlock *pb, Real *recvbuf) //! \brief step 8 (receive and load), branch 1 (same2same: unpack) void Mesh::FinishRecvSameLevel(MeshBlock *pb, Real *recvbuf) { + MeshRefinement *pmr = pb->pmr; int p = 0; for (AthenaArray &var_cc : pb->vars_cc_) { int nu = var_cc.GetDim4() - 1; @@ -1091,7 +1095,7 @@ void Mesh::FinishRecvSameLevel(MeshBlock *pb, Real *recvbuf) { //! appended integer from received buffer is slightly unsafe if (adaptive) { int *dcp = reinterpret_cast(&(recvbuf[p])); - pb->pmr->deref_count_ = *dcp; + pmr->deref_count_ = *dcp; } return; } @@ -1103,6 +1107,7 @@ void Mesh::FinishRecvSameLevel(MeshBlock *pb, Real *recvbuf) { void Mesh::FinishRecvFineToCoarseAMR(MeshBlock *pb, Real *recvbuf, LogicalLocation &lloc) { + MeshRefinement *pmr = pb->pmr; int ox1 = ((lloc.lx1 & 1LL) == 1LL), ox2 = ((lloc.lx2 & 1LL) == 1LL), ox3 = ((lloc.lx3 & 1LL) == 1LL); int p = 0, il, iu, jl, ju, kl, ku; @@ -1113,7 +1118,7 @@ void Mesh::FinishRecvFineToCoarseAMR(MeshBlock *pb, Real *recvbuf, if (ox3 == 0) kl = pb->ks, ku = pb->ks + pb->block_size.nx3/2 - f3; else kl = pb->ks + pb->block_size.nx3/2, ku = pb->ke; - for (auto cc_pair : pb->pmr->pvars_cc_) { + for (auto cc_pair : pmr->pvars_cc_) { AthenaArray *var_cc = std::get<0>(cc_pair); int nu = var_cc->GetDim4() - 1; BufferUtility::UnpackData(recvbuf, *var_cc, 0, nu, @@ -1126,7 +1131,7 @@ void Mesh::FinishRecvFineToCoarseAMR(MeshBlock *pb, Real *recvbuf, il, iu, jl, ju, p); } - for (auto fc_pair : pb->pmr->pvars_fc_) { + for (auto fc_pair : pmr->pvars_fc_) { FaceField *var_fc = std::get<0>(fc_pair); FaceField &dst_b = *var_fc; BufferUtility::UnpackData(recvbuf, dst_b.x1f, @@ -1150,41 +1155,31 @@ void Mesh::FinishRecvFineToCoarseAMR(MeshBlock *pb, Real *recvbuf, } //---------------------------------------------------------------------------------------- -//! \fn void Mesh::FinishRecvCoarseToFineAMR(MeshBlock *pb, Real *recvbuf) -//! \brief step 8 (receive and load), branch 2 (c2f: unpack+prolongate) +//! \fn void Mesh::ReceiveCoarseToFineAMR(MeshBlock *pb, Real *recvbuf) +//! \brief step 8, branch 2 (c2f: receive and unpack) -void Mesh::FinishRecvCoarseToFineAMR(MeshBlock *pb, Real *recvbuf) { +void Mesh::ReceiveCoarseToFineAMR(MeshBlock *pb, Real *recvbuf) { MeshRefinement *pmr = pb->pmr; int p = 0; int il = pb->cis - 1, iu = pb->cie+1, jl = pb->cjs - f2, ju = pb->cje + f2, kl = pb->cks - f3, ku = pb->cke + f3; - for (auto cc_pair : pb->pmr->pvars_cc_) { + for (auto cc_pair : pmr->pvars_cc_) { AthenaArray *var_cc = std::get<0>(cc_pair); AthenaArray *coarse_cc = std::get<1>(cc_pair); int nu = var_cc->GetDim4() - 1; BufferUtility::UnpackData(recvbuf, *coarse_cc, 0, nu, il, iu, jl, ju, kl, ku, p); - pmr->ProlongateCellCenteredValues( - *coarse_cc, *var_cc, 0, nu, - pb->cis, pb->cie, pb->cjs, pb->cje, pb->cks, pb->cke); } - if ((NR_RADIATION_ENABLED|| IM_RADIATION_ENABLED)) { - // copy from pmb block AthenaArray &var_cc = pb->pnrrad->ir; AthenaArray &coarse_cc = pb->pnrrad->coarse_ir_; int nu = var_cc.GetDim1() - 1; - BufferUtility::UnpackData(recvbuf, coarse_cc, kl, ku, 0, nu, il, iu, jl, ju, p); - - pmr->ProlongateCellCenteredValues( - coarse_cc, var_cc, -1, 0, nu, - pb->cis, pb->cie, pb->cjs, pb->cje, pb->cks, pb->cke); } - for (auto fc_pair : pb->pmr->pvars_fc_) { + for (auto fc_pair : pmr->pvars_fc_) { FaceField *var_fc = std::get<0>(fc_pair); FaceField *coarse_fc = std::get<1>(fc_pair); @@ -1194,22 +1189,253 @@ void Mesh::FinishRecvCoarseToFineAMR(MeshBlock *pb, Real *recvbuf) { il, iu, jl, ju+f2, kl, ku, p); BufferUtility::UnpackData(recvbuf, (*coarse_fc).x3f, il, iu, jl, ju, kl, ku+f3, p); - pmr->ProlongateSharedFieldX1( - (*coarse_fc).x1f, (*var_fc).x1f, - pb->cis, pb->cie+1, pb->cjs, pb->cje, pb->cks, pb->cke); - pmr->ProlongateSharedFieldX2( - (*coarse_fc).x2f, (*var_fc).x2f, - pb->cis, pb->cie, pb->cjs, pb->cje+f2, pb->cks, pb->cke); - pmr->ProlongateSharedFieldX3( - (*coarse_fc).x3f, (*var_fc).x3f, - pb->cis, pb->cie, pb->cjs, pb->cje, pb->cks, pb->cke+f3); - pmr->ProlongateInternalField( - *var_fc, pb->cis, pb->cie, - pb->cjs, pb->cje, pb->cks, pb->cke); } return; } +//---------------------------------------------------------------------------------------- +//! \fn void Mesh::ProlongateMeshBlock(MeshBlock *pb) +//! \brief step 8, branch 2 (c2f: prolongate) + +void Mesh::ProlongateMeshBlock(MeshBlock *pb) { + MeshRefinement *pmr = pb->pmr; + for (auto cc_pair : pmr->pvars_cc_) { + AthenaArray *var_cc = std::get<0>(cc_pair); + AthenaArray *coarse_cc = std::get<1>(cc_pair); + int nu = var_cc->GetDim4() - 1; + pmr->ProlongateCellCenteredValues(*coarse_cc, *var_cc, 0, nu, + pb->cis, pb->cie, pb->cjs, pb->cje, pb->cks, pb->cke); + } + + + if ((NR_RADIATION_ENABLED|| IM_RADIATION_ENABLED)) { + // copy from pmb block + AthenaArray &var_cc = pb->pnrrad->ir; + AthenaArray &coarse_cc = pb->pnrrad->coarse_ir_; + int nu = var_cc.GetDim1() - 1; + + pmr->ProlongateCellCenteredValues(coarse_cc, var_cc, -1, 0, nu, + pb->cis, pb->cie, pb->cjs, pb->cje, pb->cks, pb->cke); + } + + int il = pb->cis, iu = pb->cie+1, jl = pb->cjs, ju = pb->cje + f2, + kl = pb->cks, ku = pb->cke + f3; + // Step FFC6. skip the surface fields contacting previously refined cells + if (pmr->flag_ffc_recv_[BoundaryFace::inner_x1]) il++; + if (pmr->flag_ffc_recv_[BoundaryFace::outer_x1]) iu--; + if (pmr->flag_ffc_recv_[BoundaryFace::inner_x2]) jl++; + if (pmr->flag_ffc_recv_[BoundaryFace::outer_x2]) ju--; + if (pmr->flag_ffc_recv_[BoundaryFace::inner_x3]) kl++; + if (pmr->flag_ffc_recv_[BoundaryFace::outer_x3]) ku--; + + for (auto fc_pair : pmr->pvars_fc_) { + FaceField *var_fc = std::get<0>(fc_pair); + FaceField *coarse_fc = std::get<1>(fc_pair); + + pmr->ProlongateSharedFieldX1((*coarse_fc).x1f, (*var_fc).x1f, + il, iu, pb->cjs, pb->cje, pb->cks, pb->cke); + pmr->ProlongateSharedFieldX2((*coarse_fc).x2f, (*var_fc).x2f, + pb->cis, pb->cie, jl, ju, pb->cks, pb->cke); + pmr->ProlongateSharedFieldX3((*coarse_fc).x3f, (*var_fc).x3f, + pb->cis, pb->cie, pb->cjs, pb->cje, kl, ku); + pmr->ProlongateInternalField(*var_fc, pb->cis, pb->cie, + pb->cjs, pb->cje, pb->cks, pb->cke); + } + return; +} + + +//---------------------------------------------------------------------------------------- +//! \fn void Mesh::PrepareAndSendFaceFieldCorrection(LogicalLocation *newloc, +//! int *ranklist, int *newrank, int *nslist, int nbtold) +//! \brief the first part of the face field correction + +void Mesh::PrepareAndSendFaceFieldCorrection(LogicalLocation *newloc, + int *ranklist, int *newrank, int *nslist, int nbtold) { + const int bnx1 = my_blocks(0)->block_size.nx1; + const int bnx2 = my_blocks(0)->block_size.nx2; + const int bnx3 = my_blocks(0)->block_size.nx3; + const int nf = my_blocks(0)->pmr->pvars_fc_.size(); + const int sffc1 = nf*bnx2*bnx3, sffc2 = nf*bnx1*bnx3, sffc3 = nf*bnx2*bnx3; + // Step FFC1. Find pairs that need face field correction + int s = 0; + for (int n : refined_) { // loop over refined MeshBlocks + LogicalLocation const &nloc = newloc[n]; + int xf1 = static_cast(nloc.lx1 & 1LL); + int xf2 = static_cast(nloc.lx2 & 1LL); + int xf3 = static_cast(nloc.lx3 & 1LL); + std::int64_t o1, o2, o3; + o1 = (xf1 << 1) - 1; + o2 = (xf2 << 1) - 1; + o3 = (xf3 << 1) - 1; + // *** KT: This loop over all the current MeshBlcks is a naive implementation. + // The MeshBlockTree is already updated at this point, so we have to search loclist. + // Perhaps we should use std::unordered_map if this part costs too much. + // We already have such an example in MultigridDriver (octet map). + for (int i = 0; i < nbtold; ++i) { + LogicalLocation const &floc = loclist[i]; + if (nloc.level == floc.level) { + if (nloc.lx1 + o1 == floc.lx1 && nloc.lx2 == floc.lx2 && nloc.lx3 == floc.lx3) { + if (ranklist[i] == Globals::my_rank) { + ffc_send_.emplace_back(i, n, xf1^1, sffc1, -1); + if (newrank[n] == Globals::my_rank) + ffc_recv_.emplace_back(i, n, xf1, 0, s); + s++; + } else if (newrank[n] == Globals::my_rank) { + ffc_recv_.emplace_back(i, n, xf1, sffc1, -1); + } + } + if (f2) { + if (nloc.lx1 == floc.lx1 && nloc.lx2 + o2 == floc.lx2 && nloc.lx3 == floc.lx3) { + if (ranklist[i] == Globals::my_rank) { + ffc_send_.emplace_back(i, n, xf2^1 + 2, sffc2, -1); + if (newrank[n] == Globals::my_rank) + ffc_recv_.emplace_back(i, n, xf2 + 2, 0, s); + s++; + } else if (newrank[n] == Globals::my_rank) { + ffc_recv_.emplace_back(i, n, xf2 + 2, sffc2, -1); + } + } + } + if (f3) { + if (nloc.lx1 == floc.lx1 && nloc.lx2 == floc.lx2 && nloc.lx3 + o3 == floc.lx3) { + if (ranklist[i] == Globals::my_rank) { + ffc_send_.emplace_back(i, n, xf3^1 + 4, sffc3, -1); + if (newrank[n] == Globals::my_rank) + ffc_recv_.emplace_back(i, n, xf3 + 4, 0, s); + s++; + } else if (newrank[n] == Globals::my_rank) { + ffc_recv_.emplace_back(i, n, xf3 + 4, sffc3, -1); + } + } + } + } + } + } + + // Step FFC2. Initiate MPI receive +#ifdef MPI_PARALLEL + for (FaceFieldCorrection &t : ffc_recv_) { + if (ranklist[t.from] != Globals::my_rank) { + int tag = CreateFaceFieldCorrectionMPITag(t.to-nslist[newrank[t.to]], t.face); + MPI_Irecv(t.buf, t.size, MPI_ATHENA_REAL, ranklist[t.from], + tag, MPI_COMM_WORLD, &(t.req)); + } + } +#endif + + // Step FFC3. Pack and send shared face fields + for (FaceFieldCorrection &f : ffc_send_) { + MeshBlock *pmb = FindMeshBlock(f.from); + int p = 0; + for (FaceField &var_fc : pmb->vars_fc_) { + switch (f.face) { + case BoundaryFace::inner_x1: + BufferUtility::PackData(var_fc.x1f, f.buf, pmb->is, pmb->is, + pmb->js, pmb->je, pmb->ks, pmb->ke, p); + break; + case BoundaryFace::outer_x1: + BufferUtility::PackData(var_fc.x1f, f.buf, pmb->ie+1, pmb->ie+1, + pmb->js, pmb->je, pmb->ks, pmb->ke, p); + break; + case BoundaryFace::inner_x2: + BufferUtility::PackData(var_fc.x2f, f.buf, pmb->is, pmb->ie, + pmb->js, pmb->js, pmb->ks, pmb->ke, p); + break; + case BoundaryFace::outer_x2: + BufferUtility::PackData(var_fc.x2f, f.buf, pmb->is, pmb->ie, + pmb->je+f2, pmb->je+f2, pmb->ks, pmb->ke, p); + break; + case BoundaryFace::inner_x3: + BufferUtility::PackData(var_fc.x3f, f.buf, pmb->is, pmb->ie, + pmb->js, pmb->je, pmb->ks, pmb->ks, p); + break; + case BoundaryFace::outer_x3: + BufferUtility::PackData(var_fc.x3f, f.buf, pmb->is, pmb->ie, + pmb->js, pmb->je, pmb->ke+f3, pmb->ke+f3, p); + break; + default: + break; + } + } +#ifdef MPI_PARALLEL + if (newrank[f.to] != Globals::my_rank) { + int face = f.face^1; + int tag = CreateFaceFieldCorrectionMPITag(f.to-nslist[newrank[f.to]], face); + MPI_Isend(f.buf, f.size, MPI_ATHENA_REAL, newrank[f.to], + tag, MPI_COMM_WORLD, &(f.req)); + } +#endif + } +} + + +//---------------------------------------------------------------------------------------- +//! \fn void Mesh::ReceiveAndSetFaceFieldCorrection(int *newrank) +//! \brief the second part of the face field correction + +void Mesh::ReceiveAndSetFaceFieldCorrection(int *newrank) { + // Step FFC4. clear face field correction flags + for (int n = 0; n < nblocal; ++n) { + MeshRefinement *pmr = my_blocks(n)->pmr; + for (int i = 0; i < 6; ++i) + pmr->flag_ffc_recv_[i] = false; + } + + // Step FFC5. wait and unpack/set + for (FaceFieldCorrection &t : ffc_recv_) { + MeshBlock *pmb = FindMeshBlock(t.to); + pmb->pmr->flag_ffc_recv_[t.face] = true; + Real *buf = t.buf; + if (ranklist[t.from] == Globals::my_rank) // local copy + buf = ffc_send_[t.src].buf; +#ifdef MPI_PARALLEL + else // MPI + MPI_Wait(&(t.req), MPI_STATUS_IGNORE); +#endif + int p = 0; + for (FaceField &var_fc : pmb->vars_fc_) { + switch (t.face) { + case BoundaryFace::inner_x1: + BufferUtility::UnpackData(buf, var_fc.x1f, pmb->is, pmb->is, + pmb->js, pmb->je, pmb->ks, pmb->ke, p); + break; + case BoundaryFace::outer_x1: + BufferUtility::UnpackData(buf, var_fc.x1f, pmb->ie+1, pmb->ie+1, + pmb->js, pmb->je, pmb->ks, pmb->ke, p); + break; + case BoundaryFace::inner_x2: + BufferUtility::UnpackData(buf, var_fc.x2f, pmb->is, pmb->ie, + pmb->js, pmb->js, pmb->ks, pmb->ke, p); + break; + case BoundaryFace::outer_x2: + BufferUtility::UnpackData(buf, var_fc.x2f, pmb->is, pmb->ie, + pmb->je+f2, pmb->je+f2, pmb->ks, pmb->ke, p); + break; + case BoundaryFace::inner_x3: + BufferUtility::UnpackData(buf, var_fc.x3f, pmb->is, pmb->ie, + pmb->js, pmb->je, pmb->ks, pmb->ks, p); + break; + case BoundaryFace::outer_x3: + BufferUtility::UnpackData(buf, var_fc.x3f, pmb->is, pmb->ie, + pmb->js, pmb->je, pmb->ke+f3, pmb->ke+f3, p); + break; + default: + break; + } + } + } + + // Step FFC6. Finalize MPI send +#ifdef MPI_PARALLEL + for (FaceFieldCorrection &f : ffc_send_) { + if (newrank[f.to] != Globals::my_rank) { + MPI_Wait(&(f.req), MPI_STATUS_IGNORE); + } + } +#endif +} + //---------------------------------------------------------------------------------------- //! \fn int Mesh::CreateAMRMPITag(int lid, int ox1, int ox2, int ox3) //! \brief calculate an MPI tag for AMR block transfer @@ -1223,3 +1449,19 @@ int Mesh::CreateAMRMPITag(int lid, int ox1, int ox2, int ox3) { // former "AthenaTagMPI" AthenaTagMPI::amr=8 redefined to 0 return (lid<<8) | (ox1<<7)| (ox2<<6) | (ox3<<5); } + +//---------------------------------------------------------------------------------------- +//! \fn int Mesh::CreateFaceFieldCorrectionMPITag(int lid, int face) +//! \brief calculate an MPI tag for face field correction +//! +//! tag = local id of destination (remaining bits) + ox1(1 bit) + ox2(1 bit) + ox3(1 bit) +//! + physics(5 bits) +//! + +int Mesh::CreateFaceFieldCorrectionMPITag(int lid, int face) { + // Set the bottom bit = 1 + // It should be OK as this communication does not overlap with te main integrator, + // and all we need is to distinguish FFC communications from AMR communications. + return (lid<<8) | (face << 5) | 1; +} + diff --git a/src/mesh/mesh.hpp b/src/mesh/mesh.hpp index 812b702cee..427182057b 100644 --- a/src/mesh/mesh.hpp +++ b/src/mesh/mesh.hpp @@ -16,6 +16,7 @@ // C++ headers #include // int64_t #include // reference_wrapper +#include #include #include @@ -341,6 +342,11 @@ class Mesh { double lb_tolerance_; int lb_interval_; + // for AMR face field correction + std::unordered_map *locmap_; + std::vector refined_; + std::vector ffc_send_, ffc_recv_; + // functions MeshGenFunc MeshGenerator_[3]; BValFunc BoundaryFunction_[6]; @@ -392,7 +398,14 @@ class Mesh { // step 8: receive void FinishRecvSameLevel(MeshBlock *pb, Real *recvbuf); void FinishRecvFineToCoarseAMR(MeshBlock *pb, Real *recvbuf, LogicalLocation &lloc); - void FinishRecvCoarseToFineAMR(MeshBlock *pb, Real *recvbuf); + void ReceiveCoarseToFineAMR(MeshBlock *pb, Real *recvbuf); + void ProlongateMeshBlock(MeshBlock *pb); + + // Face field correction + void PrepareAndSendFaceFieldCorrection(LogicalLocation *newloc, + int *ranklist, int *newrank, int *nslist, int nbtold); + void ReceiveAndSetFaceFieldCorrection(int *newrank); + int CreateFaceFieldCorrectionMPITag(int lid, int face); //! defined in either the prob file or default_pgen.cpp in ../pgen/ void InitUserMeshData(ParameterInput *pin); diff --git a/src/mesh/mesh_refinement.cpp b/src/mesh/mesh_refinement.cpp index 7c402995ee..89aacf9678 100644 --- a/src/mesh/mesh_refinement.cpp +++ b/src/mesh/mesh_refinement.cpp @@ -1504,3 +1504,57 @@ void MeshRefinement::SetHydroRefinement(HydroBoundaryQuantity hydro_type) { } return; } + + +// Constructor +FaceFieldCorrection::FaceFieldCorrection(int ifrom, int ito, int iface, int isize, + int isrc) : from(ifrom), to(ito), face(iface), size(isize), src(isrc) { + if (size == 0) + buf = nullptr; + else + buf = new Real[size]; +#ifdef MPI_PARALLEL + req = MPI_REQUEST_NULL; +#endif +} + + +// Copy constructor +FaceFieldCorrection::FaceFieldCorrection(const FaceFieldCorrection& c) { + from = c.from, to = c.to, face = c.face, size = c.size, src = c.src; + if (size == 0) + buf = nullptr; + else + buf = new Real[size]; + for (int i = 0; i < size; ++i) + buf[i] = c.buf[i]; +#ifdef MPI_PARALLEL + req = c.req; +#endif +} + + +// Move constructor +FaceFieldCorrection::FaceFieldCorrection(FaceFieldCorrection&& c) { + from = c.from, to = c.to, face = c.face, size = c.size, src = c.src; + buf = c.buf; + c.buf = nullptr; +#ifdef MPI_PARALLEL + req = c.req; +#endif +} + + +// Copy assignment operator +FaceFieldCorrection& FaceFieldCorrection::operator=(const FaceFieldCorrection &c) { + if (this != &c) { + from = c.from, to = c.to, face = c.face, size = c.size, src = c.src; + for (int i = 0; i < size; ++i) + buf[i] = c.buf[i]; + } +#ifdef MPI_PARALLEL + req = c.req; +#endif + return *this; +} + diff --git a/src/mesh/mesh_refinement.hpp b/src/mesh/mesh_refinement.hpp index 208b41340e..feee602e9b 100644 --- a/src/mesh/mesh_refinement.hpp +++ b/src/mesh/mesh_refinement.hpp @@ -107,6 +107,27 @@ class MeshRefinement { // tuples of references to AMR-enrolled arrays (quantity, coarse_quantity) std::vector *, AthenaArray *>> pvars_cc_; std::vector> pvars_fc_; + + bool flag_ffc_recv_[6]; +}; + + +//---------------------------------------------------------------------------------------- +//! \struct FaceFieldCorrection +//! \brief + +struct FaceFieldCorrection { + int from, to, face, size, src; + Real *buf; +#ifdef MPI_PARALLEL + MPI_Request req; +#endif + + FaceFieldCorrection(int ifrom, int ito, int iface, int isize, int isrc); + ~FaceFieldCorrection() { delete [] buf; } + FaceFieldCorrection(const FaceFieldCorrection& c); + FaceFieldCorrection(FaceFieldCorrection&& c); + FaceFieldCorrection& operator=(const FaceFieldCorrection &c); }; #endif // MESH_MESH_REFINEMENT_HPP_ diff --git a/src/multigrid/multigrid.hpp b/src/multigrid/multigrid.hpp index 8274f8797a..401beacf58 100644 --- a/src/multigrid/multigrid.hpp +++ b/src/multigrid/multigrid.hpp @@ -40,24 +40,6 @@ enum class MGNormType {max, l1, l2}; constexpr int minth_ = 8; -//! \fn inline std::int64_t rotl(std::int64_t i, int s) -// \brief left bit rotation function for 64bit integers (unsafe if s > 64) - -inline std::int64_t rotl(std::int64_t i, int s) { - return (i << s) | (i >> (64 - s)); -} - - -//! \struct LogicalLocationHash -// \brief Hash function object for LogicalLocation - -struct LogicalLocationHash { - public: - std::size_t operator()(const LogicalLocation &l) const { - return static_cast(l.lx1^rotl(l.lx2,21)^rotl(l.lx3,42)); - } -}; - //! \class MGCoordinates // \brief Minimum set of coordinate arrays for Multigrid From 339840b9ef33d8e4b92e1ef09bea6641fb383fe9 Mon Sep 17 00:00:00 2001 From: Kengo Tomida Date: Sun, 1 Sep 2024 19:17:05 +0900 Subject: [PATCH 2/9] Use unordered_map, support periodic BC --- src/mesh/amr_loadbalance.cpp | 134 +++++++++++++++++++++++++---------- src/mesh/mesh.cpp | 7 ++ 2 files changed, 105 insertions(+), 36 deletions(-) diff --git a/src/mesh/amr_loadbalance.cpp b/src/mesh/amr_loadbalance.cpp index 5f3440b782..6de13550bb 100644 --- a/src/mesh/amr_loadbalance.cpp +++ b/src/mesh/amr_loadbalance.cpp @@ -652,9 +652,14 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, int ntot) { delete [] costlist; delete [] newtoold; delete [] oldtonew; - refined_.clear(); - ffc_send_.clear(); - ffc_recv_.clear(); + if (my_blocks(0)->pmr->pvars_fc_.size() > 0) { + refined_.clear(); + ffc_send_.clear(); + ffc_recv_.clear(); + for (int l = 0; l < max_level - root_level - 1; ++l) + locmap_[l].clear(); + } + #ifdef MPI_PARALLEL if (nsend != 0) { MPI_Waitall(nsend, req_send, MPI_STATUSES_IGNORE); @@ -1257,6 +1262,16 @@ void Mesh::PrepareAndSendFaceFieldCorrection(LogicalLocation *newloc, const int bnx3 = my_blocks(0)->block_size.nx3; const int nf = my_blocks(0)->pmr->pvars_fc_.size(); const int sffc1 = nf*bnx2*bnx3, sffc2 = nf*bnx1*bnx3, sffc3 = nf*bnx2*bnx3; + + // Step FFC1. Construct a hash map of MeshBlocks before refinement + for (int i = 0; i < nbtold; ++i) { + LogicalLocation const &loc = loclist[i]; + if (loc.level > root_level) { + int lev = loc.level - root_level - 1; + locmap_[lev][loc] = i; + } + } + // Step FFC1. Find pairs that need face field correction int s = 0; for (int n : refined_) { // loop over refined MeshBlocks @@ -1268,45 +1283,92 @@ void Mesh::PrepareAndSendFaceFieldCorrection(LogicalLocation *newloc, o1 = (xf1 << 1) - 1; o2 = (xf2 << 1) - 1; o3 = (xf3 << 1) - 1; - // *** KT: This loop over all the current MeshBlcks is a naive implementation. - // The MeshBlockTree is already updated at this point, so we have to search loclist. - // Perhaps we should use std::unordered_map if this part costs too much. - // We already have such an example in MultigridDriver (octet map). - for (int i = 0; i < nbtold; ++i) { - LogicalLocation const &floc = loclist[i]; - if (nloc.level == floc.level) { - if (nloc.lx1 + o1 == floc.lx1 && nloc.lx2 == floc.lx2 && nloc.lx3 == floc.lx3) { + LogicalLocation floc = nloc; + int lev = floc.level - root_level - 1; + floc.lx1 += o1; + bool fskip = false; + if (floc.lx1 < 0) { + if (mesh_bcs[BoundaryFace::inner_x1] == BoundaryFlag::periodic) + floc.lx1 = (nrbx1 << (lev+1)) - 1; + else + fskip = true; + } else if (floc.lx1 >= (nrbx1 << (lev+1))) { + if (mesh_bcs[BoundaryFace::outer_x1] == BoundaryFlag::periodic) + floc.lx1 = 0; + else + fskip = true; + } + if (!fskip) { + auto itr = locmap_[lev].find(floc); + if (itr != locmap_[lev].end()) { + const auto& it = *itr; + int i = it.second; + if (ranklist[i] == Globals::my_rank) { + ffc_send_.emplace_back(i, n, xf1^1, sffc1, -1); + if (newrank[n] == Globals::my_rank) + ffc_recv_.emplace_back(i, n, xf1, 0, s); + s++; + } else if (newrank[n] == Globals::my_rank) { + ffc_recv_.emplace_back(i, n, xf1, sffc1, -1); + } + } + } + if (f2) { + floc.lx1 = nloc.lx1, floc.lx2 += o2; + fskip = false; + if (floc.lx2 < 0) { + if (mesh_bcs[BoundaryFace::inner_x2] == BoundaryFlag::periodic) + floc.lx2 = (nrbx2 << (lev+1)) - 1; + else + fskip = true; + } else if (floc.lx2 >= (nrbx2 << (lev+1))) { + if (mesh_bcs[BoundaryFace::outer_x2] == BoundaryFlag::periodic) + floc.lx2 = 0; + else + fskip = true; + } + if (!fskip) { + auto itr = locmap_[lev].find(floc); + if (itr != locmap_[lev].end()) { + const auto& it = *itr; + int i = it.second; if (ranklist[i] == Globals::my_rank) { - ffc_send_.emplace_back(i, n, xf1^1, sffc1, -1); + ffc_send_.emplace_back(i, n, xf2^1 + 2, sffc2, -1); if (newrank[n] == Globals::my_rank) - ffc_recv_.emplace_back(i, n, xf1, 0, s); + ffc_recv_.emplace_back(i, n, xf2 + 2, 0, s); s++; } else if (newrank[n] == Globals::my_rank) { - ffc_recv_.emplace_back(i, n, xf1, sffc1, -1); + ffc_recv_.emplace_back(i, n, xf2 + 2, sffc2, -1); } } - if (f2) { - if (nloc.lx1 == floc.lx1 && nloc.lx2 + o2 == floc.lx2 && nloc.lx3 == floc.lx3) { - if (ranklist[i] == Globals::my_rank) { - ffc_send_.emplace_back(i, n, xf2^1 + 2, sffc2, -1); - if (newrank[n] == Globals::my_rank) - ffc_recv_.emplace_back(i, n, xf2 + 2, 0, s); - s++; - } else if (newrank[n] == Globals::my_rank) { - ffc_recv_.emplace_back(i, n, xf2 + 2, sffc2, -1); - } - } - } - if (f3) { - if (nloc.lx1 == floc.lx1 && nloc.lx2 == floc.lx2 && nloc.lx3 + o3 == floc.lx3) { - if (ranklist[i] == Globals::my_rank) { - ffc_send_.emplace_back(i, n, xf3^1 + 4, sffc3, -1); - if (newrank[n] == Globals::my_rank) - ffc_recv_.emplace_back(i, n, xf3 + 4, 0, s); - s++; - } else if (newrank[n] == Globals::my_rank) { - ffc_recv_.emplace_back(i, n, xf3 + 4, sffc3, -1); - } + } + } + if (f3) { + floc.lx2 = nloc.lx2, floc.lx3 += o3; + fskip = false; + if (floc.lx3 < 0) { + if (mesh_bcs[BoundaryFace::inner_x3] == BoundaryFlag::periodic) + floc.lx3 = (nrbx3 << (lev+1)) - 1; + else + fskip = true; + } else if (floc.lx3 >= (nrbx3 << (lev+1))) { + if (mesh_bcs[BoundaryFace::outer_x3] == BoundaryFlag::periodic) + floc.lx3 = 0; + else + fskip = true; + } + if (!fskip) { + auto itr = locmap_[lev].find(floc); + if (itr != locmap_[lev].end()) { + const auto& it = *itr; + int i = it.second; + if (ranklist[i] == Globals::my_rank) { + ffc_send_.emplace_back(i, n, xf3^1 + 4, sffc3, -1); + if (newrank[n] == Globals::my_rank) + ffc_recv_.emplace_back(i, n, xf3 + 4, 0, s); + s++; + } else if (newrank[n] == Globals::my_rank) { + ffc_recv_.emplace_back(i, n, xf3 + 4, sffc3, -1); } } } diff --git a/src/mesh/mesh.cpp b/src/mesh/mesh.cpp index 435d85f32a..4d7b09d006 100644 --- a/src/mesh/mesh.cpp +++ b/src/mesh/mesh.cpp @@ -518,6 +518,9 @@ Mesh::Mesh(ParameterInput *pin, int mesh_test) : bnderef = new int[Globals::nranks]; brdisp = new int[Globals::nranks]; bddisp = new int[Globals::nranks]; + int nlevel = pin->GetOrAddInteger("mesh", "numlevel", 1); + locmap_ = new std::unordered_map[nlevel - 1]; } // initialize cost array with the simplest estimate; all the blocks are equal @@ -857,6 +860,9 @@ Mesh::Mesh(ParameterInput *pin, IOWrapper& resfile, int mesh_test) : bnderef = new int[Globals::nranks]; brdisp = new int[Globals::nranks]; bddisp = new int[Globals::nranks]; + int nlevel = pin->GetOrAddInteger("mesh", "numlevel", 1); + locmap_ = new std::unordered_map[nlevel - 1]; } CalculateLoadBalance(costlist, ranklist, nslist, nblist, nbtotal); @@ -971,6 +977,7 @@ Mesh::~Mesh() { delete [] bnderef; delete [] brdisp; delete [] bddisp; + delete [] locmap_; } // delete user Mesh data if (nreal_user_mesh_data_>0) delete [] ruser_mesh_data; From ab7d5fede655bf23e2717ce47a7ef20a05cacea5 Mon Sep 17 00:00:00 2001 From: Kengo Tomida Date: Mon, 2 Sep 2024 00:42:51 +0900 Subject: [PATCH 3/9] Style and comment fix --- src/mesh/amr_loadbalance.cpp | 14 +++++++------- src/mesh/mesh.hpp | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mesh/amr_loadbalance.cpp b/src/mesh/amr_loadbalance.cpp index 6de13550bb..8864da054e 100644 --- a/src/mesh/amr_loadbalance.cpp +++ b/src/mesh/amr_loadbalance.cpp @@ -1225,7 +1225,7 @@ void Mesh::ProlongateMeshBlock(MeshBlock *pb) { int il = pb->cis, iu = pb->cie+1, jl = pb->cjs, ju = pb->cje + f2, kl = pb->cks, ku = pb->cke + f3; - // Step FFC6. skip the surface fields contacting previously refined cells + // Step FFC8. skip the surface fields contacting previously refined MeshBlocks if (pmr->flag_ffc_recv_[BoundaryFace::inner_x1]) il++; if (pmr->flag_ffc_recv_[BoundaryFace::outer_x1]) iu--; if (pmr->flag_ffc_recv_[BoundaryFace::inner_x2]) jl++; @@ -1272,7 +1272,7 @@ void Mesh::PrepareAndSendFaceFieldCorrection(LogicalLocation *newloc, } } - // Step FFC1. Find pairs that need face field correction + // Step FFC2. Find pairs that need face field correction int s = 0; for (int n : refined_) { // loop over refined MeshBlocks LogicalLocation const &nloc = newloc[n]; @@ -1375,7 +1375,7 @@ void Mesh::PrepareAndSendFaceFieldCorrection(LogicalLocation *newloc, } } - // Step FFC2. Initiate MPI receive + // Step FFC3. Initiate MPI receive #ifdef MPI_PARALLEL for (FaceFieldCorrection &t : ffc_recv_) { if (ranklist[t.from] != Globals::my_rank) { @@ -1386,7 +1386,7 @@ void Mesh::PrepareAndSendFaceFieldCorrection(LogicalLocation *newloc, } #endif - // Step FFC3. Pack and send shared face fields + // Step FFC4. Pack and send shared face fields for (FaceFieldCorrection &f : ffc_send_) { MeshBlock *pmb = FindMeshBlock(f.from); int p = 0; @@ -1437,14 +1437,14 @@ void Mesh::PrepareAndSendFaceFieldCorrection(LogicalLocation *newloc, //! \brief the second part of the face field correction void Mesh::ReceiveAndSetFaceFieldCorrection(int *newrank) { - // Step FFC4. clear face field correction flags + // Step FFC5. clear face field correction flags for (int n = 0; n < nblocal; ++n) { MeshRefinement *pmr = my_blocks(n)->pmr; for (int i = 0; i < 6; ++i) pmr->flag_ffc_recv_[i] = false; } - // Step FFC5. wait and unpack/set + // Step FFC6. wait and unpack/set for (FaceFieldCorrection &t : ffc_recv_) { MeshBlock *pmb = FindMeshBlock(t.to); pmb->pmr->flag_ffc_recv_[t.face] = true; @@ -1488,7 +1488,7 @@ void Mesh::ReceiveAndSetFaceFieldCorrection(int *newrank) { } } - // Step FFC6. Finalize MPI send + // Step FFC7. Finalize MPI send #ifdef MPI_PARALLEL for (FaceFieldCorrection &f : ffc_send_) { if (newrank[f.to] != Globals::my_rank) { diff --git a/src/mesh/mesh.hpp b/src/mesh/mesh.hpp index 427182057b..c6f1349826 100644 --- a/src/mesh/mesh.hpp +++ b/src/mesh/mesh.hpp @@ -16,8 +16,8 @@ // C++ headers #include // int64_t #include // reference_wrapper -#include #include +#include #include // Athena++ headers From 578e0d0ae3fbcf116e8ad978b4e2b3799291a445 Mon Sep 17 00:00:00 2001 From: Kengo Tomida Date: Tue, 10 Sep 2024 15:16:54 +0900 Subject: [PATCH 4/9] locmap was not cleared correctly --- src/mesh/amr_loadbalance.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesh/amr_loadbalance.cpp b/src/mesh/amr_loadbalance.cpp index 8864da054e..0219f9418d 100644 --- a/src/mesh/amr_loadbalance.cpp +++ b/src/mesh/amr_loadbalance.cpp @@ -656,7 +656,7 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, int ntot) { refined_.clear(); ffc_send_.clear(); ffc_recv_.clear(); - for (int l = 0; l < max_level - root_level - 1; ++l) + for (int l = 0; l < max_level - root_level; ++l) locmap_[l].clear(); } From 5413eecf26a4bb3e979b4154c6fe2af099e47f21 Mon Sep 17 00:00:00 2001 From: Kengo Tomida Date: Thu, 19 Sep 2024 01:44:02 +0900 Subject: [PATCH 5/9] Minor update for performance Strore the AMR buffer sizes instead of calculating it at every step. --- src/mesh/amr_loadbalance.cpp | 50 +++------------------ src/mesh/mesh.cpp | 86 ++++++++++++++++++++++++++++++++++++ src/mesh/mesh.hpp | 1 + 3 files changed, 92 insertions(+), 45 deletions(-) diff --git a/src/mesh/amr_loadbalance.cpp b/src/mesh/amr_loadbalance.cpp index 0219f9418d..7a3df121f1 100644 --- a/src/mesh/amr_loadbalance.cpp +++ b/src/mesh/amr_loadbalance.cpp @@ -396,10 +396,6 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, int ntot) { int nbs = nslist[Globals::my_rank]; int nbe = nbs + nblist[Globals::my_rank] - 1; - const int bnx1 = my_blocks(0)->block_size.nx1; - const int bnx2 = my_blocks(0)->block_size.nx2; - const int bnx3 = my_blocks(0)->block_size.nx3; - #ifdef MPI_PARALLEL // Step 3. count the number of the blocks to be sent / received int nsend = 0, nrecv = 0; @@ -428,45 +424,9 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, int ntot) { } } - // Step 4. calculate buffer sizes + // Step 4. allocate and start receiving buffers Real **sendbuf, **recvbuf; - // use the first MeshBlock in the linked list of blocks belonging to this MPI rank as a - // representative of all MeshBlocks for counting the "load-balancing registered" and - // "SMR/AMR-enrolled" quantities (loop over MeshBlock::vars_cc_, not MeshRefinement) - - //! \todo (felker): - //! * add explicit check to ensure that elements of pb->vars_cc/fc_ and - //! pb->pmr->pvars_cc/fc_ v point to the same objects, if adaptive - - // int num_cc = my_blocks(0)->pmr->pvars_cc_.size(); - int num_fc = my_blocks(0)->vars_fc_.size(); - int nx4_tot = 0; - for (AthenaArray &var_cc : my_blocks(0)->vars_cc_) { - nx4_tot += var_cc.GetDim4(); - } - // radiation variables are not included in vars_cc as they need different order - if ((NR_RADIATION_ENABLED|| IM_RADIATION_ENABLED)) { - nx4_tot += my_blocks(0)->pnrrad->ir.GetDim1(); - } - - // cell-centered quantities enrolled in SMR/AMR - int bssame = bnx1*bnx2*bnx3*nx4_tot; - int bsf2c = (bnx1/2)*((bnx2 + 1)/2)*((bnx3 + 1)/2)*nx4_tot; - int bsc2f = (bnx1/2 + 2)*((bnx2 + 1)/2 + 2*f2)*((bnx3 + 1)/2 + 2*f3)*nx4_tot; - // face-centered quantities enrolled in SMR/AMR - bssame += num_fc*((bnx1 + 1)*bnx2*bnx3 + bnx1*(bnx2 + f2)*bnx3 - + bnx1*bnx2*(bnx3 + f3)); - bsf2c += num_fc*(((bnx1/2) + 1)*((bnx2 + 1)/2)*((bnx3 + 1)/2) - + (bnx1/2)*(((bnx2 + 1)/2) + f2)*((bnx3 + 1)/2) - + (bnx1/2)*((bnx2 + 1)/2)*(((bnx3 + 1)/2) + f3)); - bsc2f += num_fc*(((bnx1/2) + 1 + 2)*((bnx2 + 1)/2 + 2*f2)*((bnx3 + 1)/2 + 2*f3) - + (bnx1/2 + 2)*(((bnx2 + 1)/2) + f2 + 2*f2)*((bnx3 + 1)/2 + 2*f3) - + (bnx1/2 + 2)*((bnx2 + 1)/2 + 2*f2)*(((bnx3 + 1)/2) + f3 + 2*f3)); - // add one more element to buffer size for storing the derefinement counter - bssame++; - MPI_Request *req_send, *req_recv; - // Step 5. allocate and start receiving buffers if (nrecv != 0) { recvbuf = new Real*[nrecv]; req_recv = new MPI_Request[nrecv]; @@ -503,7 +463,7 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, int ntot) { } } } - // Step 6. allocate, pack and start sending buffers + // Step 5. allocate, pack and start sending buffers if (nsend != 0) { sendbuf = new Real*[nsend]; req_send = new MPI_Request[nsend]; @@ -550,7 +510,7 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, int ntot) { if (my_blocks(0)->pmr->pvars_fc_.size() > 0) PrepareAndSendFaceFieldCorrection(newloc, ranklist, newrank, nslist, nbtold); - // Step 7. construct a new MeshBlock list (moving the data within the MPI rank) + // Step 6. construct a new MeshBlock list (moving the data within the MPI rank) AthenaArray newlist; newlist.NewAthenaArray(nblist[Globals::my_rank]); RegionSize block_size = my_blocks(0)->block_size; @@ -600,7 +560,7 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, int ntot) { gids_ = nbs; gide_ = nbe; - // Step 8. Receive the data and load into MeshBlocks + // Step 7. Receive the data and load into MeshBlocks // This is a test: try MPI_Waitall later. #ifdef MPI_PARALLEL if (nrecv != 0) { @@ -635,7 +595,7 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, int ntot) { if (my_blocks(0)->pmr->pvars_fc_.size() > 0) ReceiveAndSetFaceFieldCorrection(newrank); - // Step 8b. Prolongate MeshBlocks + // Step 7b. Prolongate MeshBlocks for (int n=nbs; n<=nbe; n++) { int on = newtoold[n]; LogicalLocation const &oloc = loclist[on]; diff --git a/src/mesh/mesh.cpp b/src/mesh/mesh.cpp index 4d7b09d006..9efe9570dc 100644 --- a/src/mesh/mesh.cpp +++ b/src/mesh/mesh.cpp @@ -568,6 +568,49 @@ Mesh::Mesh(ParameterInput *pin, int mesh_test) : if (turb_flag > 0) // TurbulenceDriver depends on the MeshBlock ctor ptrbd = new TurbulenceDriver(this, pin); + +#ifdef MPI_PARALLEL + if (adaptive) { + const int bnx1 = block_size.nx1; + const int bnx2 = block_size.nx2; + const int bnx3 = block_size.nx3; + + // use the first MeshBlock in the linked list of blocks belonging to this MPI rank as a + // representative of all MeshBlocks for counting the "load-balancing registered" and + // "SMR/AMR-enrolled" quantities (loop over MeshBlock::vars_cc_, not MeshRefinement) + + //! \todo (felker): + //! * add explicit check to ensure that elements of pb->vars_cc/fc_ and + //! pb->pmr->pvars_cc/fc_ v point to the same objects, if adaptive + + // int num_cc = my_blocks(0)->pmr->pvars_cc_.size(); + int num_fc = my_blocks(0)->vars_fc_.size(); + int nx4_tot = 0; + for (AthenaArray &var_cc : my_blocks(0)->vars_cc_) { + nx4_tot += var_cc.GetDim4(); + } + // radiation variables are not included in vars_cc as they need different order + if ((NR_RADIATION_ENABLED|| IM_RADIATION_ENABLED)) { + nx4_tot += my_blocks(0)->pnrrad->ir.GetDim1(); + } + + // cell-centered quantities enrolled in SMR/AMR + bssame = bnx1*bnx2*bnx3*nx4_tot; + bsf2c = (bnx1/2)*((bnx2 + 1)/2)*((bnx3 + 1)/2)*nx4_tot; + bsc2f = (bnx1/2 + 2)*((bnx2 + 1)/2 + 2*f2)*((bnx3 + 1)/2 + 2*f3)*nx4_tot; + // face-centered quantities enrolled in SMR/AMR + bssame += num_fc*((bnx1 + 1)*bnx2*bnx3 + bnx1*(bnx2 + f2)*bnx3 + + bnx1*bnx2*(bnx3 + f3)); + bsf2c += num_fc*(((bnx1/2) + 1)*((bnx2 + 1)/2)*((bnx3 + 1)/2) + + (bnx1/2)*(((bnx2 + 1)/2) + f2)*((bnx3 + 1)/2) + + (bnx1/2)*((bnx2 + 1)/2)*(((bnx3 + 1)/2) + f3)); + bsc2f += num_fc*(((bnx1/2) + 1 + 2)*((bnx2 + 1)/2 + 2*f2)*((bnx3 + 1)/2 + 2*f3) + + (bnx1/2 + 2)*(((bnx2 + 1)/2) + f2 + 2*f2)*((bnx3 + 1)/2 + 2*f3) + + (bnx1/2 + 2)*((bnx2 + 1)/2 + 2*f2)*(((bnx3 + 1)/2) + f3 + 2*f3)); + // add one more element to buffer size for storing the derefinement counter + bssame++; + } +#endif } //---------------------------------------------------------------------------------------- @@ -950,6 +993,49 @@ Mesh::Mesh(ParameterInput *pin, IOWrapper& resfile, int mesh_test) : if (turb_flag > 0) // TurbulenceDriver depends on the MeshBlock ctor ptrbd = new TurbulenceDriver(this, pin); + +#ifdef MPI_PARALLEL + if (adaptive) { + const int bnx1 = block_size.nx1; + const int bnx2 = block_size.nx2; + const int bnx3 = block_size.nx3; + + // use the first MeshBlock in the linked list of blocks belonging to this MPI rank as a + // representative of all MeshBlocks for counting the "load-balancing registered" and + // "SMR/AMR-enrolled" quantities (loop over MeshBlock::vars_cc_, not MeshRefinement) + + //! \todo (felker): + //! * add explicit check to ensure that elements of pb->vars_cc/fc_ and + //! pb->pmr->pvars_cc/fc_ v point to the same objects, if adaptive + + // int num_cc = my_blocks(0)->pmr->pvars_cc_.size(); + int num_fc = my_blocks(0)->vars_fc_.size(); + int nx4_tot = 0; + for (AthenaArray &var_cc : my_blocks(0)->vars_cc_) { + nx4_tot += var_cc.GetDim4(); + } + // radiation variables are not included in vars_cc as they need different order + if ((NR_RADIATION_ENABLED|| IM_RADIATION_ENABLED)) { + nx4_tot += my_blocks(0)->pnrrad->ir.GetDim1(); + } + + // cell-centered quantities enrolled in SMR/AMR + bssame = bnx1*bnx2*bnx3*nx4_tot; + bsf2c = (bnx1/2)*((bnx2 + 1)/2)*((bnx3 + 1)/2)*nx4_tot; + bsc2f = (bnx1/2 + 2)*((bnx2 + 1)/2 + 2*f2)*((bnx3 + 1)/2 + 2*f3)*nx4_tot; + // face-centered quantities enrolled in SMR/AMR + bssame += num_fc*((bnx1 + 1)*bnx2*bnx3 + bnx1*(bnx2 + f2)*bnx3 + + bnx1*bnx2*(bnx3 + f3)); + bsf2c += num_fc*(((bnx1/2) + 1)*((bnx2 + 1)/2)*((bnx3 + 1)/2) + + (bnx1/2)*(((bnx2 + 1)/2) + f2)*((bnx3 + 1)/2) + + (bnx1/2)*((bnx2 + 1)/2)*(((bnx3 + 1)/2) + f3)); + bsc2f += num_fc*(((bnx1/2) + 1 + 2)*((bnx2 + 1)/2 + 2*f2)*((bnx3 + 1)/2 + 2*f3) + + (bnx1/2 + 2)*(((bnx2 + 1)/2) + f2 + 2*f2)*((bnx3 + 1)/2 + 2*f3) + + (bnx1/2 + 2)*((bnx2 + 1)/2 + 2*f2)*(((bnx3 + 1)/2) + f3 + 2*f3)); + // add one more element to buffer size for storing the derefinement counter + bssame++; + } +#endif } //---------------------------------------------------------------------------------------- diff --git a/src/mesh/mesh.hpp b/src/mesh/mesh.hpp index c6f1349826..24fb49b34a 100644 --- a/src/mesh/mesh.hpp +++ b/src/mesh/mesh.hpp @@ -341,6 +341,7 @@ class Mesh { bool lb_flag_, lb_automatic_, lb_manual_; double lb_tolerance_; int lb_interval_; + int bssame, bsf2c, bsc2f; // for AMR face field correction std::unordered_map *locmap_; From 19c0c29aef64cae5c15ec031da67dbe1c547143e Mon Sep 17 00:00:00 2001 From: Kengo Tomida Date: Thu, 19 Sep 2024 21:40:03 +0900 Subject: [PATCH 6/9] style fix --- src/mesh/mesh.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesh/mesh.cpp b/src/mesh/mesh.cpp index 9efe9570dc..eef2015ff6 100644 --- a/src/mesh/mesh.cpp +++ b/src/mesh/mesh.cpp @@ -575,8 +575,8 @@ Mesh::Mesh(ParameterInput *pin, int mesh_test) : const int bnx2 = block_size.nx2; const int bnx3 = block_size.nx3; - // use the first MeshBlock in the linked list of blocks belonging to this MPI rank as a - // representative of all MeshBlocks for counting the "load-balancing registered" and + // use the first MeshBlock in the linked list of blocks belonging to this MPI rank as + // a representative of all MeshBlocks for counting the "load-balancing registered" and // "SMR/AMR-enrolled" quantities (loop over MeshBlock::vars_cc_, not MeshRefinement) //! \todo (felker): @@ -1000,8 +1000,8 @@ Mesh::Mesh(ParameterInput *pin, IOWrapper& resfile, int mesh_test) : const int bnx2 = block_size.nx2; const int bnx3 = block_size.nx3; - // use the first MeshBlock in the linked list of blocks belonging to this MPI rank as a - // representative of all MeshBlocks for counting the "load-balancing registered" and + // use the first MeshBlock in the linked list of blocks belonging to this MPI rank as + // a representative of all MeshBlocks for counting the "load-balancing registered" and // "SMR/AMR-enrolled" quantities (loop over MeshBlock::vars_cc_, not MeshRefinement) //! \todo (felker): From 4a27016baad4680efcf8fd6825be22d35f02c284 Mon Sep 17 00:00:00 2001 From: Kyle Gerard Felker Date: Mon, 14 Oct 2024 14:37:05 -0500 Subject: [PATCH 7/9] Update src/mesh/amr_loadbalance.cpp --- src/mesh/amr_loadbalance.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesh/amr_loadbalance.cpp b/src/mesh/amr_loadbalance.cpp index 7a3df121f1..31a18afec7 100644 --- a/src/mesh/amr_loadbalance.cpp +++ b/src/mesh/amr_loadbalance.cpp @@ -1220,7 +1220,7 @@ void Mesh::PrepareAndSendFaceFieldCorrection(LogicalLocation *newloc, const int bnx1 = my_blocks(0)->block_size.nx1; const int bnx2 = my_blocks(0)->block_size.nx2; const int bnx3 = my_blocks(0)->block_size.nx3; - const int nf = my_blocks(0)->pmr->pvars_fc_.size(); + const int nf = static_cast(my_blocks(0)->pmr->pvars_fc_.size()); const int sffc1 = nf*bnx2*bnx3, sffc2 = nf*bnx1*bnx3, sffc3 = nf*bnx2*bnx3; // Step FFC1. Construct a hash map of MeshBlocks before refinement From b7e8f9959220362d38ed89991fce9e4e1480f892 Mon Sep 17 00:00:00 2001 From: Kyle Gerard Felker Date: Mon, 14 Oct 2024 21:05:15 -0500 Subject: [PATCH 8/9] Apply suggestions from code review --- src/mesh/amr_loadbalance.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesh/amr_loadbalance.cpp b/src/mesh/amr_loadbalance.cpp index 31a18afec7..352f59518b 100644 --- a/src/mesh/amr_loadbalance.cpp +++ b/src/mesh/amr_loadbalance.cpp @@ -1293,7 +1293,7 @@ void Mesh::PrepareAndSendFaceFieldCorrection(LogicalLocation *newloc, const auto& it = *itr; int i = it.second; if (ranklist[i] == Globals::my_rank) { - ffc_send_.emplace_back(i, n, xf2^1 + 2, sffc2, -1); + ffc_send_.emplace_back(i, n, xf2^(1 + 2), sffc2, -1); if (newrank[n] == Globals::my_rank) ffc_recv_.emplace_back(i, n, xf2 + 2, 0, s); s++; @@ -1323,7 +1323,7 @@ void Mesh::PrepareAndSendFaceFieldCorrection(LogicalLocation *newloc, const auto& it = *itr; int i = it.second; if (ranklist[i] == Globals::my_rank) { - ffc_send_.emplace_back(i, n, xf3^1 + 4, sffc3, -1); + ffc_send_.emplace_back(i, n, xf3^(1 + 4), sffc3, -1); if (newrank[n] == Globals::my_rank) ffc_recv_.emplace_back(i, n, xf3 + 4, 0, s); s++; From fed4b64c9856f1daeb4c78c4c18bae38a484434c Mon Sep 17 00:00:00 2001 From: Kengo TOMIDA Date: Tue, 15 Oct 2024 12:20:04 +0900 Subject: [PATCH 9/9] Update amr_loadbalance.cpp It was my mistake and I have to thank the compiler. --- src/mesh/amr_loadbalance.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesh/amr_loadbalance.cpp b/src/mesh/amr_loadbalance.cpp index 352f59518b..7a140bed6f 100644 --- a/src/mesh/amr_loadbalance.cpp +++ b/src/mesh/amr_loadbalance.cpp @@ -1293,7 +1293,7 @@ void Mesh::PrepareAndSendFaceFieldCorrection(LogicalLocation *newloc, const auto& it = *itr; int i = it.second; if (ranklist[i] == Globals::my_rank) { - ffc_send_.emplace_back(i, n, xf2^(1 + 2), sffc2, -1); + ffc_send_.emplace_back(i, n, (xf2^1) + 2, sffc2, -1); if (newrank[n] == Globals::my_rank) ffc_recv_.emplace_back(i, n, xf2 + 2, 0, s); s++; @@ -1323,7 +1323,7 @@ void Mesh::PrepareAndSendFaceFieldCorrection(LogicalLocation *newloc, const auto& it = *itr; int i = it.second; if (ranklist[i] == Globals::my_rank) { - ffc_send_.emplace_back(i, n, xf3^(1 + 4), sffc3, -1); + ffc_send_.emplace_back(i, n, (xf3^1) + 4, sffc3, -1); if (newrank[n] == Globals::my_rank) ffc_recv_.emplace_back(i, n, xf3 + 4, 0, s); s++;