From e8ac047887e61fea66cdb357bb8e37b8db04a6f3 Mon Sep 17 00:00:00 2001
From: Iulian Grindeanu
Date: Mon, 11 Nov 2024 16:01:29 -0600
Subject: [PATCH] replace collective blocking calls

When we write in parallel, the number of MFIter iterations can differ
from one MPI task to the next, so the collective blocking put_all calls
cannot be matched across tasks in general. Post non-blocking iput
requests instead and complete each group of variables with a single
collective wait_all.
---
Review notes (text in this section is not part of the commit message):

Changes relative to the first version of this patch
 - The pinned staging FABs are moved into `staging` and released only
   after wait_all has returned. Before, each FAB was destroyed at the end
   of its scope while its write request was still pending.
 - svstr: a request slot is reserved before iput (it used to write one
   past the end of `requests`). The separate irq counter is replaced by
   requests.back() / requests.size() so the two cannot get out of sync.
 - NCFile::wait_all checks the return code and the per-request statuses
   and no longer takes the address of element 0 of an empty vector.
 - iput is declared ahead of the doc comment of the strided put instead
   of between that comment and the function it describes.
 - The std::cout debugging lines are removed.

Caveats
 - The reviewed copy of the patch had lost its line breaks, indentation
   and angle-bracket template arguments. Unchanged and removed lines are
   reproduced token for token (e.g. "std::vector&", "static_cast(",
   ".template copy("). Indentation, blank context lines and the
   "void" / "put(" line split are reconstructed so that they agree with
   the old-side hunk line counts. Regenerate the patch from a checkout
   before applying it.
 - In added lines the template arguments are restored as
   std::vector<MPI_Offset> (from the ncmpi_iput_vara_double prototype),
   std::vector<int>, and copy<RunOn::Device>. The last one is an
   assumption; confirm it against the tree.
 - tmp_bathy is declared above the visible hunk. It is assumed to be a
   non-const local FArrayBox like the other staging buffers.
 - std::move assumes <utility> is already reachable in NCPlotFile.cpp.
 - The "index" lines still carry the blob ids of the first version.

 Source/IO/NCInterface.H   |  14 +++
 Source/IO/NCInterface.cpp |  26 +++++
 Source/IO/NCPlotFile.cpp  | 206 ++++++++++++++++++++++----------------
 3 files changed, 162 insertions(+), 84 deletions(-)

diff --git a/Source/IO/NCInterface.H b/Source/IO/NCInterface.H
index c8131ea..36f1a40 100644
--- a/Source/IO/NCInterface.H
+++ b/Source/IO/NCInterface.H
@@ -83,6 +83,16 @@ struct NCVar
         const std::vector&) const;
+
+    //! Post a non-blocking write of a slice of data. The id of the pending
+    //! request is stored in *request; dptr must stay valid and unmodified
+    //! until that request has been completed through NCFile::wait_all
+    void iput(
+        const double* dptr,
+        const std::vector<MPI_Offset>& start,
+        const std::vector<MPI_Offset>& count,
+        int * request) const ;
+
     //! Write out a slice of data with with strides (see hyperslab definition in
     //! NetCDF)
     void
     put(const double*,
         const std::vector&,
@@ -329,6 +339,10 @@ public:
     void get_attr(const std::string& name, std::vector& value) const;
     void get_attr(const std::string& name, std::vector& value) const;
 
+    //! Complete the given non-blocking requests. Collective call: every task
+    //! has to make it, passing num_requests == 0 if it posted nothing
+    void wait_all( int num_requests, int * requests);
+
     //! Return a list of all dimensions defined in this group
     std::vector all_dims() const;
 
diff --git a/Source/IO/NCInterface.cpp b/Source/IO/NCInterface.cpp
index 2384dbf..c97b065 100644
--- a/Source/IO/NCInterface.cpp
+++ b/Source/IO/NCInterface.cpp
@@ -98,6 +98,18 @@ void NCVar::put_all(
         ncmpi_put_vara_double_all(ncid, varid, start.data(), count.data(), dptr));
 }
 
+//! Post a non-blocking write of a slice of data; dptr must stay valid and
+//! unmodified until the request is completed through NCFile::wait_all
+void NCVar::iput(
+    const double* dptr,
+    const std::vector<MPI_Offset>& start,
+    const std::vector<MPI_Offset>& count,
+    int * request) const
+{
+    check_ncmpi_error(
+        ncmpi_iput_vara_double(ncid, varid, start.data(), count.data(), dptr, request));
+}
+
 void NCVar::put(
     const double* dptr,
     const std::vector& start,
@@ -656,6 +668,20 @@ NCFile NCFile::open(
     return NCFile(ncid);
 }
 
+//! Complete pending non-blocking requests. Collective: has to be called by
+//! every task, also by one that has no request to complete
+void NCFile::wait_all( int num_requests, int * requests)
+{
+    std::vector<int> statuses(num_requests > 0 ? num_requests : 0);
+    // data() is valid for an empty vector, &statuses[0] is not
+    check_ncmpi_error(
+        ncmpi_wait_all(ncid, num_requests, requests, statuses.data()));
+    // the call itself can succeed while an individual request failed
+    for (const int status : statuses) {
+        check_ncmpi_error(status);
+    }
+}
+
 NCFile::~NCFile()
 {
     if (is_open) check_ncmpi_error(ncmpi_close(ncid));
diff --git a/Source/IO/NCPlotFile.cpp b/Source/IO/NCPlotFile.cpp
index 7a96100..b6aef9d 100644
--- a/Source/IO/NCPlotFile.cpp
+++ b/Source/IO/NCPlotFile.cpp
@@ -346,6 +346,12 @@ REMORA::WriteNCPlotFile_which(int lev, int which_subdomain,
 
     mask_arrays_for_write(lev, (Real) fill_value);
 
+    // Ids of the non-blocking writes posted for the current group of
+    // variables; they are completed together by ncf.wait_all().
+    std::vector<int> requests;
+    // A write buffer has to stay untouched until its request has been waited
+    // on, so every staging FAB is parked here until wait_all() has returned.
+    std::vector<FArrayBox> staging;
     for (MFIter mfi(*cons_new[lev],false); mfi.isValid(); ++mfi)
     {
         auto bx = mfi.validbox();
@@ -386,48 +392,60 @@ REMORA::WriteNCPlotFile_which(int lev, int which_subdomain,
 
                 auto nc_plot_var = ncf.var("h");
                 //nc_plot_var.par_access(NC_INDEPENDENT);
-                nc_plot_var.put_all(tmp_bathy.dataPtr(), {local_start_y,local_start_x},
-                                                         {local_ny, local_nx});
+                requests.push_back(0);
+                nc_plot_var.iput(tmp_bathy.dataPtr(), {local_start_y,local_start_x},
+                                 {local_ny, local_nx}, &requests.back());
+                // moving hands the buffer over, its address does not change
+                staging.push_back(std::move(tmp_bathy));
             }
 
             {
-                    FArrayBox tmp_zeta;
-                    tmp_zeta.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
-                    tmp_zeta.template copy((*vec_Zt_avg1[lev])[mfi.index()],0,0,1);
-                    Gpu::streamSynchronize();
-
-                    auto nc_plot_var = ncf.var("zeta");
-                    //nc_plot_var.par_access(NC_INDEPENDENT);
-                    nc_plot_var.put_all(tmp_zeta.dataPtr(), {local_start_nt,local_start_y,local_start_x},
-                                                            {local_nt, local_ny, local_nx});
+                FArrayBox tmp_zeta;
+                tmp_zeta.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
+                tmp_zeta.template copy<RunOn::Device>((*vec_Zt_avg1[lev])[mfi.index()],0,0,1);
+                Gpu::streamSynchronize();
+
+                auto nc_plot_var = ncf.var("zeta");
+                //nc_plot_var.par_access(NC_INDEPENDENT);
+                requests.push_back(0);
+                nc_plot_var.iput(tmp_zeta.dataPtr(), {local_start_nt,local_start_y,local_start_x},
+                                 {local_nt, local_ny, local_nx}, &requests.back());
+                staging.push_back(std::move(tmp_zeta));
             }
 
             {
-                    FArrayBox tmp_temp;
-                    tmp_temp.resize(tmp_bx,1,amrex::The_Pinned_Arena());
-                    tmp_temp.template copy((*cons_new[lev])[mfi.index()],Temp_comp,0,1);
-                    Gpu::streamSynchronize();
-
-                    auto nc_plot_var = ncf.var("temp");
-                    //nc_plot_var.par_access(NC_INDEPENDENT);
-                    nc_plot_var.put_all(tmp_temp.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
-                                                            {local_nt, local_nz, local_ny, local_nx});
+                FArrayBox tmp_temp;
+                tmp_temp.resize(tmp_bx,1,amrex::The_Pinned_Arena());
+                tmp_temp.template copy<RunOn::Device>((*cons_new[lev])[mfi.index()],Temp_comp,0,1);
+                Gpu::streamSynchronize();
+
+                auto nc_plot_var = ncf.var("temp");
+                //nc_plot_var.par_access(NC_INDEPENDENT);
+                requests.push_back(0);
+                nc_plot_var.iput(tmp_temp.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
+                                 {local_nt, local_nz, local_ny, local_nx}, &requests.back());
+                staging.push_back(std::move(tmp_temp));
             }
 
             {
-                    FArrayBox tmp_salt;
-                    tmp_salt.resize(tmp_bx,1,amrex::The_Pinned_Arena());
-                    tmp_salt.template copy((*cons_new[lev])[mfi.index()],Salt_comp,0,1);
-                    Gpu::streamSynchronize();
-
-                    auto nc_plot_var = ncf.var("salt");
-                    //nc_plot_var.par_access(NC_INDEPENDENT);
-                    nc_plot_var.put_all(tmp_salt.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
-                                                            {local_nt, local_nz, local_ny, local_nx});
+                FArrayBox tmp_salt;
+                tmp_salt.resize(tmp_bx,1,amrex::The_Pinned_Arena());
+                tmp_salt.template copy<RunOn::Device>((*cons_new[lev])[mfi.index()],Salt_comp,0,1);
+                Gpu::streamSynchronize();
+
+                auto nc_plot_var = ncf.var("salt");
+                //nc_plot_var.par_access(NC_INDEPENDENT);
+                requests.push_back(0);
+                nc_plot_var.iput(tmp_salt.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
+                                 {local_nt, local_nz, local_ny, local_nx}, &requests.back());
+                staging.push_back(std::move(tmp_salt));
             }
         } // subdomain
     } // mfi
 
+    // Collective: reached by every task, also by one that posted nothing.
+    ncf.wait_all(static_cast<int>(requests.size()), requests.data());
+    requests.clear();
+    staging.clear();
     // Writing u (we loop over cons to get cell-centered box)
     for (MFIter mfi(*cons_new[lev],false); mfi.isValid(); ++mfi)
     {
@@ -457,42 +475,51 @@ REMORA::WriteNCPlotFile_which(int lev, int which_subdomain,
             long unsigned local_start_z = static_cast(tmp_bx.smallEnd()[2]);
 
             {
-                    FArrayBox tmp;
-                    tmp.resize(tmp_bx,1,amrex::The_Pinned_Arena());
-                    tmp.template copy((*xvel_new[lev])[mfi.index()],0,0,1);
-                    Gpu::streamSynchronize();
-
-                    auto nc_plot_var = ncf.var("u");
-                    //nc_plot_var.par_access(NC_INDEPENDENT);
-                    nc_plot_var.put_all(tmp.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
-                                                       {local_nt, local_nz, local_ny, local_nx});
+                FArrayBox tmp;
+                tmp.resize(tmp_bx,1,amrex::The_Pinned_Arena());
+                tmp.template copy<RunOn::Device>((*xvel_new[lev])[mfi.index()],0,0,1);
+                Gpu::streamSynchronize();
+
+                auto nc_plot_var = ncf.var("u");
+                //nc_plot_var.par_access(NC_INDEPENDENT);
+                requests.push_back(0);
+                nc_plot_var.iput(tmp.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
+                                 {local_nt, local_nz, local_ny, local_nx}, &requests.back());
+                staging.push_back(std::move(tmp));
             }
 
             {
-                    FArrayBox tmp;
-                    tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
-                    tmp.template copy((*vec_ubar[lev])[mfi.index()],0,0,1);
-                    Gpu::streamSynchronize();
-
-                    auto nc_plot_var = ncf.var("ubar");
-                    //nc_plot_var.par_access(NC_INDEPENDENT);
-                    nc_plot_var.put_all(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
-                                                       {local_nt, local_ny, local_nx});
+                FArrayBox tmp;
+                tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
+                tmp.template copy<RunOn::Device>((*vec_ubar[lev])[mfi.index()],0,0,1);
+                Gpu::streamSynchronize();
+
+                auto nc_plot_var = ncf.var("ubar");
+                //nc_plot_var.par_access(NC_INDEPENDENT);
+                requests.push_back(0);
+                nc_plot_var.iput(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
+                                 {local_nt, local_ny, local_nx}, &requests.back());
+                staging.push_back(std::move(tmp));
             }
             {
-                    FArrayBox tmp;
-                    tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
-                    tmp.template copy((*vec_sustr[lev])[mfi.index()],0,0,1);
-                    Gpu::streamSynchronize();
-
-                    auto nc_plot_var = ncf.var("sustr");
-                    //nc_plot_var.par_access(NC_INDEPENDENT);
-                    nc_plot_var.put_all(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
-                                                       {local_nt, local_ny, local_nx});
+                FArrayBox tmp;
+                tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
+                tmp.template copy<RunOn::Device>((*vec_sustr[lev])[mfi.index()],0,0,1);
+                Gpu::streamSynchronize();
+
+                auto nc_plot_var = ncf.var("sustr");
+                //nc_plot_var.par_access(NC_INDEPENDENT);
+                requests.push_back(0);
+                nc_plot_var.iput(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
+                                 {local_nt, local_ny, local_nx}, &requests.back());
+                staging.push_back(std::move(tmp));
             }
         } // in subdomain
     } // mfi
-
+    // Collective: reached by every task, also by one that posted nothing.
+    ncf.wait_all(static_cast<int>(requests.size()), requests.data());
+    requests.clear();
+    staging.clear();
     // Writing v (we loop over cons to get cell-centered box)
     for (MFIter mfi(*cons_new[lev],false); mfi.isValid(); ++mfi)
     {
@@ -525,43 +552,54 @@ REMORA::WriteNCPlotFile_which(int lev, int which_subdomain,
             long unsigned local_start_z = static_cast(tmp_bx.smallEnd()[2]);
 
             {
-                    FArrayBox tmp;
-                    tmp.resize(tmp_bx,1,amrex::The_Pinned_Arena());
-                    tmp.template copy((*yvel_new[lev])[mfi.index()],0,0,1);
-                    Gpu::streamSynchronize();
-
-                    auto nc_plot_var = ncf.var("v");
-                    //nc_plot_var.par_access(NC_INDEPENDENT);
-                    nc_plot_var.put_all(tmp.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
-                                                       {local_nt, local_nz, local_ny, local_nx});
+                FArrayBox tmp;
+                tmp.resize(tmp_bx,1,amrex::The_Pinned_Arena());
+                tmp.template copy<RunOn::Device>((*yvel_new[lev])[mfi.index()],0,0,1);
+                Gpu::streamSynchronize();
+
+                auto nc_plot_var = ncf.var("v");
+                //nc_plot_var.par_access(NC_INDEPENDENT);
+                requests.push_back(0);
+                nc_plot_var.iput(tmp.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
+                                 {local_nt, local_nz, local_ny, local_nx}, &requests.back());
+                staging.push_back(std::move(tmp));
             }
 
             {
-                    FArrayBox tmp;
-                    tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
-                    tmp.template copy((*vec_vbar[lev])[mfi.index()],0,0,1);
-                    Gpu::streamSynchronize();
-
-                    auto nc_plot_var = ncf.var("vbar");
-                    //nc_plot_var.par_access(NC_INDEPENDENT);
-                    nc_plot_var.put_all(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
-                                                       {local_nt, local_ny, local_nx});
+                FArrayBox tmp;
+                tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
+                tmp.template copy<RunOn::Device>((*vec_vbar[lev])[mfi.index()],0,0,1);
+                Gpu::streamSynchronize();
+
+                auto nc_plot_var = ncf.var("vbar");
+                //nc_plot_var.par_access(NC_INDEPENDENT);
+                requests.push_back(0);
+                nc_plot_var.iput(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
+                                 {local_nt, local_ny, local_nx}, &requests.back());
+                staging.push_back(std::move(tmp));
             }
 
             {
-                    FArrayBox tmp;
-                    tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
-                    tmp.template copy((*vec_svstr[lev])[mfi.index()],0,0,1);
-                    Gpu::streamSynchronize();
-
-                    auto nc_plot_var = ncf.var("svstr");
-                    //nc_plot_var.par_access(NC_INDEPENDENT);
-                    nc_plot_var.put_all(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
-                                                       {local_nt, local_ny, local_nx});
+                FArrayBox tmp;
+                tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
+                tmp.template copy<RunOn::Device>((*vec_svstr[lev])[mfi.index()],0,0,1);
+                Gpu::streamSynchronize();
+
+                auto nc_plot_var = ncf.var("svstr");
+                //nc_plot_var.par_access(NC_INDEPENDENT);
+                requests.push_back(0);
+                nc_plot_var.iput(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
+                                 {local_nt, local_ny, local_nx}, &requests.back());
+                staging.push_back(std::move(tmp));
             }
         } // in subdomain
     } // mfi
 
+    // Collective: reached by every task, also by one that posted nothing.
+    ncf.wait_all(static_cast<int>(requests.size()), requests.data());
+    requests.clear();
+    staging.clear();
+
     mask_arrays_for_write(lev, 0.0_rt);
 
     ncf.close();