Replace blocking collective calls with non-blocking ones
When we write in parallel, the number of iterations per task can differ,
so we cannot use blocking collective calls in general;
we need non-blocking calls instead.
iulian787 committed Nov 11, 2024
1 parent f44e7c5 commit e8ac047
Showing 3 changed files with 133 additions and 84 deletions.
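
For context, the change swaps the blocking collective write (ncmpi_put_vara_double_all), which every rank must call the same number of times, for the non-blocking ncmpi_iput_vara_double / ncmpi_wait_all pair, so each rank can post a different number of writes and all ranks synchronize once at the end. A minimal sketch of that PnetCDF pattern, independent of the REMORA wrappers (the helper name and arguments are illustrative, not taken from this commit; error checks omitted for brevity):

#include <pnetcdf.h>
#include <vector>

// Post any number of non-blocking writes on this rank, then complete them
// together. ncid and varid refer to an already-defined PnetCDF file/variable.
void write_slabs(int ncid, int varid,
                 const std::vector<std::vector<MPI_Offset>>& starts,
                 const std::vector<std::vector<MPI_Offset>>& counts,
                 const std::vector<const double*>& slabs)
{
    std::vector<int> requests(slabs.size());
    for (std::size_t i = 0; i < slabs.size(); ++i) {
        // Non-blocking post: ranks may issue different numbers of requests.
        ncmpi_iput_vara_double(ncid, varid, starts[i].data(), counts[i].data(),
                               slabs[i], &requests[i]);
    }
    std::vector<int> statuses(requests.size());
    // Collective completion: every rank calls this once, even with zero requests.
    ncmpi_wait_all(ncid, static_cast<int>(requests.size()),
                   requests.data(), statuses.data());
}
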
9 changes: 9 additions & 0 deletions Source/IO/NCInterface.H
@@ -83,6 +83,12 @@ struct NCVar
const std::vector<MPI_Offset>&) const;
//! Write out a slice of data with strides (see hyperslab definition in
//! NetCDF)
void iput(
const double* dptr,
const std::vector<MPI_Offset>& start,
const std::vector<MPI_Offset>& count,
int* request) const;

void
put(const double*,
const std::vector<MPI_Offset>&,
@@ -329,6 +335,9 @@ public:
void get_attr(const std::string& name, std::vector<float>& value) const;
void get_attr(const std::string& name, std::vector<int>& value) const;

// for non-blocking calls
void wait_all(int num_requests, int* requests);

//! Return a list of all dimensions defined in this group
std::vector<NCDim> all_dims() const;

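A hedged usage sketch of the two new entry points declared above, mirroring the post-then-wait pattern NCPlotFile.cpp adopts below (the helper and its arguments are placeholders, not part of the commit; namespace qualifiers on NCFile/NCVar are omitted):

#include "NCInterface.H"   // declares NCFile and NCVar as shown above
#include <vector>

// Hypothetical helper: post one non-blocking write per slab via NCVar::iput,
// then complete them all with a single NCFile::wait_all.
void write_all(NCFile& ncf, NCVar& var,
               const std::vector<const double*>& slabs,
               const std::vector<std::vector<MPI_Offset>>& starts,
               const std::vector<std::vector<MPI_Offset>>& counts)
{
    std::vector<int> requests;
    for (std::size_t i = 0; i < slabs.size(); ++i) {
        requests.push_back(0);   // slot for this request id
        // The request id is written during the call itself, so the pointer
        // does not need to stay valid across later push_backs.
        var.iput(slabs[i], starts[i], counts[i], &requests.back());
    }
    ncf.wait_all(static_cast<int>(requests.size()), requests.data());
}
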
17 changes: 17 additions & 0 deletions Source/IO/NCInterface.cpp
@@ -98,6 +98,17 @@ void NCVar::put_all(
ncmpi_put_vara_double_all(ncid, varid, start.data(), count.data(), dptr));
}

//! Write out a slice of data, non-blocking
void NCVar::iput(
const double* dptr,
const std::vector<MPI_Offset>& start,
const std::vector<MPI_Offset>& count,
int * request) const
{
check_ncmpi_error(
ncmpi_iput_vara_double(ncid, varid, start.data(), count.data(), dptr, request));
}

void NCVar::put(
const double* dptr,
const std::vector<MPI_Offset>& start,
@@ -656,6 +667,12 @@ NCFile NCFile::open(
return NCFile(ncid);
}

void NCFile::wait_all(int num_requests, int* requests)
{
std::vector<int> statuses(num_requests);
ncmpi_wait_all(ncid, num_requests, requests, statuses.data());
}

NCFile::~NCFile()
{
if (is_open) check_ncmpi_error(ncmpi_close(ncid));
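One note on the definition above: ncmpi_wait_all fills the statuses array with one error code per request, but the new wait_all discards them. A hedged variant (not in this commit) that surfaces individual failures with the same check_ncmpi_error helper used elsewhere in this file might look like:

void NCFile::wait_all(int num_requests, int* requests)
{
    std::vector<int> statuses(num_requests);
    // Check the collective call itself...
    check_ncmpi_error(
        ncmpi_wait_all(ncid, num_requests, requests, statuses.data()));
    // ...and each individual request; entries are NC_NOERR on success.
    for (int status : statuses) {
        check_ncmpi_error(status);
    }
}
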
191 changes: 107 additions & 84 deletions Source/IO/NCPlotFile.cpp
@@ -346,6 +346,8 @@ REMORA::WriteNCPlotFile_which(int lev, int which_subdomain,

mask_arrays_for_write(lev, (Real) fill_value);

std::vector<int> requests;
int irq=0;
for (MFIter mfi(*cons_new[lev],false); mfi.isValid(); ++mfi)
{
auto bx = mfi.validbox();
@@ -386,48 +388,55 @@

auto nc_plot_var = ncf.var("h");
//nc_plot_var.par_access(NC_INDEPENDENT);
nc_plot_var.put_all(tmp_bathy.dataPtr(), {local_start_y,local_start_x},
{local_ny, local_nx});
requests.push_back(0);
nc_plot_var.iput(tmp_bathy.dataPtr(), {local_start_y,local_start_x},
{local_ny, local_nx}, &requests[irq++]);
}

{
FArrayBox tmp_zeta;
tmp_zeta.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
tmp_zeta.template copy<RunOn::Device>((*vec_Zt_avg1[lev])[mfi.index()],0,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("zeta");
//nc_plot_var.par_access(NC_INDEPENDENT);
nc_plot_var.put_all(tmp_zeta.dataPtr(), {local_start_nt,local_start_y,local_start_x},
{local_nt, local_ny, local_nx});
FArrayBox tmp_zeta;
tmp_zeta.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
tmp_zeta.template copy<RunOn::Device>((*vec_Zt_avg1[lev])[mfi.index()],0,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("zeta");
//nc_plot_var.par_access(NC_INDEPENDENT);
requests.push_back(0);
nc_plot_var.iput(tmp_zeta.dataPtr(), {local_start_nt,local_start_y,local_start_x},
{local_nt, local_ny, local_nx}, &requests[irq++]);
}

{
FArrayBox tmp_temp;
tmp_temp.resize(tmp_bx,1,amrex::The_Pinned_Arena());
tmp_temp.template copy<RunOn::Device>((*cons_new[lev])[mfi.index()],Temp_comp,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("temp");
//nc_plot_var.par_access(NC_INDEPENDENT);
nc_plot_var.put_all(tmp_temp.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
{local_nt, local_nz, local_ny, local_nx});
FArrayBox tmp_temp;
tmp_temp.resize(tmp_bx,1,amrex::The_Pinned_Arena());
tmp_temp.template copy<RunOn::Device>((*cons_new[lev])[mfi.index()],Temp_comp,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("temp");
//nc_plot_var.par_access(NC_INDEPENDENT);
requests.push_back(0);
nc_plot_var.iput(tmp_temp.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
{local_nt, local_nz, local_ny, local_nx}, &requests[irq++]);
}

{
FArrayBox tmp_salt;
tmp_salt.resize(tmp_bx,1,amrex::The_Pinned_Arena());
tmp_salt.template copy<RunOn::Device>((*cons_new[lev])[mfi.index()],Salt_comp,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("salt");
//nc_plot_var.par_access(NC_INDEPENDENT);
nc_plot_var.put_all(tmp_salt.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
{local_nt, local_nz, local_ny, local_nx});
FArrayBox tmp_salt;
tmp_salt.resize(tmp_bx,1,amrex::The_Pinned_Arena());
tmp_salt.template copy<RunOn::Device>((*cons_new[lev])[mfi.index()],Salt_comp,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("salt");
//nc_plot_var.par_access(NC_INDEPENDENT);
requests.push_back(0);
nc_plot_var.iput(tmp_salt.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
{local_nt, local_nz, local_ny, local_nx}, &requests[irq++]);
}
} // subdomain
} // mfi

ncf.wait_all(irq, &requests[0]);
requests.resize(0);
irq = 0;
// Writing u (we loop over cons to get cell-centered box)
for (MFIter mfi(*cons_new[lev],false); mfi.isValid(); ++mfi)
{
@@ -457,42 +466,50 @@ REMORA::WriteNCPlotFile_which(int lev, int which_subdomain,
long unsigned local_start_z = static_cast<long unsigned>(tmp_bx.smallEnd()[2]);

{
FArrayBox tmp;
tmp.resize(tmp_bx,1,amrex::The_Pinned_Arena());
tmp.template copy<RunOn::Device>((*xvel_new[lev])[mfi.index()],0,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("u");
//nc_plot_var.par_access(NC_INDEPENDENT);
nc_plot_var.put_all(tmp.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
{local_nt, local_nz, local_ny, local_nx});
FArrayBox tmp;
tmp.resize(tmp_bx,1,amrex::The_Pinned_Arena());
tmp.template copy<RunOn::Device>((*xvel_new[lev])[mfi.index()],0,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("u");
//nc_plot_var.par_access(NC_INDEPENDENT);
std::cout << " local start nt, z:" << local_start_nt << " " << local_start_z << "\n";
requests.push_back(0);
nc_plot_var.iput(tmp.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
{local_nt, local_nz, local_ny, local_nx}, &requests[irq++]);
}

{
FArrayBox tmp;
tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
tmp.template copy<RunOn::Device>((*vec_ubar[lev])[mfi.index()],0,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("ubar");
//nc_plot_var.par_access(NC_INDEPENDENT);
nc_plot_var.put_all(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
{local_nt, local_ny, local_nx});
FArrayBox tmp;
tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
tmp.template copy<RunOn::Device>((*vec_ubar[lev])[mfi.index()],0,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("ubar");
std::cout << " write ubar \n";
//nc_plot_var.par_access(NC_INDEPENDENT);
requests.push_back(0);
nc_plot_var.iput(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
{local_nt, local_ny, local_nx}, &requests[irq++]);
}
{
FArrayBox tmp;
tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
tmp.template copy<RunOn::Device>((*vec_sustr[lev])[mfi.index()],0,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("sustr");
//nc_plot_var.par_access(NC_INDEPENDENT);
nc_plot_var.put_all(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
{local_nt, local_ny, local_nx});
FArrayBox tmp;
tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
tmp.template copy<RunOn::Device>((*vec_sustr[lev])[mfi.index()],0,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("sustr");
std::cout << " write sustr \n";
//nc_plot_var.par_access(NC_INDEPENDENT);
requests.push_back(0);
nc_plot_var.iput(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
{local_nt, local_ny, local_nx}, &requests[irq++]);
}
} // in subdomain
} // mfi

ncf.wait_all(irq, &requests[0]);
requests.resize(0);
irq = 0;
// Writing v (we loop over cons to get cell-centered box)
for (MFIter mfi(*cons_new[lev],false); mfi.isValid(); ++mfi)
{
@@ -525,43 +542,49 @@ REMORA::WriteNCPlotFile_which(int lev, int which_subdomain,
long unsigned local_start_z = static_cast<long unsigned>(tmp_bx.smallEnd()[2]);

{
FArrayBox tmp;
tmp.resize(tmp_bx,1,amrex::The_Pinned_Arena());
tmp.template copy<RunOn::Device>((*yvel_new[lev])[mfi.index()],0,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("v");
//nc_plot_var.par_access(NC_INDEPENDENT);
nc_plot_var.put_all(tmp.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
{local_nt, local_nz, local_ny, local_nx});
FArrayBox tmp;
tmp.resize(tmp_bx,1,amrex::The_Pinned_Arena());
tmp.template copy<RunOn::Device>((*yvel_new[lev])[mfi.index()],0,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("v");
//nc_plot_var.par_access(NC_INDEPENDENT);
requests.push_back(0);
nc_plot_var.iput(tmp.dataPtr(), {local_start_nt,local_start_z,local_start_y,local_start_x},
{local_nt, local_nz, local_ny, local_nx}, &requests[irq++]);
}

{
FArrayBox tmp;
tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
tmp.template copy<RunOn::Device>((*vec_vbar[lev])[mfi.index()],0,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("vbar");
//nc_plot_var.par_access(NC_INDEPENDENT);
nc_plot_var.put_all(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
{local_nt, local_ny, local_nx});
FArrayBox tmp;
tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
tmp.template copy<RunOn::Device>((*vec_vbar[lev])[mfi.index()],0,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("vbar");
//nc_plot_var.par_access(NC_INDEPENDENT);
requests.push_back(0);
nc_plot_var.iput(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
{local_nt, local_ny, local_nx}, &requests[irq++]);
}
{
FArrayBox tmp;
tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
tmp.template copy<RunOn::Device>((*vec_svstr[lev])[mfi.index()],0,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("svstr");
//nc_plot_var.par_access(NC_INDEPENDENT);
nc_plot_var.put_all(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
{local_nt, local_ny, local_nx});
FArrayBox tmp;
tmp.resize(tmp_bx_2d,1,amrex::The_Pinned_Arena());
tmp.template copy<RunOn::Device>((*vec_svstr[lev])[mfi.index()],0,0,1);
Gpu::streamSynchronize();

auto nc_plot_var = ncf.var("svstr");
//nc_plot_var.par_access(NC_INDEPENDENT);
requests.push_back(0);
nc_plot_var.iput(tmp.dataPtr(), {local_start_nt,local_start_y,local_start_x},
{local_nt, local_ny, local_nx}, &requests[irq++]);
}

} // in subdomain
} // mfi

ncf.wait_all(irq, &requests[0]);
requests.resize(0);
irq = 0;

mask_arrays_for_write(lev, 0.0_rt);

ncf.close();
