Skip to content

Commit

Permalink
Merge pull request #27 from WeiqunZhang/weiqun/development
Browse files Browse the repository at this point in the history
Fix race conditions
  • Loading branch information
jmsexton03 authored Sep 15, 2023
2 parents 0625dcc + 5d6bb65 commit 1fbb848
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 19 deletions.
11 changes: 7 additions & 4 deletions Source/IO/NCColumnFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,13 @@ ROMSX::writeToNCColumnFile(const int lev,
const int idx_vec = k - kstart;
const int ialpha = i - iloc;
const int jalpha = j - jloc;
ucol[idx_vec] += velx(i+iloc_shift,j,k) * alpha_u(ialpha, jalpha);
vcol[idx_vec] += vely(i,j+jloc_shift,k) * alpha_v(ialpha, jalpha);
thetacol[idx_vec] += state(i,j,k,Temp_comp) / state(i,j,k,Rho_comp)
* alpha_theta(ialpha, jalpha);
auto tmp = velx(i+iloc_shift,j,k) * alpha_u(ialpha, jalpha);
Gpu::Atomic::Add(&(ucol[idx_vec]), tmp);
tmp = vely(i,j+jloc_shift,k) * alpha_v(ialpha, jalpha);
Gpu::Atomic::Add(&(vcol[idx_vec]), tmp);
tmp = state(i,j,k,Temp_comp) / state(i,j,k,Rho_comp)
* alpha_theta(ialpha, jalpha);
Gpu::Atomic::Add(&(thetacol[idx_vec]), tmp);
});
}
}
Expand Down
3 changes: 0 additions & 3 deletions Source/ROMSX_SumIQ.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,6 @@ ROMSX::build_fine_mask(int level)
const auto iarr = ifab.array();
amrex::ParallelFor(
fab.box(), [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
#ifdef _OPENMP
#pragma omp atomic write
#endif
arr(i, j, k) = iarr(i, j, k);
});
}
Expand Down
9 changes: 8 additions & 1 deletion Source/TimeIntegration/ROMSX_advance_3d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -431,8 +431,15 @@ ROMSX::advance_3d (int lev,
if(k!=N) {
W(i,j,k) = W(i,j,k)- wrk_i*(z_w(i,j,k)+h(i,j,0,0));
}
else
});

// Zero W on the k == N level in a separate kernel; probably not the most efficient way
amrex::ParallelFor(gbx1,
[=] AMREX_GPU_DEVICE (int i, int j, int k)
{
if (k == N) {
W(i,j,N) = 0.0;
}
});

//
Expand Down
9 changes: 8 additions & 1 deletion Source/TimeIntegration/ROMSX_prestep_t_3d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,16 @@ ROMSX::prestep_t_3d (const Box& tbx, const Box& gbx,
if(k!=N) {
W(i,j,k) = W(i,j,k)- wrk_i*(z_w(i,j,k)+h(i,j,0,0));
}
else
});

amrex::ParallelFor(gbx1,
[=] AMREX_GPU_DEVICE (int i, int j, int k)
{
if (k == N) {
W(i,j,N) = 0.0;
}
});

FArrayBox fab_Akt(tbxp2,1,amrex::The_Async_Arena());
auto Akt= fab_Akt.array();

Expand Down
4 changes: 2 additions & 2 deletions Source/TimeIntegration/ROMSX_prsgrd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ ROMSX::prsgrd (const Box& phi_bx,
dR(i,j,k)=rho(i,j,k+1)-rho(i,j,k);
dZ(i,j,k)=z_r(i,j,k+1)-z_r(i,j,k);
} else {
dR(i,j,N)=dR(i,j,N-1);
dZ(i,j,N)=dZ(i,j,N-1);
dR(i,j,N)=rho(i,j,N)-rho(i,j,N-1);
dZ(i,j,N)=z_r(i,j,N)-z_r(i,j,N-1);
//This is really k=-1
//dR(i,j,0)=dR(i,j,1);
//dZ(i,j,0)=dZ(i,j,1);
Expand Down
8 changes: 6 additions & 2 deletions Source/TimeIntegration/ROMSX_rhs_uv_2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,13 @@ ROMSX::rhs_2d (const Box& bx,
Huee(i,j,k)=Huon(i,j-1,k)-2.0*Huon(i,j,k)+Huon(i,j+1,k);
cff1=vold(i ,j,k,nrhs)+vold(i-1,j,k,nrhs);
cff2=Huon(i,j,k)+Huon(i,j-1,k);
cff3=vxx(i-1,j,k)+vxx(i,j,k);
auto vxx_im1 = (i == gbx1.smallEnd(0)) ? vxx(i-1,j,k) :
(vold(i-2,j,k,nrhs)-2.0*vold(i-1,j,k,nrhs)+vold(i,j,k,nrhs));
cff3=vxx_im1+vxx(i,j,k);

VFx(i,j,k)=0.25*(cff1-cff3*cff)* (cff2-cff*(Huee(i,j,k)+ Huee(i,j-1,k)));
auto Huee_jm1 = (j == gbx1.smallEnd(1)) ? Huee(i,j-1,k) :
(Huon(i,j-2,k)-2.0*Huon(i,j-1,k)+Huon(i,j,k));
VFx(i,j,k)=0.25*(cff1-cff3*cff)* (cff2-cff*(Huee(i,j,k)+ Huee_jm1));
vee(i,j,k)=vold(i,j-1,k,nrhs)-2.0*vold(i,j,k,nrhs)+
vold(i,j+1,k,nrhs);
Hvee(i,j,k)=Hvom(i,j-1,k)-2.0*Hvom(i,j,k)+Hvom(i,j+1,k);
Expand Down
15 changes: 9 additions & 6 deletions Source/TimeIntegration/ROMSX_rhs_uv_3d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,11 +164,14 @@ ROMSX::rhs_3d (const Box& bx, const Box& gbx,
cff1=vold(i ,j,k,nrhs)+vold(i-1,j,k,nrhs);
cff2=Huon(i,j,k)+Huon(i,j-1,k);
if (cff2>0.0) {
cff=vxx(i-1,j,k);
cff = (i == tbxp1.smallEnd(0)) ? vxx(i-1,j,k) :
(vold(i-2,j,k,nrhs)-2.0*vold(i-1,j,k,nrhs)+vold(i,j,k,nrhs));
} else {
cff=vxx(i,j,k);
}
VFx(i,j,k)=0.25*(cff1+Gadv*cff)* (cff2+Gadv*0.5*(Huee(i,j,k)+ Huee(i,j-1,k)));
auto Huee_jm1 = (j == tbxp1.smallEnd(1)) ? Huee(i,j-1,k) :
(Huon(i,j-2,k)-2.0*Huon(i,j-1,k)+Huon(i,j,k));
VFx(i,j,k)=0.25*(cff1+Gadv*cff)* (cff2+Gadv*0.5*(Huee(i,j,k)+ Huee_jm1));
vee(i,j,k)=vold(i,j-1,k,nrhs)-2.0*vold(i,j,k,nrhs)+
vold(i,j+1,k,nrhs);
Hvee(i,j,k)=Hvom(i,j-1,k)-2.0*Hvom(i,j,k)+Hvom(i,j+1,k);
Expand Down Expand Up @@ -307,8 +310,8 @@ ROMSX::rhs_3d (const Box& bx, const Box& gbx,
Real cff2=1.0/16.0;
Real cff;
//Recursive summation:
rufrc(i,j,0)+=ru(i,j,k,nrhs);
rvfrc(i,j,0)+=rv(i,j,k,nrhs);
Gpu::Atomic::Add(&(rufrc(i,j,0)), ru(i,j,k,nrhs));
Gpu::Atomic::Add(&(rvfrc(i,j,0)), rv(i,j,k,nrhs));
// This toggles whether to update forcing terms on a slabbed box or not. Slabbing it changes plotfile to machine precision
#if 1
//These forcing terms should possibly be updated on a slabbed box
Expand All @@ -321,7 +324,7 @@ ROMSX::rhs_3d (const Box& bx, const Box& gbx,
cff2=-bustr(i,j,0)*cff;
else
cff2=0.0;
rufrc(i,j,0)+=cff1+cff2;
Gpu::Atomic::Add(&(rufrc(i,j,0)), cff1+cff2);

//These forcing terms should possibly be updated on a slabbed box
cff=om_v(i,j,0)*on_v(i,j,0);
Expand All @@ -333,7 +336,7 @@ ROMSX::rhs_3d (const Box& bx, const Box& gbx,
cff2=-bvstr(i,j,0)*cff;
else
cff2=0.0;
rvfrc(i,j,0)+=cff1+cff2;
Gpu::Atomic::Add(&(rvfrc(i,j,0)), cff1+cff2);
#else
});
amrex::ParallelFor(gbx1D,
Expand Down

0 comments on commit 1fbb848

Please sign in to comment.