49 tbxp1.grow(IntVect(NGROW-1,NGROW-1,0));
- 52 BoxArray ba_gbx1 = intersect(BoxArray(tbxp1), gbx);
- 53 AMREX_ASSERT((ba_gbx1.size() == 1));
- 54 Box gbx1 = ba_gbx1[0];
-
-
-
-
-
-
-
-
-
- 64 FArrayBox fab_UFx(growLo(xbx,0,1),1,amrex::The_Async_Arena()); fab_UFx.template setVal<RunOn::Device>(0.);
- 65 FArrayBox fab_UFe(growHi(xbx,1,1),1,amrex::The_Async_Arena()); fab_UFe.template setVal<RunOn::Device>(0.);
- 66 FArrayBox fab_VFe(growLo(ybx,1,1),1,amrex::The_Async_Arena()); fab_VFe.template setVal<RunOn::Device>(0.);
- 67 FArrayBox fab_VFx(growHi(ybx,0,1),1,amrex::The_Async_Arena()); fab_VFx.template setVal<RunOn::Device>(0.);
-
- 69 auto UFx=fab_UFx.array();
- 70 auto UFe=fab_UFe.array();
- 71 auto VFx=fab_VFx.array();
- 72 auto VFe=fab_VFe.array();
-
-
- 75 const Real Gadv = -0.25;
-
-
-
-
-
-
+
+
+
+ 55 FArrayBox fab_UFx(growLo(xbx,0,1),1,amrex::The_Async_Arena()); fab_UFx.template setVal<RunOn::Device>(0.);
+ 56 FArrayBox fab_UFe(growHi(xbx,1,1),1,amrex::The_Async_Arena()); fab_UFe.template setVal<RunOn::Device>(0.);
+ 57 FArrayBox fab_VFe(growLo(ybx,1,1),1,amrex::The_Async_Arena()); fab_VFe.template setVal<RunOn::Device>(0.);
+ 58 FArrayBox fab_VFx(growHi(ybx,0,1),1,amrex::The_Async_Arena()); fab_VFx.template setVal<RunOn::Device>(0.);
+
+ 60 auto UFx=fab_UFx.array();
+ 61 auto UFe=fab_UFe.array();
+ 62 auto VFx=fab_VFx.array();
+ 63 auto VFe=fab_VFe.array();
+
+
+ 66 const Real Gadv = -0.25;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
- 95 ParallelFor(growLo(xbx,0,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
- 97 Real cff1 = uold(i,j,k,nrhs)+uold(i+1,j,k,nrhs);
-
-
- 100 Real cff = (cff1 > 0.0) ? uold(i-1,j,k,nrhs)-2.0*uold(i ,j,k,nrhs)+uold(i+1,j,k,nrhs) :
- 101 uold(i ,j,k,nrhs)-2.0*uold(i+1,j,k,nrhs)+uold(i+2,j,k,nrhs);
+
+
+
+ 86 ParallelFor(growLo(xbx,0,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+ 88 Real cff1 = uold(i,j,k,nrhs)+uold(i+1,j,k,nrhs);
+
+
+ 91 Real cff = (cff1 > 0.0) ? uold(i-1,j,k,nrhs)-2.0*uold(i ,j,k,nrhs)+uold(i+1,j,k,nrhs) :
+ 92 uold(i ,j,k,nrhs)-2.0*uold(i+1,j,k,nrhs)+uold(i+2,j,k,nrhs);
+
+ 94 Real Huxx_i = Huon(i-1,j,k)-2.0*Huon(i ,j,k)+Huon(i+1,j,k);
+ 95 Real Huxx_ip1 = Huon(i ,j,k)-2.0*Huon(i+1,j,k)+Huon(i+2,j,k);
+ 96 Real Huxx_avg = 0.5 * (Huxx_i + Huxx_ip1);
+
+ 98 Real Huon_avg = (Huon(i,j,k) + Huon(i+1,j,k));
+
+ 100 UFx(i,j,k) = 0.25*(cff1+Gadv*cff) * ( Huon_avg + Gadv*Huxx_avg );
+
- 103 Real Huxx_i = Huon(i-1,j,k)-2.0*Huon(i ,j,k)+Huon(i+1,j,k);
- 104 Real Huxx_ip1 = Huon(i ,j,k)-2.0*Huon(i+1,j,k)+Huon(i+2,j,k);
- 105 Real Huxx_avg = 0.5 * (Huxx_i + Huxx_ip1);
-
- 107 Real Huon_avg = (Huon(i,j,k) + Huon(i+1,j,k));
-
- 109 UFx(i,j,k) = 0.25*(cff1+Gadv*cff) * ( Huon_avg + Gadv*Huxx_avg );
-
-
-
-
-
- 115 ParallelFor(growHi(xbx,1,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
- 117 Real cff1 = uold(i,j,k,nrhs) + uold(i ,j-1,k,nrhs);
- 118 Real cff2 = Hvom(i,j,k) + Hvom(i-1,j ,k);
-
-
- 121 Real cff = (cff2 > 0.0) ? uold(i,j-2,k,nrhs) - 2.0*uold(i,j-1,k,nrhs) + uold(i ,j,k,nrhs) :
- 122 uold(i,j-1,k,nrhs) - 2.0*uold(i,j ,k,nrhs) + uold(i,j+1,k,nrhs);
-
- 124 Real Hvxx_i = Hvom(i-1,j,k)-2.0*Hvom(i ,j,k)+Hvom(i+1,j,k);
- 125 Real Hvxx_im1 = Hvom(i-2,j,k)-2.0*Hvom(i-1,j,k)+Hvom(i ,j,k);
-
- 127 UFe(i,j,k) = 0.25 * (cff1+Gadv*cff)* (cff2+Gadv*0.5*(Hvxx_i + Hvxx_im1));
-
-
-
-
-
- 133 ParallelFor(xbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
- 135 ru(i,j,k,nrhs) -= ( (UFx(i,j,k)-UFx(i-1,j,k)) + (UFe(i,j+1,k)-UFe(i ,j,k)) );
-
-
- 138 ParallelFor(xbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
-
-
-
-
-
-
-
-
- 148 FC(i,j,k)=( cff1*(uold(i ,j,k ,nrhs)+ uold(i,j,k+1,nrhs))
- 149 -cff2*(uold(i ,j,k-1,nrhs)+ uold(i,j,k+2,nrhs)) )*
- 150 ( cff1*( W(i ,j,k)+ W(i-1,j,k))
- 151 -cff2*( W(i+1,j,k)+ W(i-2,j,k)) );
-
-
-
-
-
- 157 FC(i,j,N-1)=( cff1*(uold(i ,j,N-1,nrhs)+ uold(i,j,N ,nrhs))
- 158 -cff2*(uold(i ,j,N-2,nrhs)+ uold(i,j,N ,nrhs)) )*
- 159 ( cff1*( W(i ,j,N-1)+ W(i-1,j,N-1))
- 160 -cff2*( W(i+1,j,N-1)+ W(i-2,j,N-1)) );
+
+
+
+ 106 ParallelFor(growHi(xbx,1,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+ 108 Real cff1 = uold(i,j,k,nrhs) + uold(i ,j-1,k,nrhs);
+ 109 Real cff2 = Hvom(i,j,k) + Hvom(i-1,j ,k);
+
+
+ 112 Real cff = (cff2 > 0.0) ? uold(i,j-2,k,nrhs) - 2.0*uold(i,j-1,k,nrhs) + uold(i ,j,k,nrhs) :
+ 113 uold(i,j-1,k,nrhs) - 2.0*uold(i,j ,k,nrhs) + uold(i,j+1,k,nrhs);
+
+ 115 Real Hvxx_i = Hvom(i-1,j,k)-2.0*Hvom(i ,j,k)+Hvom(i+1,j,k);
+ 116 Real Hvxx_im1 = Hvom(i-2,j,k)-2.0*Hvom(i-1,j,k)+Hvom(i ,j,k);
+
+ 118 UFe(i,j,k) = 0.25 * (cff1+Gadv*cff)* (cff2+Gadv*0.5*(Hvxx_i + Hvxx_im1));
+
+
+
+
+
+ 124 ParallelFor(xbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+ 126 ru(i,j,k,nrhs) -= ( (UFx(i,j,k)-UFx(i-1,j,k)) + (UFe(i,j+1,k)-UFe(i ,j,k)) );
+
+
+ 129 ParallelFor(xbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+
+
+
+
+
+
+
+
+ 139 FC(i,j,k)=( cff1*(uold(i ,j,k ,nrhs)+ uold(i,j,k+1,nrhs))
+ 140 -cff2*(uold(i ,j,k-1,nrhs)+ uold(i,j,k+2,nrhs)) )*
+ 141 ( cff1*( W(i ,j,k)+ W(i-1,j,k))
+ 142 -cff2*( W(i+1,j,k)+ W(i-2,j,k)) );
+
+
+
+
+
+ 148 FC(i,j,N-1)=( cff1*(uold(i ,j,N-1,nrhs)+ uold(i,j,N ,nrhs))
+ 149 -cff2*(uold(i ,j,N-2,nrhs)+ uold(i,j,N ,nrhs)) )*
+ 150 ( cff1*( W(i ,j,N-1)+ W(i-1,j,N-1))
+ 151 -cff2*( W(i+1,j,N-1)+ W(i-2,j,N-1)) );
+
+ 153 FC(i,j,0)=( cff1*(uold(i ,j,0,nrhs)+ uold(i,j,1,nrhs))
+ 154 -cff2*(uold(i ,j,0,nrhs)+ uold(i,j,2,nrhs)) )*
+ 155 ( cff1*( W(i ,j,0)+ W(i-1,j,0))
+ 156 -cff2*( W(i+1,j,0)+ W(i-2,j,0)) );
+
+
+
+
- 162 FC(i,j,0)=( cff1*(uold(i ,j,0,nrhs)+ uold(i,j,1,nrhs))
- 163 -cff2*(uold(i ,j,0,nrhs)+ uold(i,j,2,nrhs)) )*
- 164 ( cff1*( W(i ,j,0)+ W(i-1,j,0))
- 165 -cff2*( W(i+1,j,0)+ W(i-2,j,0)) );
-
-
-
-
+ 162 ParallelFor(xbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+ 164 Real cff = (k >= 1) ? FC(i,j,k)-FC(i,j,k-1) : FC(i,j,k);
+
+ 166 ru(i,j,k,nrhs) -= cff;
+
+
+
- 171 ParallelFor(xbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
- 173 Real cff = (k >= 1) ? FC(i,j,k)-FC(i,j,k-1) : FC(i,j,k);
-
- 175 ru(i,j,k,nrhs) -= cff;
-
+ 171 AMREX_ASSERT(xbx.smallEnd(2) == 0 && xbx.bigEnd(2) == N);
+ 172 ParallelFor(makeSlab(xbx,2,0), [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
+
+ 174 for (
int k = 0; k <= N; ++k)
+
+ 176 rufrc(i,j,0) += ru(i,j,k,nrhs);
-
+ 178 Real cff = om_u(i,j,0)*on_u(i,j,0);
- 180 AMREX_ASSERT(xbx.smallEnd(2) == 0 && xbx.bigEnd(2) == N);
- 181 ParallelFor(makeSlab(xbx,2,0), [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
-
- 183 for (
int k = 0; k <= N; ++k)
-
- 185 rufrc(i,j,0) += ru(i,j,k,nrhs);
+ 180 Real cff1 = (k == N) ? sustr(i,j,0)*cff : 0.0;
+ 181 Real cff2 = (k == 0) ? -bustr(i,j,0)*cff : 0.0;
+
+ 183 rufrc(i,j,0) += cff1+cff2;
+
+
- 187 Real cff = om_u(i,j,0)*on_u(i,j,0);
-
- 189 Real cff1 = (k == N) ? sustr(i,j,0)*cff : 0.0;
- 190 Real cff2 = (k == 0) ? -bustr(i,j,0)*cff : 0.0;
-
- 192 rufrc(i,j,0) += cff1+cff2;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 202 ParallelFor(growLo(ybx,1,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+ 204 Real cff1=vold(i,j,k,nrhs)+vold(i,j+1,k,nrhs);
+
+
+ 207 Real cff = (cff1 > 0.0) ? vold(i,j-1,k,nrhs)-2.0*vold(i,j,k,nrhs)+ vold(i,j+1,k,nrhs) :
+ 208 vold(i,j,k,nrhs)-2.0*vold(i,j+1,k,nrhs)+ vold(i,j+2,k,nrhs);
-
- 211 ParallelFor(growLo(ybx,1,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
- 213 Real cff1=vold(i,j,k,nrhs)+vold(i,j+1,k,nrhs);
-
-
- 216 Real cff = (cff1 > 0.0) ? vold(i,j-1,k,nrhs)-2.0*vold(i,j,k,nrhs)+ vold(i,j+1,k,nrhs) :
- 217 vold(i,j,k,nrhs)-2.0*vold(i,j+1,k,nrhs)+ vold(i,j+2,k,nrhs);
-
- 219 Real Hvee_j = Hvom(i,j-1,k)-2.0*Hvom(i,j ,k)+Hvom(i,j+1,k);
- 220 Real Hvee_jp1 = Hvom(i,j ,k)-2.0*Hvom(i,j+1,k)+Hvom(i,j+2,k);
+ 210 Real Hvee_j = Hvom(i,j-1,k)-2.0*Hvom(i,j ,k)+Hvom(i,j+1,k);
+ 211 Real Hvee_jp1 = Hvom(i,j ,k)-2.0*Hvom(i,j+1,k)+Hvom(i,j+2,k);
+
+ 213 VFe(i,j,k) = 0.25 * (cff1+Gadv*cff) * ( Hvom(i,j ,k)+ Hvom(i,j+1,k) + 0.5 * Gadv * (Hvee_j + Hvee_jp1) );
+
+
+
+ 217 ParallelFor(growHi(ybx,0,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+ 219 Real cff1 = vold(i,j,k,nrhs) + vold(i-1,j ,k,nrhs);
+ 220 Real cff2 = Huon(i,j,k) + Huon(i ,j-1,k);
- 222 VFe(i,j,k) = 0.25 * (cff1+Gadv*cff) * ( Hvom(i,j ,k)+ Hvom(i,j+1,k) + 0.5 * Gadv * (Hvee_j + Hvee_jp1) );
-
-
-
- 226 ParallelFor(growHi(ybx,0,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
- 228 Real cff1 = vold(i,j,k,nrhs) + vold(i-1,j ,k,nrhs);
- 229 Real cff2 = Huon(i,j,k) + Huon(i ,j-1,k);
-
-
- 232 Real cff = (cff2 > 0.0) ? vold(i-2,j,k,nrhs)-2.0*vold(i-1,j,k,nrhs)+vold(i ,j,k,nrhs) :
- 233 vold(i-1,j,k,nrhs)-2.0*vold(i ,j,k,nrhs)+vold(i+1,j,k,nrhs);
-
- 235 Real Huee_j = Huon(i,j-1,k)-2.0*Huon(i,j ,k)+Huon(i,j+1,k);
- 236 Real Huee_jm1 = Huon(i,j-2,k)-2.0*Huon(i,j-1,k)+Huon(i,j ,k);
-
- 238 VFx(i,j,k) = 0.25*(cff1+Gadv*cff)* (cff2+Gadv*0.5*(Huee_j + Huee_jm1));
-
-
- 241 ParallelFor(ybx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
- 243 rv(i,j,k,nrhs) -= ( (VFx(i+1,j,k)-VFx(i,j,k)) + (VFe(i,j,k)-VFe(i,j-1,k)) );
-
-
- 246 ParallelFor(ybx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
-
-
-
-
- 252 FC(i,j,k)=( cff1*(vold(i,j,k ,nrhs)+ vold(i,j,k+1,nrhs))
- 253 -cff2*(vold(i,j,k-1,nrhs)+ vold(i,j,k+2,nrhs)) )*
- 254 ( cff1*(W(i,j ,k)+ W(i,j-1,k))
- 255 -cff2*(W(i,j+1,k)+ W(i,j-2,k)) );
-
-
-
-
- 260 FC(i,j,N-1)=( cff1*(vold(i,j,N-1,nrhs)+ vold(i,j,N ,nrhs))
- 261 -cff2*(vold(i,j,N-2,nrhs)+ vold(i,j,N ,nrhs)) )*
- 262 ( cff1*(W(i,j ,N-1)+ W(i,j-1,N-1))
- 263 -cff2*(W(i,j+1,N-1)+ W(i,j-2,N-1)) );
- 264 FC(i,j,0)=( cff1*(vold(i,j,0,nrhs)+ vold(i,j,1,nrhs))
- 265 -cff2*(vold(i,j,0,nrhs)+ vold(i,j,2,nrhs)) )*
- 266 ( cff1*(W(i,j ,0)+ W(i,j-1,0))
- 267 -cff2*(W(i,j+1,0)+ W(i,j-2,0)) );
-
-
- 270 }); Gpu::synchronize();
+
+ 223 Real cff = (cff2 > 0.0) ? vold(i-2,j,k,nrhs)-2.0*vold(i-1,j,k,nrhs)+vold(i ,j,k,nrhs) :
+ 224 vold(i-1,j,k,nrhs)-2.0*vold(i ,j,k,nrhs)+vold(i+1,j,k,nrhs);
+
+ 226 Real Huee_j = Huon(i,j-1,k)-2.0*Huon(i,j ,k)+Huon(i,j+1,k);
+ 227 Real Huee_jm1 = Huon(i,j-2,k)-2.0*Huon(i,j-1,k)+Huon(i,j ,k);
+
+ 229 VFx(i,j,k) = 0.25*(cff1+Gadv*cff)* (cff2+Gadv*0.5*(Huee_j + Huee_jm1));
+
+
+ 232 ParallelFor(ybx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+ 234 rv(i,j,k,nrhs) -= ( (VFx(i+1,j,k)-VFx(i,j,k)) + (VFe(i,j,k)-VFe(i,j-1,k)) );
+
+
+ 237 ParallelFor(ybx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+
+
+
+
+ 243 FC(i,j,k)=( cff1*(vold(i,j,k ,nrhs)+ vold(i,j,k+1,nrhs))
+ 244 -cff2*(vold(i,j,k-1,nrhs)+ vold(i,j,k+2,nrhs)) )*
+ 245 ( cff1*(W(i,j ,k)+ W(i,j-1,k))
+ 246 -cff2*(W(i,j+1,k)+ W(i,j-2,k)) );
+
+
+
+
+ 251 FC(i,j,N-1)=( cff1*(vold(i,j,N-1,nrhs)+ vold(i,j,N ,nrhs))
+ 252 -cff2*(vold(i,j,N-2,nrhs)+ vold(i,j,N ,nrhs)) )*
+ 253 ( cff1*(W(i,j ,N-1)+ W(i,j-1,N-1))
+ 254 -cff2*(W(i,j+1,N-1)+ W(i,j-2,N-1)) );
+ 255 FC(i,j,0)=( cff1*(vold(i,j,0,nrhs)+ vold(i,j,1,nrhs))
+ 256 -cff2*(vold(i,j,0,nrhs)+ vold(i,j,2,nrhs)) )*
+ 257 ( cff1*(W(i,j ,0)+ W(i,j-1,0))
+ 258 -cff2*(W(i,j+1,0)+ W(i,j-2,0)) );
+
+
+ 261 }); Gpu::synchronize();
+
+ 263 ParallelFor(ybx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+ 265 Real cff = (k >= 1) ? FC(i,j,k)-FC(i,j,k-1) : FC(i,j,k);
+
+ 267 rv(i,j,k,nrhs) -= cff;
+
+
+
- 272 ParallelFor(ybx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
- 274 Real cff = (k >= 1) ? FC(i,j,k)-FC(i,j,k-1) : FC(i,j,k);
-
- 276 rv(i,j,k,nrhs) -= cff;
-
+ 272 AMREX_ASSERT(ybx.smallEnd(2) == 0 && ybx.bigEnd(2) == N);
+ 273 ParallelFor(makeSlab(ybx,2,0), [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
+
+ 275 for (
int k = 0; k <= N; ++k)
+
+ 277 rvfrc(i,j,0) += rv(i,j,k,nrhs);
-
+ 279 Real cff = om_v(i,j,0)*on_v(i,j,0);
- 281 AMREX_ASSERT(ybx.smallEnd(2) == 0 && ybx.bigEnd(2) == N);
- 282 ParallelFor(makeSlab(ybx,2,0), [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
-
- 284 for (
int k = 0; k <= N; ++k)
-
- 286 rvfrc(i,j,0) += rv(i,j,k,nrhs);
-
- 288 Real cff = om_v(i,j,0)*on_v(i,j,0);
-
- 290 Real cff1 = (k == N) ? svstr(i,j,0)*cff : 0.0;
- 291 Real cff2 = (k == 0) ? -bvstr(i,j,0)*cff : 0.0;
-
- 293 rvfrc(i,j,0) += cff1+cff2;
-
-
-
+ 281 Real cff1 = (k == N) ? svstr(i,j,0)*cff : 0.0;
+ 282 Real cff2 = (k == 0) ? -bvstr(i,j,0)*cff : 0.0;
+
+ 284 rvfrc(i,j,0) += cff1+cff2;
+
+
+
@@ -9836,8 +9821,8 @@