Skip to content

Commit

Permalink
Merge pull request #89 from kokkos/remove-wait-in-sycl
Browse files (browse the repository at this point in the history)
Remove wait after onemkl dft calls
  • Loading branch information
yasahi-hpc authored Mar 14, 2024
2 parents e9753c2 + 63ad0bf commit e5be444
Show file tree
Hide file tree
Showing 9 changed files with 147 additions and 95 deletions.
21 changes: 14 additions & 7 deletions examples/01_1DFFT/01_1DFFT.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,24 +23,31 @@ int main(int argc, char* argv[]) {
View1D<Kokkos::complex<double> > xc2c_inv("xc2c_inv", n0);

Kokkos::Random_XorShift64_Pool<> random_pool(12345);
Kokkos::fill_random(xc2c, random_pool, I);
execution_space exec;
Kokkos::fill_random(exec, xc2c, random_pool, I);
exec.fence();

KokkosFFT::fft(execution_space(), xc2c, xc2c_hat);
KokkosFFT::ifft(execution_space(), xc2c_hat, xc2c_inv);
KokkosFFT::fft(exec, xc2c, xc2c_hat);
KokkosFFT::ifft(exec, xc2c_hat, xc2c_inv);
exec.fence();

// 1D R2C FFT
View1D<double> xr2c("xr2c", n0);
View1D<Kokkos::complex<double> > xr2c_hat("xr2c_hat", n0 / 2 + 1);
Kokkos::fill_random(xr2c, random_pool, 1);
Kokkos::fill_random(exec, xr2c, random_pool, 1);
exec.fence();

KokkosFFT::rfft(execution_space(), xr2c, xr2c_hat);
KokkosFFT::rfft(exec, xr2c, xr2c_hat);
exec.fence();

// 1D C2R FFT
View1D<Kokkos::complex<double> > xc2r("xr2c_hat", n0 / 2 + 1);
View1D<double> xc2r_hat("xc2r", n0);
Kokkos::fill_random(xc2r, random_pool, I);
Kokkos::fill_random(exec, xc2r, random_pool, I);
exec.fence();

KokkosFFT::irfft(execution_space(), xc2r, xc2r_hat);
KokkosFFT::irfft(exec, xc2r, xc2r_hat);
exec.fence();
}
Kokkos::finalize();

Expand Down
21 changes: 14 additions & 7 deletions examples/02_2DFFT/02_2DFFT.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,24 +23,31 @@ int main(int argc, char* argv[]) {
View2D<Kokkos::complex<double> > xc2c_inv("xc2c_inv", n0, n1);

Kokkos::Random_XorShift64_Pool<> random_pool(12345);
Kokkos::fill_random(xc2c, random_pool, I);
execution_space exec;
Kokkos::fill_random(exec, xc2c, random_pool, I);
exec.fence();

KokkosFFT::fft2(execution_space(), xc2c, xc2c_hat);
KokkosFFT::ifft2(execution_space(), xc2c_hat, xc2c_inv);
KokkosFFT::fft2(exec, xc2c, xc2c_hat);
KokkosFFT::ifft2(exec, xc2c_hat, xc2c_inv);
exec.fence();

// 2D R2C FFT
View2D<double> xr2c("xr2c", n0, n1);
View2D<Kokkos::complex<double> > xr2c_hat("xr2c_hat", n0, n1 / 2 + 1);
Kokkos::fill_random(xr2c, random_pool, 1);
Kokkos::fill_random(exec, xr2c, random_pool, 1);
exec.fence();

KokkosFFT::rfft2(execution_space(), xr2c, xr2c_hat);
KokkosFFT::rfft2(exec, xr2c, xr2c_hat);
exec.fence();

// 2D C2R FFT
View2D<Kokkos::complex<double> > xc2r("xr2c_hat", n0, n1 / 2 + 1);
View2D<double> xc2r_hat("xc2r", n0, n1);
Kokkos::fill_random(xc2r, random_pool, I);
Kokkos::fill_random(exec, xc2r, random_pool, I);
exec.fence();

KokkosFFT::irfft2(execution_space(), xc2r, xc2r_hat);
KokkosFFT::irfft2(exec, xc2r, xc2r_hat);
exec.fence();
}
Kokkos::finalize();

Expand Down
21 changes: 14 additions & 7 deletions examples/03_NDFFT/03_NDFFT.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,24 +23,31 @@ int main(int argc, char* argv[]) {
View3D<Kokkos::complex<double> > xc2c_inv("xc2c_inv", n0, n1, n2);

Kokkos::Random_XorShift64_Pool<> random_pool(12345);
Kokkos::fill_random(xc2c, random_pool, I);
execution_space exec;
Kokkos::fill_random(exec, xc2c, random_pool, I);
exec.fence();

KokkosFFT::fftn(execution_space(), xc2c, xc2c_hat);
KokkosFFT::ifftn(execution_space(), xc2c_hat, xc2c_inv);
KokkosFFT::fftn(exec, xc2c, xc2c_hat);
KokkosFFT::ifftn(exec, xc2c_hat, xc2c_inv);
exec.fence();

// 3D R2C FFT
View3D<double> xr2c("xr2c", n0, n1, n2);
View3D<Kokkos::complex<double> > xr2c_hat("xr2c_hat", n0, n1, n2 / 2 + 1);
Kokkos::fill_random(xr2c, random_pool, 1);
Kokkos::fill_random(exec, xr2c, random_pool, 1);
exec.fence();

KokkosFFT::rfftn(execution_space(), xr2c, xr2c_hat);
KokkosFFT::rfftn(exec, xr2c, xr2c_hat);
exec.fence();

// 3D C2R FFT
View3D<Kokkos::complex<double> > xc2r("xr2c_hat", n0, n1, n2 / 2 + 1);
View3D<double> xc2r_hat("xc2r", n0, n1, n2);
Kokkos::fill_random(xc2r, random_pool, I);
Kokkos::fill_random(exec, xc2r, random_pool, I);
exec.fence();

KokkosFFT::irfftn(execution_space(), xc2r, xc2r_hat);
KokkosFFT::irfftn(exec, xc2r, xc2r_hat);
exec.fence();
}
Kokkos::finalize();

Expand Down
27 changes: 17 additions & 10 deletions examples/04_batchedFFT/04_batchedFFT.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,28 +23,35 @@ int main(int argc, char* argv[]) {
View3D<Kokkos::complex<double> > xc2c_inv("xc2c_inv", n0, n1, n2);

Kokkos::Random_XorShift64_Pool<> random_pool(12345);
Kokkos::fill_random(xc2c, random_pool, I);
execution_space exec;
Kokkos::fill_random(exec, xc2c, random_pool, I);
exec.fence();

KokkosFFT::fft(execution_space(), xc2c, xc2c_hat,
KokkosFFT::Normalization::backward, /*axis=*/-1);
KokkosFFT::ifft(execution_space(), xc2c_hat, xc2c_inv,
KokkosFFT::fft(exec, xc2c, xc2c_hat, KokkosFFT::Normalization::backward,
/*axis=*/-1);
KokkosFFT::ifft(exec, xc2c_hat, xc2c_inv,
KokkosFFT::Normalization::backward, /*axis=*/-1);
exec.fence();

// 1D batched R2C FFT
View3D<double> xr2c("xr2c", n0, n1, n2);
View3D<Kokkos::complex<double> > xr2c_hat("xr2c_hat", n0, n1, n2 / 2 + 1);
Kokkos::fill_random(xr2c, random_pool, 1);
Kokkos::fill_random(exec, xr2c, random_pool, 1);
exec.fence();

KokkosFFT::rfft(execution_space(), xr2c, xr2c_hat,
KokkosFFT::Normalization::backward, /*axis=*/-1);
KokkosFFT::rfft(exec, xr2c, xr2c_hat, KokkosFFT::Normalization::backward,
/*axis=*/-1);
exec.fence();

// 1D batched C2R FFT
View3D<Kokkos::complex<double> > xc2r("xr2c_hat", n0, n1, n2 / 2 + 1);
View3D<double> xc2r_hat("xc2r", n0, n1, n2);
Kokkos::fill_random(xc2r, random_pool, I);
Kokkos::fill_random(exec, xc2r, random_pool, I);
exec.fence();

KokkosFFT::irfft(execution_space(), xc2r, xc2r_hat,
KokkosFFT::Normalization::backward, /*axis=*/-1);
KokkosFFT::irfft(exec, xc2r, xc2r_hat, KokkosFFT::Normalization::backward,
/*axis=*/-1);
exec.fence();
}
Kokkos::finalize();

Expand Down
34 changes: 23 additions & 11 deletions examples/05_1DFFT_HOST_DEVICE/05_1DFFT_HOST_DEVICE.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,24 +26,31 @@ int main(int argc, char* argv[]) {
View1D<Kokkos::complex<double> > xc2c_inv("xc2c_inv", n0);

Kokkos::Random_XorShift64_Pool<> random_pool(12345);
Kokkos::fill_random(xc2c, random_pool, I);
execution_space exec;
Kokkos::fill_random(exec, xc2c, random_pool, I);
exec.fence();

KokkosFFT::fft(execution_space(), xc2c, xc2c_hat);
KokkosFFT::ifft(execution_space(), xc2c_hat, xc2c_inv);
KokkosFFT::fft(exec, xc2c, xc2c_hat);
KokkosFFT::ifft(exec, xc2c_hat, xc2c_inv);
exec.fence();

// 1D R2C FFT
View1D<double> xr2c("xr2c", n0);
View1D<Kokkos::complex<double> > xr2c_hat("xr2c_hat", n0 / 2 + 1);
Kokkos::fill_random(xr2c, random_pool, 1);
Kokkos::fill_random(exec, xr2c, random_pool, 1);
exec.fence();

KokkosFFT::rfft(execution_space(), xr2c, xr2c_hat);
KokkosFFT::rfft(exec, xr2c, xr2c_hat);
exec.fence();

// 1D C2R FFT
View1D<Kokkos::complex<double> > xc2r("xr2c_hat", n0 / 2 + 1);
View1D<double> xc2r_hat("xc2r", n0);
Kokkos::fill_random(xc2r, random_pool, I);
Kokkos::fill_random(exec, xc2r, random_pool, I);
exec.fence();

KokkosFFT::irfft(execution_space(), xc2r, xc2r_hat);
KokkosFFT::irfft(exec, xc2r, xc2r_hat);
exec.fence();

#ifdef ENABLE_HOST_AND_DEVICE
// FFTs on Host
Expand All @@ -54,22 +61,27 @@ int main(int argc, char* argv[]) {

Kokkos::deep_copy(h_xc2c, xc2c);

KokkosFFT::fft(host_execution_space(), h_xc2c, h_xc2c_hat);
KokkosFFT::ifft(host_execution_space(), h_xc2c_hat, h_xc2c_inv);
host_execution_space host_exec;

KokkosFFT::fft(host_exec, h_xc2c, h_xc2c_hat);
KokkosFFT::ifft(host_exec, h_xc2c_hat, h_xc2c_inv);
host_exec.fence();

// 1D R2C FFT
HostView1D<double> h_xr2c("h_xr2c", n0);
HostView1D<Kokkos::complex<double> > h_xr2c_hat("h_xr2c_hat", n0 / 2 + 1);

Kokkos::deep_copy(h_xr2c, xr2c);
KokkosFFT::rfft(host_execution_space(), h_xr2c, h_xr2c_hat);
KokkosFFT::rfft(host_exec, h_xr2c, h_xr2c_hat);
host_exec.fence();

// 1D C2R FFT
HostView1D<Kokkos::complex<double> > h_xc2r("h_xr2c_hat", n0 / 2 + 1);
HostView1D<double> h_xc2r_hat("h_xc2r", n0);

Kokkos::deep_copy(h_xc2r, xc2r);
KokkosFFT::irfft(host_execution_space(), h_xc2r, h_xc2r_hat);
KokkosFFT::irfft(host_exec, h_xc2r, h_xc2r_hat);
host_exec.fence();
#endif
}
Kokkos::finalize();
Expand Down
30 changes: 19 additions & 11 deletions examples/06_1DFFT_reuse_plans/06_1DFFT_reuse_plans.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,34 +23,42 @@ int main(int argc, char* argv[]) {
View1D<Kokkos::complex<double> > xc2c_inv("xc2c_inv", n0);

Kokkos::Random_XorShift64_Pool<> random_pool(12345);
Kokkos::fill_random(xc2c, random_pool, I);
execution_space exec;
Kokkos::fill_random(exec, xc2c, random_pool, I);
exec.fence();

int axis = -1;
KokkosFFT::Impl::Plan fft_plan(execution_space(), xc2c, xc2c_hat,
KokkosFFT::Impl::Plan fft_plan(exec, xc2c, xc2c_hat,
KokkosFFT::Direction::forward, axis);
KokkosFFT::fft(execution_space(), xc2c, xc2c_hat, fft_plan);
KokkosFFT::fft(exec, xc2c, xc2c_hat, fft_plan);
exec.fence();

KokkosFFT::Impl::Plan ifft_plan(execution_space(), xc2c_hat, xc2c_inv,
KokkosFFT::Impl::Plan ifft_plan(exec, xc2c_hat, xc2c_inv,
KokkosFFT::Direction::backward, axis);
KokkosFFT::ifft(execution_space(), xc2c_hat, xc2c_inv, ifft_plan);
KokkosFFT::ifft(exec, xc2c_hat, xc2c_inv, ifft_plan);
exec.fence();

// 1D R2C FFT
View1D<double> xr2c("xr2c", n0);
View1D<Kokkos::complex<double> > xr2c_hat("xr2c_hat", n0 / 2 + 1);
Kokkos::fill_random(xr2c, random_pool, 1);
Kokkos::fill_random(exec, xr2c, random_pool, 1);
exec.fence();

KokkosFFT::Impl::Plan rfft_plan(execution_space(), xr2c, xr2c_hat,
KokkosFFT::Impl::Plan rfft_plan(exec, xr2c, xr2c_hat,
KokkosFFT::Direction::forward, axis);
KokkosFFT::rfft(execution_space(), xr2c, xr2c_hat, rfft_plan);
KokkosFFT::rfft(exec, xr2c, xr2c_hat, rfft_plan);
exec.fence();

// 1D C2R FFT
View1D<Kokkos::complex<double> > xc2r("xc2r_hat", n0 / 2 + 1);
View1D<double> xc2r_hat("xc2r", n0);
Kokkos::fill_random(xc2r, random_pool, I);
Kokkos::fill_random(exec, xc2r, random_pool, I);
exec.fence();

KokkosFFT::Impl::Plan irfft_plan(execution_space(), xc2r, xc2r_hat,
KokkosFFT::Impl::Plan irfft_plan(exec, xc2r, xc2r_hat,
KokkosFFT::Direction::backward, axis);
KokkosFFT::irfft(execution_space(), xc2r, xc2r_hat, irfft_plan);
KokkosFFT::irfft(exec, xc2r, xc2r_hat, irfft_plan);
exec.fence();
}
Kokkos::finalize();

Expand Down
46 changes: 18 additions & 28 deletions fft/src/KokkosFFT_SYCL_transform.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,59 +12,49 @@ namespace KokkosFFT {
namespace Impl {
template <typename PlanType, typename... Args>
void _exec(PlanType& plan, float* idata, std::complex<float>* odata,
[[maybe_unused]] int direction, [[maybe_unused]] Args... args) {
auto r2c = oneapi::mkl::dft::compute_forward(plan, idata,
reinterpret_cast<float*>(odata));
r2c.wait();
int /*direction*/, Args...) {
oneapi::mkl::dft::compute_forward(plan, idata,
reinterpret_cast<float*>(odata));
}

template <typename PlanType, typename... Args>
void _exec(PlanType& plan, double* idata, std::complex<double>* odata,
[[maybe_unused]] int direction, [[maybe_unused]] Args... args) {
auto d2z = oneapi::mkl::dft::compute_forward(
plan, idata, reinterpret_cast<double*>(odata));
d2z.wait();
int /*direction*/, Args...) {
oneapi::mkl::dft::compute_forward(plan, idata,
reinterpret_cast<double*>(odata));
}

template <typename PlanType, typename... Args>
void _exec(PlanType& plan, std::complex<float>* idata, float* odata,
[[maybe_unused]] int direction, [[maybe_unused]] Args... args) {
auto c2r = oneapi::mkl::dft::compute_backward(
plan, reinterpret_cast<float*>(idata), odata);
c2r.wait();
int /*direction*/, Args...) {
oneapi::mkl::dft::compute_backward(plan, reinterpret_cast<float*>(idata),
odata);
}

template <typename PlanType, typename... Args>
void _exec(PlanType& plan, std::complex<double>* idata, double* odata,
[[maybe_unused]] int direction, [[maybe_unused]] Args... args) {
auto z2d = oneapi::mkl::dft::compute_backward(
plan, reinterpret_cast<double*>(idata), odata);
z2d.wait();
int /*direction*/, Args...) {
oneapi::mkl::dft::compute_backward(plan, reinterpret_cast<double*>(idata),
odata);
}

template <typename PlanType, typename... Args>
void _exec(PlanType& plan, std::complex<float>* idata,
std::complex<float>* odata, [[maybe_unused]] int direction,
[[maybe_unused]] Args... args) {
std::complex<float>* odata, int direction, Args...) {
if (direction == 1) {
auto c2c = oneapi::mkl::dft::compute_forward(plan, idata, odata);
c2c.wait();
oneapi::mkl::dft::compute_forward(plan, idata, odata);
} else {
auto c2c = oneapi::mkl::dft::compute_backward(plan, idata, odata);
c2c.wait();
oneapi::mkl::dft::compute_backward(plan, idata, odata);
}
}

template <typename PlanType, typename... Args>
void _exec(PlanType& plan, std::complex<double>* idata,
std::complex<double>* odata, [[maybe_unused]] int direction,
[[maybe_unused]] Args... args) {
std::complex<double>* odata, int direction, Args...) {
if (direction == 1) {
auto z2z = oneapi::mkl::dft::compute_forward(plan, idata, odata);
z2z.wait();
oneapi::mkl::dft::compute_forward(plan, idata, odata);
} else {
auto z2z = oneapi::mkl::dft::compute_backward(plan, idata, odata);
z2z.wait();
oneapi::mkl::dft::compute_backward(plan, idata, odata);
}
}
} // namespace Impl
Expand Down
Loading

0 comments on commit e5be444

Please sign in to comment.