diff --git a/common/src/KokkosFFT_layouts.hpp b/common/src/KokkosFFT_layouts.hpp index 4dbeae37..f401c6ef 100644 --- a/common/src/KokkosFFT_layouts.hpp +++ b/common/src/KokkosFFT_layouts.hpp @@ -21,7 +21,8 @@ namespace Impl { */ template auto get_extents(const InViewType& in, const OutViewType& out, - axis_type axes, shape_type shape = {0}) { + axis_type axes, shape_type shape = {}, + bool is_inplace = false) { using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; using array_layout_type = typename InViewType::array_layout; @@ -65,20 +66,28 @@ auto get_extents(const InViewType& in, const OutViewType& out, if constexpr (is_real_v) { // Then R2C - KOKKOSFFT_THROW_IF( - _out_extents.at(inner_most_axis) != - _in_extents.at(inner_most_axis) / 2 + 1, - "For R2C, the 'output extent' of transform must be equal to " - "'input extent'/2 + 1"); + if (is_inplace) { + _in_extents.at(inner_most_axis) = _out_extents.at(inner_most_axis) * 2; + } else { + KOKKOSFFT_THROW_IF( + _out_extents.at(inner_most_axis) != + _in_extents.at(inner_most_axis) / 2 + 1, + "For R2C, the 'output extent' of transform must be equal to " + "'input extent'/2 + 1"); + } } if constexpr (is_real_v) { // Then C2R - KOKKOSFFT_THROW_IF( - _in_extents.at(inner_most_axis) != - _out_extents.at(inner_most_axis) / 2 + 1, - "For C2R, the 'input extent' of transform must be equal to " - "'output extent' / 2 + 1"); + if (is_inplace) { + _out_extents.at(inner_most_axis) = _in_extents.at(inner_most_axis) * 2; + } else { + KOKKOSFFT_THROW_IF( + _in_extents.at(inner_most_axis) != + _out_extents.at(inner_most_axis) / 2 + 1, + "For C2R, the 'input extent' of transform must be equal to " + "'output extent' / 2 + 1"); + } } if constexpr (std::is_same_v) { diff --git a/common/src/KokkosFFT_utils.hpp b/common/src/KokkosFFT_utils.hpp index 93a2e594..2009e3ce 100644 --- a/common/src/KokkosFFT_utils.hpp +++ b/common/src/KokkosFFT_utils.hpp @@ -17,6 +17,11 @@ namespace KokkosFFT { namespace Impl { +template +bool are_aliasing(const ScalarType1* ptr1, const ScalarType2* ptr2) { + return (static_cast(ptr1) == static_cast(ptr2)); +} + template auto convert_negative_axis(ViewType, int _axis = -1) { static_assert(Kokkos::is_view_v, diff --git a/common/unit_test/Test_Utils.cpp b/common/unit_test/Test_Utils.cpp index 9df472fe..8861e4c0 100644 --- a/common/unit_test/Test_Utils.cpp +++ b/common/unit_test/Test_Utils.cpp @@ -11,6 +11,13 @@ using test_types = ::testing::Types; // Int like types using base_int_types = ::testing::Types; +// value type combinations that are tested +using paired_scalar_types = ::testing::Types< + std::pair, std::pair>, + std::pair, Kokkos::complex>, + std::pair, std::pair>, + std::pair, Kokkos::complex>>; + // Basically the same fixtures, used for labeling tests template struct ConvertNegativeAxis : public ::testing::Test { @@ -30,9 +37,16 @@ struct ContainerTypes : public ::testing::Test { using array_type = std::array; }; +template +struct PairedScalarTypes : public ::testing::Test { + using value_type1 = typename T::first_type; + using value_type2 = typename T::second_type; +}; + TYPED_TEST_SUITE(ConvertNegativeAxis, test_types); TYPED_TEST_SUITE(ConvertNegativeShift, test_types); TYPED_TEST_SUITE(ContainerTypes, base_int_types); +TYPED_TEST_SUITE(PairedScalarTypes, paired_scalar_types); // Tests for convert_negative_axes over ND views template @@ -550,3 +564,25 @@ TEST(ToArray, lvalue) { TEST(ToArray, rvalue) { ASSERT_EQ(KokkosFFT::Impl::to_array(std::array{1, 2}), (Kokkos::Array{1, 2})); } + +template +void test_are_pointers_aliasing() { + using View1 = Kokkos::View; + using View2 = Kokkos::View; + + const int n1 = 10; + // sizeof ValeuType2 is larger or equal to ValueType1 + const int n2 = sizeof(ValueType1) == sizeof(ValueType2) ? n1 : n1 / 2 + 1; + View1 view1("view1", n1); + View2 view2("view2", n1); + View2 uview2(reinterpret_cast(view1.data()), n2); + + EXPECT_TRUE(KokkosFFT::Impl::are_aliasing(view1.data(), uview2.data())); + EXPECT_FALSE(KokkosFFT::Impl::are_aliasing(view1.data(), view2.data())); +} + +TYPED_TEST(PairedScalarTypes, are_pointers_aliasing) { + using value_type1 = typename TestFixture::value_type1; + using value_type2 = typename TestFixture::value_type2; + test_are_pointers_aliasing(); +} diff --git a/fft/src/KokkosFFT_Cuda_plans.hpp b/fft/src/KokkosFFT_Cuda_plans.hpp index 850f71bb..eaf43486 100644 --- a/fft/src/KokkosFFT_Cuda_plans.hpp +++ b/fft/src/KokkosFFT_Cuda_plans.hpp @@ -22,7 +22,8 @@ template & plan, const InViewType& in, const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type<1> axes, shape_type<1> s) { + Direction /*direction*/, axis_type<1> axes, shape_type<1> s, + bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -44,7 +45,7 @@ auto create_plan(const ExecutionSpace& exec_space, auto type = KokkosFFT::Impl::transform_type::type(); auto [in_extents, out_extents, fft_extents, howmany] = - KokkosFFT::Impl::get_extents(in, out, axes, s); + KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); const int nx = fft_extents.at(0); int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); @@ -64,7 +65,8 @@ template & plan, const InViewType& in, const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type<2> axes, shape_type<2> s) { + Direction /*direction*/, axis_type<2> axes, shape_type<2> s, + bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -86,7 +88,7 @@ auto create_plan(const ExecutionSpace& exec_space, auto type = KokkosFFT::Impl::transform_type::type(); [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] = - KokkosFFT::Impl::get_extents(in, out, axes, s); + KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); const int nx = fft_extents.at(0), ny = fft_extents.at(1); int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); @@ -106,7 +108,8 @@ template & plan, const InViewType& in, const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type<3> axes, shape_type<3> s) { + Direction /*direction*/, axis_type<3> axes, shape_type<3> s, + bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -128,7 +131,7 @@ auto create_plan(const ExecutionSpace& exec_space, auto type = KokkosFFT::Impl::transform_type::type(); [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] = - KokkosFFT::Impl::get_extents(in, out, axes, s); + KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); const int nx = fft_extents.at(0), ny = fft_extents.at(1), nz = fft_extents.at(2); @@ -151,7 +154,7 @@ auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, BufferViewType&, InfoType&, Direction /*direction*/, axis_type axes, - shape_type s) { + shape_type s, bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -172,7 +175,7 @@ auto create_plan(const ExecutionSpace& exec_space, KokkosFFT::Impl::transform_type::type(); auto [in_extents, out_extents, fft_extents, howmany] = - KokkosFFT::Impl::get_extents(in, out, axes, s); + KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, std::multiplies<>()); int odist = std::accumulate(out_extents.begin(), out_extents.end(), 1, diff --git a/fft/src/KokkosFFT_HIP_plans.hpp b/fft/src/KokkosFFT_HIP_plans.hpp index 2ab41a4c..0a8b8402 100644 --- a/fft/src/KokkosFFT_HIP_plans.hpp +++ b/fft/src/KokkosFFT_HIP_plans.hpp @@ -22,7 +22,8 @@ template & plan, const InViewType& in, const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type<1> axes, shape_type<1> s) { + Direction /*direction*/, axis_type<1> axes, shape_type<1> s, + bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -44,7 +45,7 @@ auto create_plan(const ExecutionSpace& exec_space, auto type = KokkosFFT::Impl::transform_type::type(); auto [in_extents, out_extents, fft_extents, howmany] = - KokkosFFT::Impl::get_extents(in, out, axes, s); + KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); const int nx = fft_extents.at(0); int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); @@ -64,7 +65,8 @@ template & plan, const InViewType& in, const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type<2> axes, shape_type<2> s) { + Direction /*direction*/, axis_type<2> axes, shape_type<2> s, + bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -86,7 +88,7 @@ auto create_plan(const ExecutionSpace& exec_space, auto type = KokkosFFT::Impl::transform_type::type(); [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] = - KokkosFFT::Impl::get_extents(in, out, axes, s); + KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); const int nx = fft_extents.at(0), ny = fft_extents.at(1); int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); @@ -106,7 +108,8 @@ template & plan, const InViewType& in, const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type<3> axes, shape_type<3> s) { + Direction /*direction*/, axis_type<3> axes, shape_type<3> s, + bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -128,7 +131,7 @@ auto create_plan(const ExecutionSpace& exec_space, auto type = KokkosFFT::Impl::transform_type::type(); [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] = - KokkosFFT::Impl::get_extents(in, out, axes, s); + KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); const int nx = fft_extents.at(0), ny = fft_extents.at(1), nz = fft_extents.at(2); @@ -151,7 +154,7 @@ auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, BufferViewType&, InfoType&, Direction /*direction*/, axis_type axes, - shape_type s) { + shape_type s, bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -172,7 +175,7 @@ auto create_plan(const ExecutionSpace& exec_space, KokkosFFT::Impl::transform_type::type(); auto [in_extents, out_extents, fft_extents, howmany] = - KokkosFFT::Impl::get_extents(in, out, axes, s); + KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, std::multiplies<>()); int odist = std::accumulate(out_extents.begin(), out_extents.end(), 1, diff --git a/fft/src/KokkosFFT_Host_plans.hpp b/fft/src/KokkosFFT_Host_plans.hpp index 3dc7bed1..40de5bc5 100644 --- a/fft/src/KokkosFFT_Host_plans.hpp +++ b/fft/src/KokkosFFT_Host_plans.hpp @@ -38,7 +38,7 @@ auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, BufferViewType&, InfoType&, Direction direction, axis_type axes, - shape_type s) { + shape_type s, bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -64,7 +64,7 @@ auto create_plan(const ExecutionSpace& exec_space, KokkosFFT::Impl::transform_type::type(); auto [in_extents, out_extents, fft_extents, howmany] = - KokkosFFT::Impl::get_extents(in, out, axes, s); + KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, std::multiplies<>()); int odist = std::accumulate(out_extents.begin(), out_extents.end(), 1, diff --git a/fft/src/KokkosFFT_Plans.hpp b/fft/src/KokkosFFT_Plans.hpp index b80bc139..f4b1e114 100644 --- a/fft/src/KokkosFFT_Plans.hpp +++ b/fft/src/KokkosFFT_Plans.hpp @@ -126,6 +126,9 @@ class Plan { //! whether crop or pad is needed or not bool m_is_crop_or_pad_needed = false; + //! in-place transform or not + bool m_is_inplace = false; + //! axes for fft axis_type m_axes; @@ -199,8 +202,16 @@ class Plan { m_shape = KokkosFFT::Impl::get_modified_shape(in, out, s, m_axes); m_is_crop_or_pad_needed = KokkosFFT::Impl::is_crop_or_pad_needed(in, m_shape); - m_fft_size = KokkosFFT::Impl::create_plan( - exec_space, m_plan, in, out, m_buffer, m_info, direction, m_axes, s); + m_is_inplace = KokkosFFT::Impl::are_aliasing(in.data(), out.data()); + KOKKOSFFT_THROW_IF(m_is_inplace && m_is_transpose_needed, + "In-place transform is not supported with transpose. " + "Please use out-of-place transform."); + KOKKOSFFT_THROW_IF(m_is_inplace && m_is_crop_or_pad_needed, + "In-place transform is not supported with reshape. " + "Please use out-of-place transform."); + m_fft_size = KokkosFFT::Impl::create_plan(exec_space, m_plan, in, out, + m_buffer, m_info, direction, + m_axes, s, m_is_inplace); } /// \brief Constructor for multidimensional FFT @@ -236,15 +247,15 @@ class Plan { KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, m_axes), "axes are invalid for in/out views"); if constexpr (KokkosFFT::Impl::is_real_v) { - KOKKOSFFT_THROW_IF( - m_direction != KokkosFFT::Direction::forward, - "real to complex transform is constructed with backward direction."); + KOKKOSFFT_THROW_IF(m_direction != KokkosFFT::Direction::forward, + "real to complex transform is constructed " + "with backward direction."); } if constexpr (KokkosFFT::Impl::is_real_v) { - KOKKOSFFT_THROW_IF( - m_direction != KokkosFFT::Direction::backward, - "complex to real transform is constructed with forward direction."); + KOKKOSFFT_THROW_IF(m_direction != KokkosFFT::Direction::backward, + "complex to real transform is constructed " + "with forward direction."); } m_in_extents = KokkosFFT::Impl::extract_extents(in); @@ -254,8 +265,16 @@ class Plan { m_shape = KokkosFFT::Impl::get_modified_shape(in, out, s, m_axes); m_is_crop_or_pad_needed = KokkosFFT::Impl::is_crop_or_pad_needed(in, m_shape); - m_fft_size = KokkosFFT::Impl::create_plan( - exec_space, m_plan, in, out, m_buffer, m_info, direction, axes, s); + m_is_inplace = KokkosFFT::Impl::are_aliasing(in.data(), out.data()); + KOKKOSFFT_THROW_IF(m_is_inplace && m_is_transpose_needed, + "In-place transform is not supported with transpose. " + "Please use out-of-place transform."); + KOKKOSFFT_THROW_IF(m_is_inplace && m_is_crop_or_pad_needed, + "In-place transform is not supported with reshape. " + "Please use out-of-place transform."); + m_fft_size = + KokkosFFT::Impl::create_plan(exec_space, m_plan, in, out, m_buffer, + m_info, direction, axes, s, m_is_inplace); } ~Plan() { @@ -336,6 +355,20 @@ class Plan { auto const direction = KokkosFFT::Impl::direction_type(m_direction); KokkosFFT::Impl::exec_plan(*m_plan, idata, odata, direction, m_info); + + if constexpr (KokkosFFT::Impl::is_complex_v && + KokkosFFT::Impl::is_real_v) { + if (m_is_inplace) { + // For the in-place Complex to Real transform, the output must be + // reshaped to fit the original size (in.size() * 2) for correct + // normalization + Kokkos::View out_tmp(out.data(), + in.size() * 2); + KokkosFFT::Impl::normalize(m_exec_space, out_tmp, m_direction, norm, + m_fft_size); + return; + } + } KokkosFFT::Impl::normalize(m_exec_space, out, m_direction, norm, m_fft_size); } @@ -350,13 +383,13 @@ class Plan { auto in_extents = KokkosFFT::Impl::extract_extents(in); auto out_extents = KokkosFFT::Impl::extract_extents(out); - KOKKOSFFT_THROW_IF( - in_extents != m_in_extents, - "extents of input View for plan and execution are not identical."); + KOKKOSFFT_THROW_IF(in_extents != m_in_extents, + "extents of input View for plan and " + "execution are not identical."); - KOKKOSFFT_THROW_IF( - out_extents != m_out_extents, - "extents of output View for plan and execution are not identical."); + KOKKOSFFT_THROW_IF(out_extents != m_out_extents, + "extents of output View for plan and " + "execution are not identical."); } }; } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_ROCM_plans.hpp b/fft/src/KokkosFFT_ROCM_plans.hpp index 08d7457d..88ed85ae 100644 --- a/fft/src/KokkosFFT_ROCM_plans.hpp +++ b/fft/src/KokkosFFT_ROCM_plans.hpp @@ -11,6 +11,7 @@ #include "KokkosFFT_layouts.hpp" #include "KokkosFFT_traits.hpp" #include "KokkosFFT_asserts.hpp" +#include "KokkosFFT_utils.hpp" namespace KokkosFFT { namespace Impl { @@ -91,7 +92,8 @@ auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, BufferViewType& buffer, InfoType& execution_info, Direction direction, - axis_type axes, shape_type s) { + axis_type axes, shape_type s, + bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -111,7 +113,7 @@ auto create_plan(const ExecutionSpace& exec_space, KokkosFFT::Impl::transform_type::type(); auto [in_extents, out_extents, fft_extents, howmany] = - KokkosFFT::Impl::get_extents(in, out, axes, s); + KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, std::multiplies<>()); int odist = std::accumulate(out_extents.begin(), out_extents.end(), 1, @@ -152,7 +154,8 @@ auto create_plan(const ExecutionSpace& exec_space, "rocfft_plan_description_set_data_layout failed"); // Out-of-place transform - const rocfft_result_placement place = rocfft_placement_notinplace; + const rocfft_result_placement place = + is_inplace ? rocfft_placement_inplace : rocfft_placement_notinplace; // Create a plan plan = std::make_unique(); diff --git a/fft/src/KokkosFFT_SYCL_plans.hpp b/fft/src/KokkosFFT_SYCL_plans.hpp index 2b36a4b0..0e1bca83 100644 --- a/fft/src/KokkosFFT_SYCL_plans.hpp +++ b/fft/src/KokkosFFT_SYCL_plans.hpp @@ -10,6 +10,7 @@ #include "KokkosFFT_SYCL_types.hpp" #include "KokkosFFT_layouts.hpp" #include "KokkosFFT_traits.hpp" +#include "KokkosFFT_utils.hpp" namespace KokkosFFT { namespace Impl { @@ -53,7 +54,7 @@ auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, BufferViewType&, InfoType&, Direction /*direction*/, axis_type axes, - shape_type s) { + shape_type s, bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -68,7 +69,7 @@ auto create_plan(const ExecutionSpace& exec_space, "KokkosFFT::create_plan: Rank of View must be larger than Rank of FFT."); auto [in_extents, out_extents, fft_extents, howmany] = - KokkosFFT::Impl::get_extents(in, out, axes, s); + KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, std::multiplies<>()); int odist = std::accumulate(out_extents.begin(), out_extents.end(), 1, @@ -98,7 +99,8 @@ auto create_plan(const ExecutionSpace& exec_space, static_cast(howmany)); // Data layout in conjugate-even domain - plan->set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_NOT_INPLACE); + int placement = is_inplace ? DFTI_INPLACE : DFTI_NOT_INPLACE; + plan->set_value(oneapi::mkl::dft::config_param::PLACEMENT, placement); plan->set_value(oneapi::mkl::dft::config_param::CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX); diff --git a/fft/unit_test/Test_Transform.cpp b/fft/unit_test/Test_Transform.cpp index 2607fab5..a00bcd31 100644 --- a/fft/unit_test/Test_Transform.cpp +++ b/fft/unit_test/Test_Transform.cpp @@ -13,92 +13,10 @@ template using shape_type = KokkosFFT::shape_type; -/// Kokkos equivalent of fft1 with numpy -/// def fft1(x): -/// L = len(x) -/// phase = -2j * np.pi * (np.arange(L) / L) -/// phase = np.arange(L).reshape(-1, 1) * phase -/// return np.sum(x*np.exp(phase), axis=1) -template -void fft1(ViewType& in, ViewType& out) { - using value_type = typename ViewType::non_const_value_type; - using real_value_type = KokkosFFT::Impl::base_floating_point_type; - - static_assert(KokkosFFT::Impl::is_complex_v, - "fft1: ViewType must be complex"); - - const value_type I(0.0, 1.0); - std::size_t L = in.size(); - - Kokkos::parallel_for( - "KokkosFFT::Test::fft1", - Kokkos::TeamPolicy(L, Kokkos::AUTO), - KOKKOS_LAMBDA( - const Kokkos::TeamPolicy::member_type& team_member) { - const int j = team_member.league_rank(); - - value_type sum = 0; - Kokkos::parallel_reduce( - Kokkos::TeamThreadRange(team_member, L), - [&](const int i, value_type& lsum) { - auto phase = -2 * I * M_PI * static_cast(i) / - static_cast(L); - - auto tmp_in = in(i); - lsum += - tmp_in * Kokkos::exp(static_cast(j) * phase); - }, - sum); - - out(j) = sum; - }); -} - -/// Kokkos equivalent of ifft1 with numpy -/// def ifft1(x): -/// L = len(x) -/// phase = 2j * np.pi * (np.arange(L) / L) -/// phase = np.arange(L).reshape(-1, 1) * phase -/// return np.sum(x*np.exp(phase), axis=1) -template -void ifft1(ViewType& in, ViewType& out) { - using value_type = typename ViewType::non_const_value_type; - using real_value_type = KokkosFFT::Impl::base_floating_point_type; - - static_assert(KokkosFFT::Impl::is_complex_v, - "ifft1: ViewType must be complex"); - - const value_type I(0.0, 1.0); - std::size_t L = in.size(); - - Kokkos::parallel_for( - "KokkosFFT::Test::ifft1", - Kokkos::TeamPolicy(L, Kokkos::AUTO), - KOKKOS_LAMBDA( - const Kokkos::TeamPolicy::member_type& team_member) { - const int j = team_member.league_rank(); - - value_type sum = 0; - Kokkos::parallel_reduce( - Kokkos::TeamThreadRange(team_member, L), - [&](const int i, value_type& lsum) { - auto phase = 2 * I * M_PI * static_cast(i) / - static_cast(L); - - auto tmp_in = in(i); - lsum += - tmp_in * Kokkos::exp(static_cast(j) * phase); - }, - sum); - - out(j) = sum; - }); -} - using test_types = ::testing::Types, std::pair, std::pair, - std::pair >; + std::pair>; // Basically the same fixtures, used for labeling tests template @@ -156,6 +74,46 @@ void test_fft1_identity(T atol = 1.0e-12) { } } +template +void test_fft1_identity_inplace(T atol = 1.0e-12) { + const int maxlen = 32; + using RealView1DType = Kokkos::View; + using ComplexView1DType = + Kokkos::View*, LayoutType, execution_space>; + + for (int i = 1; i < maxlen; i++) { + ComplexView1DType a("a", i), a_ref("a_ref", i); + ComplexView1DType a_hat(a.data(), i), inv_a_hat(a.data(), i); + + // Used for Real transforms + ComplexView1DType ar_hat("ar_hat", i / 2 + 1); + RealView1DType ar(reinterpret_cast(ar_hat.data()), i), + inv_ar_hat(reinterpret_cast(ar_hat.data()), i); + RealView1DType ar_ref("ar_ref", i); + + const Kokkos::complex z(1.0, 1.0); + Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); + Kokkos::fill_random(a, random_pool, z); + Kokkos::fill_random(ar, random_pool, 1.0); + + Kokkos::deep_copy(a_ref, a); + Kokkos::deep_copy(ar_ref, ar); + + Kokkos::fence(); + + KokkosFFT::fft(execution_space(), a, a_hat); + KokkosFFT::ifft(execution_space(), a_hat, inv_a_hat); + + KokkosFFT::rfft(execution_space(), ar, ar_hat); + KokkosFFT::irfft(execution_space(), ar_hat, inv_ar_hat); + + Kokkos::fence(); + + EXPECT_TRUE(allclose(inv_a_hat, a_ref, 1.e-5, atol)); + EXPECT_TRUE(allclose(inv_ar_hat, ar_ref, 1.e-5, atol)); + } +} + template void test_fft1_identity_reuse_plan(T atol = 1.0e-12) { const int maxlen = 32; @@ -1156,6 +1114,15 @@ TYPED_TEST(FFT1D, Identity_1DView) { test_fft1_identity(atol); } +// Identity tests on 1D Views for in-place transform +TYPED_TEST(FFT1D, Identity_1DView_inplace) { + using float_type = typename TestFixture::float_type; + using layout_type = typename TestFixture::layout_type; + + float_type atol = std::is_same_v ? 1.0e-6 : 1.0e-12; + test_fft1_identity_inplace(atol); +} + // Identity tests on 1D Views with plan reuse TYPED_TEST(FFT1D, Identity_1DView_reuse_plans) { using float_type = typename TestFixture::float_type; @@ -1700,6 +1667,99 @@ void test_fft2_2dfft_2dview_shape(T atol = 1.0e-12) { } } +template +void test_fft2_2dfft_2dview_inplace([[maybe_unused]] T atol = 1.0e-12) { + const int n0 = 4, n1 = 6; + using RealView2DType = Kokkos::View; + using ComplexView2DType = + Kokkos::View**, LayoutType, execution_space>; + + ComplexView2DType x("x", n0, n1), x_ref("x_ref", n0, n1); + ComplexView2DType x_hat(x.data(), n0, n1), inv_x_hat(x.data(), n0, n1); + + // Used for real transforms + ComplexView2DType xr_hat("xr_hat", n0, n1 / 2 + 1), + xr_hat_ref("xr_hat_ref", n0, n1 / 2 + 1); + RealView2DType xr_ref("xr_ref", n0, n1), + inv_xr_hat_unpadded("inv_xr_hat_unpadded", n0, n1), + inv_xr_hat_ref("inv_xr_hat_ref", n0, n1); + + // Unmanged views for in-place transforms + RealView2DType xr(reinterpret_cast(xr_hat.data()), n0, n1), + inv_xr_hat(reinterpret_cast(xr_hat.data()), n0, n1); + RealView2DType xr_padded(reinterpret_cast(xr_hat.data()), n0, + (n1 / 2 + 1) * 2), + inv_xr_hat_padded(reinterpret_cast(xr_hat.data()), n0, + (n1 / 2 + 1) * 2); + + // Initialize xr_hat through xr_padded + auto sub_xr_padded = + Kokkos::subview(xr_padded, Kokkos::ALL, Kokkos::pair(0, n1)); + + const Kokkos::complex z(1.0, 1.0); + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(xr_ref, random_pool, 1.0); + Kokkos::fill_random(x, random_pool, z); + Kokkos::deep_copy(sub_xr_padded, xr_ref); + Kokkos::deep_copy(x_ref, x); + + using axes_type = KokkosFFT::axis_type<2>; + axes_type axes = {-2, -1}; + + if constexpr (std::is_same_v) { + // in-place transforms are not supported if transpose is needed + EXPECT_THROW(KokkosFFT::fft2(execution_space(), x, x_hat, + KokkosFFT::Normalization::backward, axes), + std::runtime_error); + EXPECT_THROW(KokkosFFT::ifft2(execution_space(), x_hat, inv_x_hat, + KokkosFFT::Normalization::backward, axes), + std::runtime_error); + EXPECT_THROW(KokkosFFT::rfft2(execution_space(), xr, xr_hat, + KokkosFFT::Normalization::backward, axes), + std::runtime_error); + EXPECT_THROW(KokkosFFT::irfft2(execution_space(), xr_hat, inv_xr_hat, + KokkosFFT::Normalization::backward, axes), + std::runtime_error); + } else { + // Identity tests for complex transforms + KokkosFFT::fft2(execution_space(), x, x_hat, + KokkosFFT::Normalization::backward, axes); + KokkosFFT::ifft2(execution_space(), x_hat, inv_x_hat, + KokkosFFT::Normalization::backward, axes); + + Kokkos::fence(); + EXPECT_TRUE(allclose(inv_x_hat, x_ref, 1.e-5, atol)); + + // In-place transforms + KokkosFFT::rfft2(execution_space(), xr, xr_hat, + KokkosFFT::Normalization::backward, axes); + + // Out-of-place transforms (reference) + KokkosFFT::rfft2(execution_space(), xr_ref, xr_hat_ref, + KokkosFFT::Normalization::backward, axes); + + Kokkos::fence(); + EXPECT_TRUE(allclose(xr_hat, xr_hat_ref, 1.e-5, atol)); + + // In-place transforms + Kokkos::fill_random(xr_hat, random_pool, z); + Kokkos::deep_copy(xr_hat_ref, xr_hat); + KokkosFFT::irfft2(execution_space(), xr_hat, inv_xr_hat, + KokkosFFT::Normalization::backward, axes); + + // Out-of-place transforms (reference) + KokkosFFT::irfft2(execution_space(), xr_hat_ref, inv_xr_hat_ref, + KokkosFFT::Normalization::backward, axes); + + Kokkos::fence(); + auto sub_inv_xr_hat_padded = Kokkos::subview(inv_xr_hat_padded, Kokkos::ALL, + Kokkos::pair(0, n1)); + Kokkos::deep_copy(inv_xr_hat_unpadded, sub_inv_xr_hat_padded); + + EXPECT_TRUE(allclose(inv_xr_hat_unpadded, inv_xr_hat_ref, 1.e-5, atol)); + } +} + template void test_fft2_2dfft_3dview(T atol = 1.e-12) { const int n0 = 10, n1 = 6, n2 = 8; @@ -2217,6 +2277,14 @@ TYPED_TEST(FFT2D, 2DFFT_2DView_shape) { test_fft2_2dfft_2dview_shape(); } +// fft2 on 2D Views with in-place transform +TYPED_TEST(FFT2D, 2DFFT_2DView_inplace) { + using float_type = typename TestFixture::float_type; + using layout_type = typename TestFixture::layout_type; + + test_fft2_2dfft_2dview_inplace(); +} + // batced fft2 on 3D Views TYPED_TEST(FFT2D, FFT_batched_3DView) { using float_type = typename TestFixture::float_type; diff --git a/fft/unit_test/Test_Utils.hpp b/fft/unit_test/Test_Utils.hpp index 055db441..54097bde 100644 --- a/fft/unit_test/Test_Utils.hpp +++ b/fft/unit_test/Test_Utils.hpp @@ -60,10 +60,104 @@ void display(ViewType& a) { auto* data = h_a.data(); std::cout << std::scientific << std::setprecision(16) << std::flush; - for (int i = 0; i < n; i++) { + for (std::size_t i = 0; i < n; i++) { std::cout << label + "[" << i << "]: " << i << ", " << data[i] << std::endl; } std::cout << std::resetiosflags(std::ios_base::floatfield); } +/// \brief Kokkos equivalent of fft1 with numpy +/// def fft1(x): +/// L = len(x) +/// phase = -2j * np.pi * (np.arange(L) / L) +/// phase = np.arange(L).reshape(-1, 1) * phase +/// return np.sum(x*np.exp(phase), axis=1) +/// +/// \tparam ViewType: Input and output view type +/// +/// \param in [in]: Input rank 1 view +/// \param out [out]: Output rank 1 view +template +void fft1(const ViewType& in, const ViewType& out) { + using value_type = typename ViewType::non_const_value_type; + using real_value_type = KokkosFFT::Impl::base_floating_point_type; + + static_assert(KokkosFFT::Impl::is_complex_v, + "fft1: ViewType must be complex"); + + constexpr auto pi = Kokkos::numbers::pi_v; + const value_type I(0.0, 1.0); + std::size_t L = in.size(); + + Kokkos::parallel_for( + "KokkosFFT::Test::fft1", + Kokkos::TeamPolicy(L, Kokkos::AUTO), + KOKKOS_LAMBDA( + const Kokkos::TeamPolicy::member_type& team_member) { + const int j = team_member.league_rank(); + + value_type sum = 0; + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team_member, L), + [&](const int i, value_type& lsum) { + auto phase = -2 * I * pi * static_cast(i) / + static_cast(L); + + auto tmp_in = in(i); + lsum += + tmp_in * Kokkos::exp(static_cast(j) * phase); + }, + sum); + + out(j) = sum; + }); +} + +/// \brief Kokkos equivalent of ifft1 with numpy +/// def ifft1(x): +/// L = len(x) +/// phase = 2j * np.pi * (np.arange(L) / L) +/// phase = np.arange(L).reshape(-1, 1) * phase +/// return np.sum(x*np.exp(phase), axis=1) +/// +/// \tparam ViewType: Input and output view type +/// +/// \param in [in]: Input rank 1 view +/// \param out [out]: Output rank 1 view +template +void ifft1(const ViewType& in, const ViewType& out) { + using value_type = typename ViewType::non_const_value_type; + using real_value_type = KokkosFFT::Impl::base_floating_point_type; + + static_assert(KokkosFFT::Impl::is_complex_v, + "ifft1: ViewType must be complex"); + + constexpr auto pi = Kokkos::numbers::pi_v; + const value_type I(0.0, 1.0); + std::size_t L = in.size(); + + Kokkos::parallel_for( + "KokkosFFT::Test::ifft1", + Kokkos::TeamPolicy(L, Kokkos::AUTO), + KOKKOS_LAMBDA( + const Kokkos::TeamPolicy::member_type& team_member) { + const int j = team_member.league_rank(); + + value_type sum = 0; + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team_member, L), + [&](const int i, value_type& lsum) { + auto phase = 2 * I * pi * static_cast(i) / + static_cast(L); + + auto tmp_in = in(i); + lsum += + tmp_in * Kokkos::exp(static_cast(j) * phase); + }, + sum); + + out(j) = sum; + }); +} + #endif