From 80efe337962d5e883f353545b9e61867a89cf595 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Wed, 31 Jan 2024 00:16:01 +0900 Subject: [PATCH 01/13] Add benchmark as a submodule --- .gitmodules | 3 +++ tpls/benchmark | 1 + 2 files changed, 4 insertions(+) create mode 160000 tpls/benchmark diff --git a/.gitmodules b/.gitmodules index df51ef69..1bff3dde 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "tpls/googletest"] path = tpls/googletest url = https://github.com/google/googletest.git +[submodule "tpls/benchmark"] + path = tpls/benchmark + url = https://github.com/google/benchmark.git diff --git a/tpls/benchmark b/tpls/benchmark new file mode 160000 index 00000000..e9905638 --- /dev/null +++ b/tpls/benchmark @@ -0,0 +1 @@ +Subproject commit e990563876ef92990e873dc5b479d3b79cda2547 From 9f4f8e3a0316a5d8fad4191167a628f5938bed91 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Wed, 31 Jan 2024 19:41:20 +0900 Subject: [PATCH 02/13] Ad version files for benchmark --- CMakeLists.txt | 42 ++++++++++++++++++++++++++++- cmake/KokkosFFT_Version_Info.hpp.in | 36 +++++++++++++++++++++++++ cmake/KokkosFFT_config.h.in | 30 +++++++++++++++++++++ common/src/CMakeLists.txt | 11 +++++--- 4 files changed, 115 insertions(+), 4 deletions(-) create mode 100644 cmake/KokkosFFT_Version_Info.hpp.in create mode 100644 cmake/KokkosFFT_config.h.in diff --git a/CMakeLists.txt b/CMakeLists.txt index f5b02b65..ffcd64f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,19 @@ list(INSERT CMAKE_MODULE_PATH 0 "${CMAKE_CURRENT_SOURCE_DIR}/cmake") option(BUILD_EXAMPLES "Build kokkos-fft examples" ON) option(KokkosFFT_ENABLE_HOST_AND_DEVICE "Enable fft on both host and device" OFF) option(KokkosFFT_INTERNAL_Kokkos "Build internal Kokkos instead of relying on external one" OFF) +option(KokkosFFT_ENABLE_BENCHMARK "Build benchmarks for KokkosFFT" OFF) + +# Version information +set(KokkosFFT_VERSION_MAJOR 0) +set(KokkosFFT_VERSION_MINOR 0) +set(KokkosFFT_VERSION_PATCH 00) +set(KokkosFFT_VERSION "${KokkosFFT_VERSION_MAJOR}.${KokkosFFT_VERSION_MINOR}.${KokkosFFT_VERSION_PATCH}") + +#Set variables for config file +math(EXPR KOKKOSFFT_VERSION "${KokkosFFT_VERSION_MAJOR} * 10000 + ${KokkosFFT_VERSION_MINOR} * 100 + ${KokkosFFT_VERSION_PATCH}") +math(EXPR KOKKOSFFT_VERSION_MAJOR "${KOKKOSFFT_VERSION} / 10000") +math(EXPR KOKKOSFFT_VERSION_MINOR "${KOKKOSFFT_VERSION} / 100 % 100") +math(EXPR KOKKOSFFT_VERSION_PATCH "${KOKKOSFFT_VERSION} % 100") if (NOT KokkosFFT_INTERNAL_Kokkos) # First check, Kokkos is added as subdirectory or not @@ -27,6 +40,33 @@ if(BUILD_TESTING) endif() endif() +# Benchmark +if(KokkosFFT_ENABLE_BENCHMARK) + option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." OFF) + add_subdirectory(tpls/benchmark) + + # [TO DO] Fix this, it detects benchmark not a googlebench + #find_package(benchmark QUIET) + #if(NOT benchmark_FOUND) + # add_subdirectory(tpls/benchmark) + #endif() +endif() + +# Configure files to display configuration +# Configure the library +set( + PACKAGE_NAME_CONFIG_FILES + KokkosFFT_config.h + KokkosFFT_Version_Info.hpp +) + +foreach(CONFIG_FILE ${PACKAGE_NAME_CONFIG_FILES}) + configure_file( + cmake/${CONFIG_FILE}.in + ${CMAKE_BINARY_DIR}/${CONFIG_FILE} + ) +endforeach() + # Set directories used for install include(GNUInstallDirs) set(LIBDIR ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}) @@ -71,7 +111,7 @@ configure_package_config_file(cmake/KokkosFFTConfig.cmake.in write_basic_package_version_file( ${CMAKE_CURRENT_BINARY_DIR}/KokkosFFTConfigVersion.cmake - VERSION 0.0.0 + VERSION ${KokkosFFT_VERSION} COMPATIBILITY SameMajorVersion ) diff --git a/cmake/KokkosFFT_Version_Info.hpp.in b/cmake/KokkosFFT_Version_Info.hpp.in new file mode 100644 index 00000000..0d0f9812 --- /dev/null +++ b/cmake/KokkosFFT_Version_Info.hpp.in @@ -0,0 +1,36 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSFFT_VERSION_INFO_HPP +#define KOKKOSFFT_VERSION_INFO_HPP + +#include + +namespace KokkosFFT { +namespace Impl { + +constexpr std::string_view GIT_BRANCH = R"branch(@GIT_BRANCH@)branch"; +constexpr std::string_view GIT_COMMIT_HASH = "@GIT_COMMIT_HASH@"; +constexpr std::string_view GIT_CLEAN_STATUS = "@GIT_CLEAN_STATUS@"; +constexpr std::string_view GIT_COMMIT_DESCRIPTION = + R"message(@GIT_COMMIT_DESCRIPTION@)message"; +constexpr std::string_view GIT_COMMIT_DATE = "@GIT_COMMIT_DATE@"; +constexpr std::string_view BENCHMARK_VERSION = "@BENCHMARK_VERSION@"; + +} // namespace Impl +} // namespace KokkosFFT + +#endif \ No newline at end of file diff --git a/cmake/KokkosFFT_config.h.in b/cmake/KokkosFFT_config.h.in new file mode 100644 index 00000000..cf8321a3 --- /dev/null +++ b/cmake/KokkosFFT_config.h.in @@ -0,0 +1,30 @@ +#ifndef KOKKOSFFT_CONFIG_H +#define KOKKOSFFT_CONFIG_H + +/* Define the current version of Kokkos Kernels */ +#define KOKKOSFFT_VERSION @KOKKOSFFT_VERSION@ +#define KOKKOSFFT_VERSION_MAJOR @KOKKOSFFT_VERSION_MAJOR@ +#define KOKKOSFFT_VERSION_MINOR @KOKKOSFFT_VERSION_MINOR@ +#define KOKKOSFFT_VERSION_PATCH @KOKKOSFFT_VERSION_PATCH@ + +/* Define if building in debug mode */ +#cmakedefine HAVE_KOKKOSFFT_DEBUG + +#cmakedefine KOKKOSFFT_ENABLE_BENCHMARK + +/* Define this macro if experimental features of KokkosFFT are enabled */ +#cmakedefine HAVE_KOKKOSFFT_EXPERIMENTAL + +/* FFTW */ +#cmakedefine KOKKOSFFT_ENABLE_TPL_FFTW + +/* CUFFT */ +#cmakedefine KOKKOSFFT_ENABLE_TPL_CUFFT + +/* HIPFFT */ +#cmakedefine KOKKOSFFT_ENABLE_TPL_HIPFFT + +/* ONEMKL */ +#cmakedefine KOKKOSFFT_ENABLE_TPL_ONEMKL + +#endif \ No newline at end of file diff --git a/common/src/CMakeLists.txt b/common/src/CMakeLists.txt index fa7d5acd..0af53a3c 100644 --- a/common/src/CMakeLists.txt +++ b/common/src/CMakeLists.txt @@ -8,33 +8,38 @@ target_link_libraries(common if(Kokkos_ENABLE_CUDA) find_package(CUDAToolkit REQUIRED COMPONENTS cufft) target_link_libraries(common INTERFACE CUDA::cufft) + target_compile_definitions(common INTERFACE KOKKOSFFT_ENABLE_TPL_CUFFT) if(KokkosFFT_ENABLE_HOST_AND_DEVICE) find_package(FFTW MODULE REQUIRED) target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP) - target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE) + target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE KOKKOSFFT_ENABLE_TPL_FFTW) endif() elseif(Kokkos_ENABLE_HIP) find_package(hipfft REQUIRED) target_link_libraries(common INTERFACE hip::hipfft) + target_compile_definitions(common INTERFACE KOKKOSFFT_ENABLE_TPL_HIPFFT) if(KokkosFFT_ENABLE_HOST_AND_DEVICE) find_package(FFTW MODULE REQUIRED) target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP) - target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE) + target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE KOKKOSFFT_ENABLE_TPL_FFTW) endif() elseif(Kokkos_ENABLE_SYCL) find_package(MKL REQUIRED COMPONENTS SYCL) target_link_libraries(common INTERFACE MKL::MKL_SYCL) + target_compile_definitions(common INTERFACE KOKKOSFFT_ENABLE_TPL_ONEMKL) if(KokkosFFT_ENABLE_HOST_AND_DEVICE) find_package(FFTW MODULE REQUIRED) target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP) - target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE) + target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE KOKKOSFFT_ENABLE_TPL_FFTW) endif() elseif(Kokkos_ENABLE_OPENMP) find_package(FFTW MODULE REQUIRED) target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP) + target_compile_definitions(common INTERFACE KOKKOSFFT_ENABLE_TPL_FFTW) elseif(Kokkos_ENABLE_SERIAL) find_package(FFTW MODULE REQUIRED) target_link_libraries(common INTERFACE FFTW::Float FFTW::Double) + target_compile_definitions(common INTERFACE KOKKOSFFT_ENABLE_TPL_FFTW) endif() target_compile_features(common INTERFACE cxx_std_17) From 131eca13219f8cb79d52decebf92ada8dc708014 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Wed, 31 Jan 2024 19:42:12 +0900 Subject: [PATCH 03/13] Add a trivial benchmark --- fft/CMakeLists.txt | 4 + fft/perf_test/BenchmarkMain.cpp | 37 ++++++ fft/perf_test/Benchmark_Context.hpp | 121 +++++++++++++++++ fft/perf_test/CMakeLists.txt | 71 ++++++++++ .../KokkosFFT_PrintConfiguration.hpp | 85 ++++++++++++ fft/perf_test/KokkosFFT_TplsVersion.hpp | 40 ++++++ fft/perf_test/PerfTest_FFT1.cpp | 123 ++++++++++++++++++ fft/perf_test/PerfTest_FFT1.hpp | 103 +++++++++++++++ 8 files changed, 584 insertions(+) create mode 100644 fft/perf_test/BenchmarkMain.cpp create mode 100644 fft/perf_test/Benchmark_Context.hpp create mode 100644 fft/perf_test/CMakeLists.txt create mode 100644 fft/perf_test/KokkosFFT_PrintConfiguration.hpp create mode 100644 fft/perf_test/KokkosFFT_TplsVersion.hpp create mode 100644 fft/perf_test/PerfTest_FFT1.cpp create mode 100644 fft/perf_test/PerfTest_FFT1.hpp diff --git a/fft/CMakeLists.txt b/fft/CMakeLists.txt index ffddf45b..e2b19192 100644 --- a/fft/CMakeLists.txt +++ b/fft/CMakeLists.txt @@ -1,4 +1,8 @@ add_subdirectory(src) if(BUILD_TESTING) add_subdirectory(unit_test) +endif() + +if(KokkosFFT_ENABLE_BENCHMARK) + add_subdirectory(perf_test) endif() \ No newline at end of file diff --git a/fft/perf_test/BenchmarkMain.cpp b/fft/perf_test/BenchmarkMain.cpp new file mode 100644 index 00000000..2b6c856b --- /dev/null +++ b/fft/perf_test/BenchmarkMain.cpp @@ -0,0 +1,37 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +*/ + +#include + +#include "Benchmark_Context.hpp" +#include + +int main(int argc, char** argv) { + Kokkos::initialize(argc, argv); + { + benchmark::Initialize(&argc, argv); + benchmark::SetDefaultTimeUnit(benchmark::kSecond); + KokkosFFTBenchmark::add_benchmark_context(true); + + benchmark::RunSpecifiedBenchmarks(); + + benchmark::Shutdown(); + } + Kokkos::finalize(); + return 0; +} \ No newline at end of file diff --git a/fft/perf_test/Benchmark_Context.hpp b/fft/perf_test/Benchmark_Context.hpp new file mode 100644 index 00000000..1d222bb4 --- /dev/null +++ b/fft/perf_test/Benchmark_Context.hpp @@ -0,0 +1,121 @@ +#ifndef KOKKOSFFT_BENCHMARK_CONTEXT_HPP +#define KOKKOSFFT_BENCHMARK_CONTEXT_HPP + +#include +#include + +#include + +#include +#include "KokkosFFT_PrintConfiguration.hpp" +#include + +namespace KokkosFFTBenchmark { + /// \brief Remove unwanted spaces and colon signs from input string. In case of + /// invalid input it will return an empty string. + inline std::string remove_unwanted_characters(std::string str) { + auto from = str.find_first_not_of(" :"); + auto to = str.find_last_not_of(" :"); + + if (from == std::string::npos || to == std::string::npos) { + return ""; + } + + // return extracted part of string without unwanted spaces and colon signs + return str.substr(from, to + 1); + } + + /// \brief Extract all key:value pairs from kokkos configuration and add it to + /// the benchmark context + inline void add_kokkos_configuration(bool verbose) { + std::ostringstream msg; + Kokkos::print_configuration(msg, verbose); + KokkosFFT::print_configuration(msg); + + // Iterate over lines returned from kokkos and extract key:value pairs + std::stringstream ss{msg.str()}; + for (std::string line; std::getline(ss, line, '\n');) { + auto found = line.find_first_of(':'); + if (found != std::string::npos) { + auto val = remove_unwanted_characters(line.substr(found + 1)); + // Ignore line without value, for example a category name + if (!val.empty()) { + benchmark::AddCustomContext( + remove_unwanted_characters(line.substr(0, found)), val); + } + } + } + } + + /// \brief Add Kokkos Kernels git info and google benchmark release to + /// benchmark context. + inline void add_version_info() { + using namespace KokkosFFT::Impl; + + if (!GIT_BRANCH.empty()) { + benchmark::AddCustomContext("GIT_BRANCH", std::string(GIT_BRANCH)); + benchmark::AddCustomContext("GIT_COMMIT_HASH", + std::string(GIT_COMMIT_HASH)); + benchmark::AddCustomContext("GIT_CLEAN_STATUS", + std::string(GIT_CLEAN_STATUS)); + benchmark::AddCustomContext("GIT_COMMIT_DESCRIPTION", + std::string(GIT_COMMIT_DESCRIPTION)); + benchmark::AddCustomContext("GIT_COMMIT_DATE", + std::string(GIT_COMMIT_DATE)); + } + if (!BENCHMARK_VERSION.empty()) { + benchmark::AddCustomContext("GOOGLE_BENCHMARK_VERSION", + std::string(BENCHMARK_VERSION)); + } + } + + inline void add_env_info() { + auto num_threads = std::getenv("OMP_NUM_THREADS"); + if (num_threads) { + benchmark::AddCustomContext("OMP_NUM_THREADS", num_threads); + } + auto dynamic = std::getenv("OMP_DYNAMIC"); + if (dynamic) { + benchmark::AddCustomContext("OMP_DYNAMIC", dynamic); + } + auto proc_bind = std::getenv("OMP_PROC_BIND"); + if (proc_bind) { + benchmark::AddCustomContext("OMP_PROC_BIND", proc_bind); + } + auto places = std::getenv("OMP_PLACES"); + if (places) { + benchmark::AddCustomContext("OMP_PLACES", places); + } + } + + /// \brief Gather all context information and add it to benchmark context + inline void add_benchmark_context(bool verbose = false) { + add_kokkos_configuration(verbose); + add_version_info(); + add_env_info(); + } + + /** + * \brief Report throughput and amount of data processed for simple View + * operations + */ + template + void report_results(benchmark::State& state, InViewType in, OutViewType out, double time) { + // data processed in megabytes + const double in_data_processed = static_cast(in.size() * + sizeof(typename InViewType::value_type)) / + 1.0e6; + const double out_data_processed = static_cast(out.size() * + sizeof(typename OutViewType::value_type)) / + 1.0e6; + + state.SetIterationTime(time); + state.counters["MB (In)"] = benchmark::Counter(in_data_processed); + state.counters["MB (Out)"] = benchmark::Counter(out_data_processed); + state.counters["GB/s"] = benchmark::Counter( + (in_data_processed + out_data_processed) / 1.0e3, benchmark::Counter::kIsIterationInvariantRate); + } + +} // namespace KokkosFFTBenchmark + +#endif \ No newline at end of file diff --git a/fft/perf_test/CMakeLists.txt b/fft/perf_test/CMakeLists.txt new file mode 100644 index 00000000..f2f37c81 --- /dev/null +++ b/fft/perf_test/CMakeLists.txt @@ -0,0 +1,71 @@ +function(KOKKOSFFT_ADD_BENCHMARK name) + CMAKE_PARSE_ARGUMENTS( + BENCHMARK + "" + "" + "SOURCES" + ${ARGN} + ) + + if(DEFINED BENCHMARK_UNPARSED_ARGUMENTS) + message( + WARNING + "Unexpected arguments when adding a benchmark: " + ${BENCHMARK_UNPARSED_ARGUMENTS} + ) + endif() + + set(BENCHMARK_NAME ${PACKAGE_NAME}_${name}) + + #Adding BenchmarkMain.cpp to sources + list(APPEND BENCHMARK_SOURCES + BenchmarkMain.cpp + ) + + add_executable( + ${BENCHMARK_NAME} + ${BENCHMARK_SOURCES} + ) + target_link_libraries( + ${BENCHMARK_NAME} + PRIVATE benchmark::benchmark Kokkos::kokkos KokkosFFT::fft + ) + target_include_directories( + ${BENCHMARK_NAME} + SYSTEM PRIVATE ${benchmark_SOURCE_DIR}/include + ) + target_include_directories( + ${BENCHMARK_NAME} + PRIVATE ${CMAKE_BINARY_DIR} + ) + + foreach(SOURCE_FILE ${BENCHMARK_SOURCES}) + SET_SOURCE_FILES_PROPERTIES( + ${SOURCE_FILE} + PROPERTIES LANGUAGE CXX + ) + endforeach() + + string(TIMESTAMP BENCHMARK_TIME "%Y-%m-%d_T%H-%M-%S" UTC) + set( + BENCHMARK_ARGS + --benchmark_counters_tabular=true + --benchmark_out=${BENCHMARK_NAME}_${BENCHMARK_TIME}.json + ) + + add_test( + NAME ${BENCHMARK_NAME} + COMMAND ${BENCHMARK_NAME} ${BENCHMARK_ARGS} + ) +endfunction() + +# Set benchmark targets +set( + BENCHMARK_SOURCES + PerfTest_FFT1.cpp +) + +KOKKOSFFT_ADD_BENCHMARK( + PerformanceTest_Benchmark + SOURCES ${BENCHMARK_SOURCES} +) \ No newline at end of file diff --git a/fft/perf_test/KokkosFFT_PrintConfiguration.hpp b/fft/perf_test/KokkosFFT_PrintConfiguration.hpp new file mode 100644 index 00000000..bf590f00 --- /dev/null +++ b/fft/perf_test/KokkosFFT_PrintConfiguration.hpp @@ -0,0 +1,85 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSFFT_PRINT_CONFIGURATION_HPP +#define KOKKOSFFT_PRINT_CONFIGURATION_HPP + +#include "KokkosFFT_config.h" +#include "KokkosFFT_TplsVersion.hpp" +#include + +namespace KokkosFFT { +namespace Impl { + +inline void print_cufft_version_if_enabled(std::ostream& os) { +#if defined(KOKKOSFFT_ENABLE_TPL_CUFFT) + os << " " + << "KOKKOSFFT_ENABLE_TPL_CUFFT: " << cufft_version_string() << "\n"; +#else + os << " " + << "KOKKOSFFT_ENABLE_TPL_CUFFT: no\n"; +#endif +} + +inline void print_enabled_tpls(std::ostream& os) { +#ifdef KOKKOSFFT_ENABLE_TPL_FFTW + os << " " + << "KOKKOSFFT_ENABLE_TPL_FFTW: yes\n"; +#else + os << " " + << "KOKKOSFFT_ENABLE_TPL_FFTW: no\n"; +#endif + + print_cufft_version_if_enabled(os); + +#ifdef KOKKOSFFT_ENABLE_TPL_HIPFFT + os << " " + << "KOKKOSFFT_ENABLE_TPL_HIPFFT: yes\n"; +#else + os << " " + << "KOKKOSFFT_ENABLE_TPL_HIPFFT: no\n"; +#endif + +#ifdef KOKKOSFFT_ENABLE_TPL_ONEMKL + os << " " + << "KOKKOSFFT_ENABLE_TPL_ONEMKL: yes\n"; +#else + os << " " + << "KOKKOSFFT_ENABLE_TPL_ONEMKL: no\n"; +#endif +} + + +inline void print_version(std::ostream& os) { + + // KOKKOSFFT_VERSION is used because MAJOR, MINOR and PATCH macros + // are not available in FFT + os << " " + << "KokkosFFT Version: " << KOKKOSFFT_VERSION_MAJOR << "." + << KOKKOSFFT_VERSION_MINOR << "." << KOKKOSFFT_VERSION_PATCH + << '\n'; +} +} // namespace Impl + +inline void print_configuration(std::ostream& os) { + Impl::print_version(os); + + os << "TPLs: \n"; + Impl::print_enabled_tpls(os); +} + +} // namespace KokkosFFT + +#endif \ No newline at end of file diff --git a/fft/perf_test/KokkosFFT_TplsVersion.hpp b/fft/perf_test/KokkosFFT_TplsVersion.hpp new file mode 100644 index 00000000..11e0354a --- /dev/null +++ b/fft/perf_test/KokkosFFT_TplsVersion.hpp @@ -0,0 +1,40 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSFFT_TPLS_VERSIONS_HPP +#define KOKKOSFFT_TPLS_VERSIONS_HPP + +#include "KokkosFFT_config.h" +#include +#include + +#if defined(KOKKOSFFT_ENABLE_TPL_CUFFT) +#include "cufft.h" +#endif + +namespace KokkosFFT { +#if defined(KOKKOSFFT_ENABLE_TPL_CUFFT) +inline std::string cufft_version_string() { + // Print version + std::stringstream ss; + + ss << CUFFT_VER_MAJOR << "." << CUFFT_VER_MINOR << "." << CUFFT_VER_PATCH; + + return ss.str(); +} +#endif + +} // namespace KokkosFFT +#endif \ No newline at end of file diff --git a/fft/perf_test/PerfTest_FFT1.cpp b/fft/perf_test/PerfTest_FFT1.cpp new file mode 100644 index 00000000..846714cd --- /dev/null +++ b/fft/perf_test/PerfTest_FFT1.cpp @@ -0,0 +1,123 @@ +#include +#include "Benchmark_Context.hpp" +#include "PerfTest_FFT1.hpp" + +namespace KokkosFFTBenchmark { + +// 1D FFT on 1D View +BENCHMARK(FFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(FFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(FFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(FFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +// 1D IFFT on 1D View +BENCHMARK(IFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +// 1D RFFT on 1D View +BENCHMARK(RFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(RFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(RFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(RFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +// 1D IRFFT on 1D View +BENCHMARK(IRFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IRFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IRFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IRFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +} // namespace KokkosFFTBenchmark \ No newline at end of file diff --git a/fft/perf_test/PerfTest_FFT1.hpp b/fft/perf_test/PerfTest_FFT1.hpp new file mode 100644 index 00000000..074a78e0 --- /dev/null +++ b/fft/perf_test/PerfTest_FFT1.hpp @@ -0,0 +1,103 @@ +#ifndef KOKKOSFFT_PERFTEST_FFT1_HPP +#define KOKKOSFFT_PERFTEST_FFT1_HPP + +#include +#include +#include +#include "Benchmark_Context.hpp" + +using execution_space = Kokkos::DefaultExecutionSpace; + +namespace KokkosFFTBenchmark { + +template +void fft(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::fft(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +void ifft(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::ifft(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +void rfft(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::rfft(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +void irfft(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::irfft(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +static void FFT_1DView(benchmark::State& state) { + using ComplexView1DType = + Kokkos::View*, LayoutType, execution_space>; + + const int n = state.range(0); + ComplexView1DType x("x", n), x_hat("x_hat", n); + + fft(x, x_hat, state); +} + +template +static void IFFT_1DView(benchmark::State& state) { + using ComplexView1DType = + Kokkos::View*, LayoutType, execution_space>; + + const int n = state.range(0); + ComplexView1DType x("x", n), x_hat("x_hat", n); + + ifft(x, x_hat, state); +} + +template +static void RFFT_1DView(benchmark::State& state) { + using RealView1DType = Kokkos::View; + using ComplexView1DType = + Kokkos::View*, LayoutType, execution_space>; + + const int n = state.range(0); + RealView1DType x("x", n); + ComplexView1DType x_hat("x_hat", n/2+1); + + rfft(x, x_hat, state); +} + +template +static void IRFFT_1DView(benchmark::State& state) { + using RealView1DType = Kokkos::View; + using ComplexView1DType = + Kokkos::View*, LayoutType, execution_space>; + + const int n = state.range(0); + ComplexView1DType x("x", n/2+1); + RealView1DType x_hat("x_hat", n); + + irfft(x, x_hat, state); +} + +} // namespace KokkosFFTBenchmark + +#endif \ No newline at end of file From 928fd3ddb3e20d748be0c1d7d0f6f8bb85d7ab6f Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Wed, 31 Jan 2024 19:52:36 +0900 Subject: [PATCH 04/13] formatting --- fft/perf_test/Benchmark_Context.hpp | 195 +++++++++--------- .../KokkosFFT_PrintConfiguration.hpp | 9 +- fft/perf_test/KokkosFFT_TplsVersion.hpp | 2 +- fft/perf_test/PerfTest_FFT1.cpp | 2 +- fft/perf_test/PerfTest_FFT1.hpp | 6 +- 5 files changed, 107 insertions(+), 107 deletions(-) diff --git a/fft/perf_test/Benchmark_Context.hpp b/fft/perf_test/Benchmark_Context.hpp index 1d222bb4..337b3f90 100644 --- a/fft/perf_test/Benchmark_Context.hpp +++ b/fft/perf_test/Benchmark_Context.hpp @@ -11,111 +11,114 @@ #include namespace KokkosFFTBenchmark { - /// \brief Remove unwanted spaces and colon signs from input string. In case of - /// invalid input it will return an empty string. - inline std::string remove_unwanted_characters(std::string str) { - auto from = str.find_first_not_of(" :"); - auto to = str.find_last_not_of(" :"); - - if (from == std::string::npos || to == std::string::npos) { - return ""; - } - - // return extracted part of string without unwanted spaces and colon signs - return str.substr(from, to + 1); +/// \brief Remove unwanted spaces and colon signs from input string. In case of +/// invalid input it will return an empty string. +inline std::string remove_unwanted_characters(std::string str) { + auto from = str.find_first_not_of(" :"); + auto to = str.find_last_not_of(" :"); + + if (from == std::string::npos || to == std::string::npos) { + return ""; } - /// \brief Extract all key:value pairs from kokkos configuration and add it to - /// the benchmark context - inline void add_kokkos_configuration(bool verbose) { - std::ostringstream msg; - Kokkos::print_configuration(msg, verbose); - KokkosFFT::print_configuration(msg); - - // Iterate over lines returned from kokkos and extract key:value pairs - std::stringstream ss{msg.str()}; - for (std::string line; std::getline(ss, line, '\n');) { - auto found = line.find_first_of(':'); - if (found != std::string::npos) { - auto val = remove_unwanted_characters(line.substr(found + 1)); - // Ignore line without value, for example a category name - if (!val.empty()) { - benchmark::AddCustomContext( - remove_unwanted_characters(line.substr(0, found)), val); - } + // return extracted part of string without unwanted spaces and colon signs + return str.substr(from, to + 1); +} + +/// \brief Extract all key:value pairs from kokkos configuration and add it to +/// the benchmark context +inline void add_kokkos_configuration(bool verbose) { + std::ostringstream msg; + Kokkos::print_configuration(msg, verbose); + KokkosFFT::print_configuration(msg); + + // Iterate over lines returned from kokkos and extract key:value pairs + std::stringstream ss{msg.str()}; + for (std::string line; std::getline(ss, line, '\n');) { + auto found = line.find_first_of(':'); + if (found != std::string::npos) { + auto val = remove_unwanted_characters(line.substr(found + 1)); + // Ignore line without value, for example a category name + if (!val.empty()) { + benchmark::AddCustomContext( + remove_unwanted_characters(line.substr(0, found)), val); } } } - - /// \brief Add Kokkos Kernels git info and google benchmark release to - /// benchmark context. - inline void add_version_info() { - using namespace KokkosFFT::Impl; - - if (!GIT_BRANCH.empty()) { - benchmark::AddCustomContext("GIT_BRANCH", std::string(GIT_BRANCH)); - benchmark::AddCustomContext("GIT_COMMIT_HASH", - std::string(GIT_COMMIT_HASH)); - benchmark::AddCustomContext("GIT_CLEAN_STATUS", - std::string(GIT_CLEAN_STATUS)); - benchmark::AddCustomContext("GIT_COMMIT_DESCRIPTION", - std::string(GIT_COMMIT_DESCRIPTION)); - benchmark::AddCustomContext("GIT_COMMIT_DATE", - std::string(GIT_COMMIT_DATE)); - } - if (!BENCHMARK_VERSION.empty()) { - benchmark::AddCustomContext("GOOGLE_BENCHMARK_VERSION", - std::string(BENCHMARK_VERSION)); - } +} + +/// \brief Add Kokkos Kernels git info and google benchmark release to +/// benchmark context. +inline void add_version_info() { + using namespace KokkosFFT::Impl; + + if (!GIT_BRANCH.empty()) { + benchmark::AddCustomContext("GIT_BRANCH", std::string(GIT_BRANCH)); + benchmark::AddCustomContext("GIT_COMMIT_HASH", + std::string(GIT_COMMIT_HASH)); + benchmark::AddCustomContext("GIT_CLEAN_STATUS", + std::string(GIT_CLEAN_STATUS)); + benchmark::AddCustomContext("GIT_COMMIT_DESCRIPTION", + std::string(GIT_COMMIT_DESCRIPTION)); + benchmark::AddCustomContext("GIT_COMMIT_DATE", + std::string(GIT_COMMIT_DATE)); } - - inline void add_env_info() { - auto num_threads = std::getenv("OMP_NUM_THREADS"); - if (num_threads) { - benchmark::AddCustomContext("OMP_NUM_THREADS", num_threads); - } - auto dynamic = std::getenv("OMP_DYNAMIC"); - if (dynamic) { - benchmark::AddCustomContext("OMP_DYNAMIC", dynamic); - } - auto proc_bind = std::getenv("OMP_PROC_BIND"); - if (proc_bind) { - benchmark::AddCustomContext("OMP_PROC_BIND", proc_bind); - } - auto places = std::getenv("OMP_PLACES"); - if (places) { - benchmark::AddCustomContext("OMP_PLACES", places); - } + if (!BENCHMARK_VERSION.empty()) { + benchmark::AddCustomContext("GOOGLE_BENCHMARK_VERSION", + std::string(BENCHMARK_VERSION)); } +} - /// \brief Gather all context information and add it to benchmark context - inline void add_benchmark_context(bool verbose = false) { - add_kokkos_configuration(verbose); - add_version_info(); - add_env_info(); +inline void add_env_info() { + auto num_threads = std::getenv("OMP_NUM_THREADS"); + if (num_threads) { + benchmark::AddCustomContext("OMP_NUM_THREADS", num_threads); } - - /** - * \brief Report throughput and amount of data processed for simple View - * operations - */ - template - void report_results(benchmark::State& state, InViewType in, OutViewType out, double time) { - // data processed in megabytes - const double in_data_processed = static_cast(in.size() * - sizeof(typename InViewType::value_type)) / - 1.0e6; - const double out_data_processed = static_cast(out.size() * - sizeof(typename OutViewType::value_type)) / - 1.0e6; - - state.SetIterationTime(time); - state.counters["MB (In)"] = benchmark::Counter(in_data_processed); - state.counters["MB (Out)"] = benchmark::Counter(out_data_processed); - state.counters["GB/s"] = benchmark::Counter( - (in_data_processed + out_data_processed) / 1.0e3, benchmark::Counter::kIsIterationInvariantRate); + auto dynamic = std::getenv("OMP_DYNAMIC"); + if (dynamic) { + benchmark::AddCustomContext("OMP_DYNAMIC", dynamic); } - -} // namespace KokkosFFTBenchmark + auto proc_bind = std::getenv("OMP_PROC_BIND"); + if (proc_bind) { + benchmark::AddCustomContext("OMP_PROC_BIND", proc_bind); + } + auto places = std::getenv("OMP_PLACES"); + if (places) { + benchmark::AddCustomContext("OMP_PLACES", places); + } +} + +/// \brief Gather all context information and add it to benchmark context +inline void add_benchmark_context(bool verbose = false) { + add_kokkos_configuration(verbose); + add_version_info(); + add_env_info(); +} + +/** + * \brief Report throughput and amount of data processed for simple View + * operations + */ +template +void report_results(benchmark::State& state, InViewType in, OutViewType out, + double time) { + // data processed in megabytes + const double in_data_processed = + static_cast(in.size() * sizeof(typename InViewType::value_type)) / + 1.0e6; + const double out_data_processed = + static_cast(out.size() * + sizeof(typename OutViewType::value_type)) / + 1.0e6; + + state.SetIterationTime(time); + state.counters["MB (In)"] = benchmark::Counter(in_data_processed); + state.counters["MB (Out)"] = benchmark::Counter(out_data_processed); + state.counters["GB/s"] = + benchmark::Counter((in_data_processed + out_data_processed) / 1.0e3, + benchmark::Counter::kIsIterationInvariantRate); +} + +} // namespace KokkosFFTBenchmark #endif \ No newline at end of file diff --git a/fft/perf_test/KokkosFFT_PrintConfiguration.hpp b/fft/perf_test/KokkosFFT_PrintConfiguration.hpp index bf590f00..022b3efc 100644 --- a/fft/perf_test/KokkosFFT_PrintConfiguration.hpp +++ b/fft/perf_test/KokkosFFT_PrintConfiguration.hpp @@ -61,17 +61,14 @@ inline void print_enabled_tpls(std::ostream& os) { #endif } - inline void print_version(std::ostream& os) { - // KOKKOSFFT_VERSION is used because MAJOR, MINOR and PATCH macros // are not available in FFT os << " " << "KokkosFFT Version: " << KOKKOSFFT_VERSION_MAJOR << "." - << KOKKOSFFT_VERSION_MINOR << "." << KOKKOSFFT_VERSION_PATCH - << '\n'; + << KOKKOSFFT_VERSION_MINOR << "." << KOKKOSFFT_VERSION_PATCH << '\n'; } -} // namespace Impl +} // namespace Impl inline void print_configuration(std::ostream& os) { Impl::print_version(os); @@ -80,6 +77,6 @@ inline void print_configuration(std::ostream& os) { Impl::print_enabled_tpls(os); } -} // namespace KokkosFFT +} // namespace KokkosFFT #endif \ No newline at end of file diff --git a/fft/perf_test/KokkosFFT_TplsVersion.hpp b/fft/perf_test/KokkosFFT_TplsVersion.hpp index 11e0354a..e5ec33da 100644 --- a/fft/perf_test/KokkosFFT_TplsVersion.hpp +++ b/fft/perf_test/KokkosFFT_TplsVersion.hpp @@ -36,5 +36,5 @@ inline std::string cufft_version_string() { } #endif -} // namespace KokkosFFT +} // namespace KokkosFFT #endif \ No newline at end of file diff --git a/fft/perf_test/PerfTest_FFT1.cpp b/fft/perf_test/PerfTest_FFT1.cpp index 846714cd..4148103a 100644 --- a/fft/perf_test/PerfTest_FFT1.cpp +++ b/fft/perf_test/PerfTest_FFT1.cpp @@ -120,4 +120,4 @@ BENCHMARK(IRFFT_1DView) ->UseManualTime() ->Unit(benchmark::kMicrosecond); -} // namespace KokkosFFTBenchmark \ No newline at end of file +} // namespace KokkosFFTBenchmark \ No newline at end of file diff --git a/fft/perf_test/PerfTest_FFT1.hpp b/fft/perf_test/PerfTest_FFT1.hpp index 074a78e0..6424b4e7 100644 --- a/fft/perf_test/PerfTest_FFT1.hpp +++ b/fft/perf_test/PerfTest_FFT1.hpp @@ -80,7 +80,7 @@ static void RFFT_1DView(benchmark::State& state) { const int n = state.range(0); RealView1DType x("x", n); - ComplexView1DType x_hat("x_hat", n/2+1); + ComplexView1DType x_hat("x_hat", n / 2 + 1); rfft(x, x_hat, state); } @@ -92,12 +92,12 @@ static void IRFFT_1DView(benchmark::State& state) { Kokkos::View*, LayoutType, execution_space>; const int n = state.range(0); - ComplexView1DType x("x", n/2+1); + ComplexView1DType x("x", n / 2 + 1); RealView1DType x_hat("x_hat", n); irfft(x, x_hat, state); } -} // namespace KokkosFFTBenchmark +} // namespace KokkosFFTBenchmark #endif \ No newline at end of file From e31d49b040c88a72dbaa9cd60dc0074290357c62 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Wed, 31 Jan 2024 23:34:02 +0900 Subject: [PATCH 05/13] [Bugfix] swap_direction may be included multiple times --- common/src/KokkosFFT_normalization.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/KokkosFFT_normalization.hpp b/common/src/KokkosFFT_normalization.hpp index 47614a98..e9e5e4cf 100644 --- a/common/src/KokkosFFT_normalization.hpp +++ b/common/src/KokkosFFT_normalization.hpp @@ -61,7 +61,7 @@ void normalize(const ExecutionSpace& exec_space, ViewType& inout, if (to_normalize) _normalize(exec_space, inout, coef); } -auto swap_direction(Normalization normalization) { +inline auto swap_direction(Normalization normalization) { Normalization new_direction = Normalization::FORWARD; switch (normalization) { case Normalization::FORWARD: new_direction = Normalization::BACKWARD; break; From a752862f9e74984edded55f7c18e6d5f154c5b83 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Wed, 31 Jan 2024 23:34:40 +0900 Subject: [PATCH 06/13] Benchmark for fft2 --- fft/perf_test/CMakeLists.txt | 1 + fft/perf_test/PerfTest_FFT2.cpp | 123 ++++++++++++++++++++++++++++++++ fft/perf_test/PerfTest_FFT2.hpp | 104 +++++++++++++++++++++++++++ 3 files changed, 228 insertions(+) create mode 100644 fft/perf_test/PerfTest_FFT2.cpp create mode 100644 fft/perf_test/PerfTest_FFT2.hpp diff --git a/fft/perf_test/CMakeLists.txt b/fft/perf_test/CMakeLists.txt index f2f37c81..bf4c57ad 100644 --- a/fft/perf_test/CMakeLists.txt +++ b/fft/perf_test/CMakeLists.txt @@ -63,6 +63,7 @@ endfunction() set( BENCHMARK_SOURCES PerfTest_FFT1.cpp + PerfTest_FFT2.cpp ) KOKKOSFFT_ADD_BENCHMARK( diff --git a/fft/perf_test/PerfTest_FFT2.cpp b/fft/perf_test/PerfTest_FFT2.cpp new file mode 100644 index 00000000..67a09109 --- /dev/null +++ b/fft/perf_test/PerfTest_FFT2.cpp @@ -0,0 +1,123 @@ +#include +#include "Benchmark_Context.hpp" +#include "PerfTest_FFT2.hpp" + +namespace KokkosFFTBenchmark { + +// 2D FFT on 2D View +BENCHMARK(FFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(FFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(FFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(FFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +// 2D IFFT on 2D View +BENCHMARK(IFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +// 2D RFFT on 2D View +BENCHMARK(RFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(RFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(RFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(RFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +// 2D IRFFT on 2D View +BENCHMARK(IRFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IRFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IRFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IRFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +} // namespace KokkosFFTBenchmark \ No newline at end of file diff --git a/fft/perf_test/PerfTest_FFT2.hpp b/fft/perf_test/PerfTest_FFT2.hpp new file mode 100644 index 00000000..719ad571 --- /dev/null +++ b/fft/perf_test/PerfTest_FFT2.hpp @@ -0,0 +1,104 @@ +#ifndef KOKKOSFFT_PERFTEST_FFT2_HPP +#define KOKKOSFFT_PERFTEST_FFT2_HPP + +#include +#include +#include +#include "Benchmark_Context.hpp" + +using execution_space = Kokkos::DefaultExecutionSpace; +using axis_type = KokkosFFT::axis_type<2>; + +namespace KokkosFFTBenchmark { + +template +void fft2(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::fft2(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +void ifft2(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::ifft2(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +void rfft2(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::rfft2(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +void irfft2(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::irfft2(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +static void FFT2_2DView(benchmark::State& state) { + using ComplexView2DType = + Kokkos::View**, LayoutType, execution_space>; + + const int n = state.range(0); + ComplexView2DType x("x", n, n), x_hat("x_hat", n, n); + + fft2(x, x_hat, state); +} + +template +static void IFFT2_2DView(benchmark::State& state) { + using ComplexView2DType = + Kokkos::View**, LayoutType, execution_space>; + + const int n = state.range(0); + ComplexView2DType x("x", n, n), x_hat("x_hat", n, n); + + ifft2(x, x_hat, state); +} + +template +static void RFFT2_2DView(benchmark::State& state) { + using RealView2DType = Kokkos::View; + using ComplexView2DType = + Kokkos::View**, LayoutType, execution_space>; + + const int n = state.range(0); + RealView2DType x("x", n, n); + ComplexView2DType x_hat("x_hat", n, n / 2 + 1); + + rfft2(x, x_hat, state); +} + +template +static void IRFFT2_2DView(benchmark::State& state) { + using RealView2DType = Kokkos::View; + using ComplexView2DType = + Kokkos::View**, LayoutType, execution_space>; + + const int n = state.range(0); + ComplexView2DType x("x", n, n / 2 + 1); + RealView2DType x_hat("x_hat", n, n); + + irfft2(x, x_hat, state); +} + +} // namespace KokkosFFTBenchmark + +#endif \ No newline at end of file From 8d30799f03d2717fdf97bfe68800413e2091014d Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Wed, 31 Jan 2024 23:37:22 +0900 Subject: [PATCH 07/13] Add buildtest for benchmark --- .github/workflows/cmake.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 928343d6..5518f114 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -54,7 +54,8 @@ jobs: run: | docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake -B build_OPENMP \ -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=g++ \ - -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_OPENMP=ON -DBUILD_TESTING=ON -DKokkosFFT_INTERNAL_Kokkos=ON + -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_OPENMP=ON -DBUILD_TESTING=ON -DKokkosFFT_INTERNAL_Kokkos=ON \ + -DKokkosFFT_ENABLE_BENCHMARK=ON - name: Configure CMake for CUDA backend # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. @@ -62,7 +63,8 @@ jobs: run: | docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake -B build_CUDA \ -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \ - -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_${{env.CUDA_ARCHITECTURES}}=ON -DBUILD_TESTING=ON -DKokkosFFT_INTERNAL_Kokkos=ON + -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_${{env.CUDA_ARCHITECTURES}}=ON -DBUILD_TESTING=ON -DKokkosFFT_INTERNAL_Kokkos=ON \ + -DKokkosFFT_ENABLE_BENCHMARK=ON - name: Configure CMake for CUDA backend with HOST and DEVICE option # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. @@ -71,7 +73,7 @@ jobs: docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake -B build_CUDA_HOST_DEVICE \ -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \ -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_${{env.CUDA_ARCHITECTURES}}=ON -DBUILD_TESTING=ON \ - -DKokkosFFT_ENABLE_HOST_AND_DEVICE=ON -DKokkosFFT_INTERNAL_Kokkos=ON + -DKokkosFFT_ENABLE_HOST_AND_DEVICE=ON -DKokkosFFT_INTERNAL_Kokkos=ON -DKokkosFFT_ENABLE_BENCHMARK=ON - name: Build # Build your program with the given configuration @@ -135,7 +137,8 @@ jobs: run: | docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake -B build_${{matrix.backend.name}} \ -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \ - -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_HIP=ON -DKokkosFFT_INTERNAL_Kokkos=ON -DKokkos_ARCH_${{env.architecture}}=ON -DBUILD_TESTING=ON ${{matrix.backend.option}} + -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_HIP=ON -DKokkosFFT_INTERNAL_Kokkos=ON -DKokkos_ARCH_${{env.architecture}}=ON -DBUILD_TESTING=ON \ + -DKokkosFFT_ENABLE_BENCHMARK=ON ${{matrix.backend.option}} - name: Build # Build your program with the given configuration @@ -189,6 +192,7 @@ jobs: -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \ -DCMAKE_CXX_STANDARD=17 \ -DKokkosFFT_INTERNAL_Kokkos=ON \ + -DKokkosFFT_ENABLE_BENCHMARK=ON \ -DKOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED=0 \ -DKokkos_ARCH_NATIVE=ON \ -DCMAKE_CXX_FLAGS="-fsycl-device-code-split=per_kernel -Wno-deprecated-declarations -Werror -Wno-gnu-zero-variadic-macro-arguments -Wno-unknown-cuda-version -Wno-sycl-target" \ From 5f3c2d667ad355be1c20c09a62aad7e21bef2f47 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Wed, 31 Jan 2024 23:39:55 +0900 Subject: [PATCH 08/13] formatting --- fft/perf_test/PerfTest_FFT2.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fft/perf_test/PerfTest_FFT2.hpp b/fft/perf_test/PerfTest_FFT2.hpp index 719ad571..a4c6ce64 100644 --- a/fft/perf_test/PerfTest_FFT2.hpp +++ b/fft/perf_test/PerfTest_FFT2.hpp @@ -7,7 +7,7 @@ #include "Benchmark_Context.hpp" using execution_space = Kokkos::DefaultExecutionSpace; -using axis_type = KokkosFFT::axis_type<2>; +using axis_type = KokkosFFT::axis_type<2>; namespace KokkosFFTBenchmark { From 8c190f9427dc893b62261cf81a9d22b7e4e55888 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Wed, 31 Jan 2024 23:57:54 +0900 Subject: [PATCH 09/13] [Bugfix] Avoid multiple definitions of _exec --- fft/src/KokkosFFT_Cuda_transform.hpp | 24 ++++++++++++------------ fft/src/KokkosFFT_HIP_transform.hpp | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/fft/src/KokkosFFT_Cuda_transform.hpp b/fft/src/KokkosFFT_Cuda_transform.hpp index 4ba45964..23dc22f3 100644 --- a/fft/src/KokkosFFT_Cuda_transform.hpp +++ b/fft/src/KokkosFFT_Cuda_transform.hpp @@ -5,43 +5,43 @@ namespace KokkosFFT { namespace Impl { -void _exec(cufftHandle& plan, cufftReal* idata, cufftComplex* odata, - [[maybe_unused]] int direction) { +inline void _exec(cufftHandle& plan, cufftReal* idata, cufftComplex* odata, + [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecR2C(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecR2C failed"); } -void _exec(cufftHandle& plan, cufftDoubleReal* idata, cufftDoubleComplex* odata, - [[maybe_unused]] int direction) { +inline void _exec(cufftHandle& plan, cufftDoubleReal* idata, + cufftDoubleComplex* odata, [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecD2Z(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecD2Z failed"); } -void _exec(cufftHandle& plan, cufftComplex* idata, cufftReal* odata, - [[maybe_unused]] int direction) { +inline void _exec(cufftHandle& plan, cufftComplex* idata, cufftReal* odata, + [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecC2R(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecC2R failed"); } -void _exec(cufftHandle& plan, cufftDoubleComplex* idata, cufftDoubleReal* odata, - [[maybe_unused]] int direction) { +inline void _exec(cufftHandle& plan, cufftDoubleComplex* idata, + cufftDoubleReal* odata, [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecZ2D(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecZ2D failed"); } -void _exec(cufftHandle& plan, cufftComplex* idata, cufftComplex* odata, - int direction) { +inline void _exec(cufftHandle& plan, cufftComplex* idata, cufftComplex* odata, + int direction) { cufftResult cufft_rt = cufftExecC2C(plan, idata, odata, direction); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecC2C failed"); } -void _exec(cufftHandle& plan, cufftDoubleComplex* idata, - cufftDoubleComplex* odata, int direction) { +inline void _exec(cufftHandle& plan, cufftDoubleComplex* idata, + cufftDoubleComplex* odata, int direction) { cufftResult cufft_rt = cufftExecZ2Z(plan, idata, odata, direction); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecZ2Z failed"); diff --git a/fft/src/KokkosFFT_HIP_transform.hpp b/fft/src/KokkosFFT_HIP_transform.hpp index 30c1785a..2bd9ce86 100644 --- a/fft/src/KokkosFFT_HIP_transform.hpp +++ b/fft/src/KokkosFFT_HIP_transform.hpp @@ -5,43 +5,43 @@ namespace KokkosFFT { namespace Impl { -void _exec(hipfftHandle& plan, hipfftReal* idata, hipfftComplex* odata, - [[maybe_unused]] int direction) { +inline void _exec(hipfftHandle& plan, hipfftReal* idata, hipfftComplex* odata, + [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecR2C(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecR2C failed"); } -void _exec(hipfftHandle& plan, hipfftDoubleReal* idata, - hipfftDoubleComplex* odata, [[maybe_unused]] int direction) { +inline void _exec(hipfftHandle& plan, hipfftDoubleReal* idata, + hipfftDoubleComplex* odata, [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecD2Z(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecD2Z failed"); } -void _exec(hipfftHandle& plan, hipfftComplex* idata, hipfftReal* odata, - [[maybe_unused]] int direction) { +inline void _exec(hipfftHandle& plan, hipfftComplex* idata, hipfftReal* odata, + [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecC2R(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecC2R failed"); } -void _exec(hipfftHandle& plan, hipfftDoubleComplex* idata, - hipfftDoubleReal* odata, [[maybe_unused]] int direction) { +inline void _exec(hipfftHandle& plan, hipfftDoubleComplex* idata, + hipfftDoubleReal* odata, [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecZ2D(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecZ2D failed"); } -void _exec(hipfftHandle& plan, hipfftComplex* idata, hipfftComplex* odata, - int direction) { +inline void _exec(hipfftHandle& plan, hipfftComplex* idata, + hipfftComplex* odata, int direction) { hipfftResult hipfft_rt = hipfftExecC2C(plan, idata, odata, direction); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecC2C failed"); } -void _exec(hipfftHandle& plan, hipfftDoubleComplex* idata, - hipfftDoubleComplex* odata, int direction) { +inline void _exec(hipfftHandle& plan, hipfftDoubleComplex* idata, + hipfftDoubleComplex* odata, int direction) { hipfftResult hipfft_rt = hipfftExecZ2Z(plan, idata, odata, direction); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecZ2Z failed"); From d67ac55cc7885af9bebfc5e8d01077c4f10e8513 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Fri, 2 Feb 2024 15:00:13 -0500 Subject: [PATCH 10/13] Simplify CMake for performance test --- fft/perf_test/CMakeLists.txt | 81 +++++++-------------------------- fft/perf_test/PerfTest_FFT1.cpp | 2 - fft/perf_test/PerfTest_FFT2.cpp | 2 - 3 files changed, 16 insertions(+), 69 deletions(-) diff --git a/fft/perf_test/CMakeLists.txt b/fft/perf_test/CMakeLists.txt index bf4c57ad..ddb531f0 100644 --- a/fft/perf_test/CMakeLists.txt +++ b/fft/perf_test/CMakeLists.txt @@ -1,72 +1,23 @@ -function(KOKKOSFFT_ADD_BENCHMARK name) - CMAKE_PARSE_ARGUMENTS( - BENCHMARK - "" - "" - "SOURCES" - ${ARGN} - ) +set(BENCHMARK_NAME PerformanceTest_Benchmark) - if(DEFINED BENCHMARK_UNPARSED_ARGUMENTS) - message( - WARNING - "Unexpected arguments when adding a benchmark: " - ${BENCHMARK_UNPARSED_ARGUMENTS} - ) - endif() - - set(BENCHMARK_NAME ${PACKAGE_NAME}_${name}) - - #Adding BenchmarkMain.cpp to sources - list(APPEND BENCHMARK_SOURCES - BenchmarkMain.cpp - ) - - add_executable( - ${BENCHMARK_NAME} - ${BENCHMARK_SOURCES} - ) - target_link_libraries( - ${BENCHMARK_NAME} - PRIVATE benchmark::benchmark Kokkos::kokkos KokkosFFT::fft - ) - target_include_directories( - ${BENCHMARK_NAME} - SYSTEM PRIVATE ${benchmark_SOURCE_DIR}/include - ) - target_include_directories( - ${BENCHMARK_NAME} - PRIVATE ${CMAKE_BINARY_DIR} - ) - - foreach(SOURCE_FILE ${BENCHMARK_SOURCES}) - SET_SOURCE_FILES_PROPERTIES( - ${SOURCE_FILE} - PROPERTIES LANGUAGE CXX - ) - endforeach() - - string(TIMESTAMP BENCHMARK_TIME "%Y-%m-%d_T%H-%M-%S" UTC) - set( - BENCHMARK_ARGS - --benchmark_counters_tabular=true - --benchmark_out=${BENCHMARK_NAME}_${BENCHMARK_TIME}.json - ) +add_executable( + ${BENCHMARK_NAME} + BenchmarkMain.cpp + PerfTest_FFT1.cpp + PerfTest_FFT2.cpp +) - add_test( - NAME ${BENCHMARK_NAME} - COMMAND ${BENCHMARK_NAME} ${BENCHMARK_ARGS} - ) -endfunction() +target_link_libraries(${BENCHMARK_NAME} PUBLIC benchmark::benchmark KokkosFFT::fft) +target_include_directories(${BENCHMARK_NAME} PUBLIC ${CMAKE_BINARY_DIR}) -# Set benchmark targets +string(TIMESTAMP BENCHMARK_TIME "%Y-%m-%d_T%H-%M-%S" UTC) set( - BENCHMARK_SOURCES - PerfTest_FFT1.cpp - PerfTest_FFT2.cpp + BENCHMARK_ARGS + --benchmark_counters_tabular=true + --benchmark_out=${BENCHMARK_NAME}_${BENCHMARK_TIME}.json ) -KOKKOSFFT_ADD_BENCHMARK( - PerformanceTest_Benchmark - SOURCES ${BENCHMARK_SOURCES} +add_test( + NAME ${BENCHMARK_NAME} + COMMAND ${BENCHMARK_NAME} ${BENCHMARK_ARGS} ) \ No newline at end of file diff --git a/fft/perf_test/PerfTest_FFT1.cpp b/fft/perf_test/PerfTest_FFT1.cpp index 4148103a..0f535d77 100644 --- a/fft/perf_test/PerfTest_FFT1.cpp +++ b/fft/perf_test/PerfTest_FFT1.cpp @@ -1,5 +1,3 @@ -#include -#include "Benchmark_Context.hpp" #include "PerfTest_FFT1.hpp" namespace KokkosFFTBenchmark { diff --git a/fft/perf_test/PerfTest_FFT2.cpp b/fft/perf_test/PerfTest_FFT2.cpp index 67a09109..00358f28 100644 --- a/fft/perf_test/PerfTest_FFT2.cpp +++ b/fft/perf_test/PerfTest_FFT2.cpp @@ -1,5 +1,3 @@ -#include -#include "Benchmark_Context.hpp" #include "PerfTest_FFT2.hpp" namespace KokkosFFTBenchmark { From df08e96c7867914c1fc1749819dc738146056dd7 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Fri, 2 Feb 2024 15:01:27 -0500 Subject: [PATCH 11/13] Adding pthread flag for HIP build --- .github/workflows/cmake.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 5518f114..19352c27 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -138,7 +138,7 @@ jobs: docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake -B build_${{matrix.backend.name}} \ -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \ -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_HIP=ON -DKokkosFFT_INTERNAL_Kokkos=ON -DKokkos_ARCH_${{env.architecture}}=ON -DBUILD_TESTING=ON \ - -DKokkosFFT_ENABLE_BENCHMARK=ON ${{matrix.backend.option}} + -DKokkosFFT_ENABLE_BENCHMARK=ON -DCMAKE_CXX_FLAGS="-pthread" ${{matrix.backend.option}} - name: Build # Build your program with the given configuration From 151a10f05c57f22f25eae370a030985d75f38340 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Thu, 8 Feb 2024 22:16:36 +0900 Subject: [PATCH 12/13] Build benchmarks in CI --- .github/workflows/build_test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_test.yaml b/.github/workflows/build_test.yaml index 1f8e1610..3656fc37 100644 --- a/.github/workflows/build_test.yaml +++ b/.github/workflows/build_test.yaml @@ -132,6 +132,7 @@ jobs: -DCMAKE_CXX_COMPILER=${{ matrix.backend.cxx_compiler }} \ -DCMAKE_CXX_STANDARD=17 \ -DBUILD_TESTING=ON \ + -DKokkosFFT_ENABLE_BENCHMARK=ON \ -DKokkosFFT_INTERNAL_Kokkos=ON \ ${{ matrix.backend.cmake_flags }} \ ${{ matrix.target.cmake_flags }} From be2f3013a35f3ff390355264681107d9d478dc29 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Thu, 8 Feb 2024 22:46:26 +0900 Subject: [PATCH 13/13] update hip based image to 5.4 --- docker/hip/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/hip/Dockerfile b/docker/hip/Dockerfile index 3e94d376..21f13e8d 100644 --- a/docker/hip/Dockerfile +++ b/docker/hip/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE=rocm/dev-ubuntu-20.04:5.2 +ARG BASE=rocm/dev-ubuntu-20.04:5.4 FROM $BASE ARG ADDITIONAL_PACKAGES