diff --git a/.clang-format b/.clang-format
index 30a5ef1db..a057d0bef 100644
--- a/.clang-format
+++ b/.clang-format
@@ -79,7 +79,7 @@ IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^"plssvm/'
Priority: 1
- - Regex: '^"(cuda|hip|CL|sycl|omp)'
+ - Regex: '^"(cuda|hip|CL|sycl|omp|hpx)'
Priority: 2
- Regex: '^"(tests|bindings)/'
Priority: 3
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8c7de08b3..593b7b8f5 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-## Authors: Alexander Van Craen, Marcel Breyer
+## Authors: Alexander Van Craen, Marcel Breyer, Alexander Strack
## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved
## License: This file is part of the PLSSVM project which is released under the MIT license.
## See the LICENSE.md file in the project root for full license information.
@@ -376,6 +376,13 @@ if (PLSSVM_ENABLE_STDPAR_BACKEND MATCHES "AUTO" OR PLSSVM_ENABLE_STDPAR_BACKEND)
add_subdirectory(src/plssvm/backends/stdpar)
endif ()
+## check for HPX backend
+set(PLSSVM_ENABLE_HPX_BACKEND AUTO CACHE STRING "Enable HPX Backend")
+set_property(CACHE PLSSVM_ENABLE_HPX_BACKEND PROPERTY STRINGS AUTO ON OFF)
+if (PLSSVM_ENABLE_HPX_BACKEND MATCHES "AUTO" OR PLSSVM_ENABLE_HPX_BACKEND)
+ add_subdirectory(src/plssvm/backends/HPX)
+endif ()
+
## check for CUDA backend
set(PLSSVM_ENABLE_CUDA_BACKEND AUTO CACHE STRING "Enable CUDA Backend")
set_property(CACHE PLSSVM_ENABLE_CUDA_BACKEND PROPERTY STRINGS AUTO ON OFF)
@@ -705,6 +712,10 @@ if (TARGET ${PLSSVM_STDPAR_BACKEND_LIBRARY_NAME})
message(STATUS "${PLSSVM_STDPAR_BACKEND_SUMMARY_STRING}")
list(APPEND PLSSVM_BACKEND_NAME_LIST "stdpar")
endif ()
+if (TARGET ${PLSSVM_HPX_BACKEND_LIBRARY_NAME})
+ message(STATUS "${PLSSVM_HPX_BACKEND_SUMMARY_STRING}")
+ list(APPEND PLSSVM_BACKEND_NAME_LIST "hpx")
+endif ()
if (TARGET ${PLSSVM_CUDA_BACKEND_LIBRARY_NAME})
message(STATUS "${PLSSVM_CUDA_BACKEND_SUMMARY_STRING}")
list(APPEND PLSSVM_BACKEND_NAME_LIST "cuda")
@@ -852,6 +863,7 @@ install(FILES
"${PROJECT_BINARY_DIR}/plssvmHIPTargets.cmake"
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmOpenCLTargets.cmake"
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmOpenMPTargets.cmake"
+ "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmHPXTargets.cmake"
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmAdaptiveCppTargets.cmake"
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmDPCPPTargets.cmake"
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmstdparTargets.cmake"
diff --git a/CMakePresets.json b/CMakePresets.json
index 8e4925dd0..c6bf7373f 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -2,6 +2,7 @@
"version": 6,
"include": [
"cmake/presets/openmp.json",
+ "cmake/presets/hpx.json",
"cmake/presets/stdpar.json",
"cmake/presets/stdpar_gcc.json",
"cmake/presets/stdpar_nvhpc.json",
@@ -15,4 +16,4 @@
"cmake/presets/dpcpp.json",
"cmake/presets/all.json"
]
-}
\ No newline at end of file
+}
diff --git a/README.md b/README.md
index 566ac248a..394dd8e04 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,7 @@ The main highlights of our SVM implementations are:
1. Drop-in replacement for LIBSVM's `svm-train`, `svm-predict`, and `svm-scale` (some features currently not implemented).
2. Support of multiple different programming frameworks for parallelization (also called backends in our PLSSVM implementation) which allows us to target GPUs and CPUs from different vendors like NVIDIA, AMD, or Intel:
- [OpenMP](https://www.openmp.org/)
+ - [HPX](https://hpx.stellar-group.org/)
- [stdpar](https://en.cppreference.com/w/cpp/algorithm) (supported implementations are [nvc++](https://developer.nvidia.com/hpc-sdk) from NVIDIA's HPC SDK, [roc-stdpar](https://github.com/ROCm/roc-stdpar) as a patched LLVM, [icpx](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html) as Intel's oneAPI compiler, [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp), and [GNU GCC](https://gcc.gnu.org/) using TBB).
**Note**: due to the nature of the used USM mechanics in the `stdpar` implementations, the `stdpar` backend **can't** be enabled together with **any** other backend!
   **Note**: since every translation unit needs to be compiled with the same flags, we currently set `CMAKE_CXX_FLAGS` globally although this is discouraged in favor of `target_compile_options`.
@@ -105,6 +106,10 @@ Additional dependencies for the stdpar backend:
- compiler with stdpar support
+Additional dependencies for the HPX backend:
+
+- [HPX ≥ v1.9.0](https://hpx.stellar-group.org/)
+
Additional dependencies for the CUDA backend:
- CUDA SDK
@@ -355,6 +360,9 @@ Available configure presets:
"openmp" - OpenMP backend
"openmp_python" - OpenMP backend + Python bindings
"openmp_test" - OpenMP backend tests
+ "hpx" - HPX backend
+ "hpx_python" - HPX backend + Python bindings
+ "hpx_test" - HPX backend tests
"cuda" - CUDA backend
"cuda_python" - CUDA backend + Python bindings
"cuda_test" - CUDA backend tests
@@ -545,7 +553,7 @@ Usage:
-i, --max_iter arg set the maximum number of CG iterations (default: num_features)
-l, --solver arg choose the solver: automatic|cg_explicit|cg_implicit (default: automatic)
-a, --classification arg the classification strategy to use for multi-class classification: oaa|oao (default: oaa)
- -b, --backend arg choose the backend: automatic|openmp|cuda|hip|opencl|sycl|stdpar (default: automatic)
+ -b, --backend arg choose the backend: automatic|openmp|hpx|cuda|hip|opencl|sycl|stdpar (default: automatic)
-p, --target_platform arg choose the target platform: automatic|cpu|gpu_nvidia|gpu_amd|gpu_intel (default: automatic)
--sycl_kernel_invocation_type arg
choose the kernel invocation type when using SYCL as backend: automatic|nd_range (default: automatic)
@@ -589,13 +597,14 @@ The `--backend=automatic` option works as follows:
- if the `gpu_nvidia` target is available, check for existing backends in order `cuda` 🠦 `hip` 🠦 `opencl` 🠦 `sycl` 🠦 `stdpar`
- otherwise, if the `gpu_amd` target is available, check for existing backends in order `hip` 🠦 `opencl` 🠦 `sycl` 🠦 `stdpar`
- otherwise, if the `gpu_intel` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `stdpar`
-- otherwise, if the `cpu` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `openmp` 🠦 `stdpar`
+- otherwise, if the `cpu` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `openmp` 🠦 `hpx` 🠦 `stdpar`
Note that during CMake configuration it is guaranteed that at least one of the above combinations does exist.
The `--target_platform=automatic` option works for the different backends as follows:
- `OpenMP`: always selects a CPU
+- `HPX`: always selects a CPU
- `CUDA`: always selects an NVIDIA GPU (if no NVIDIA GPU is available, throws an exception)
- `HIP`: always selects an AMD GPU (if no AMD GPU is available, throws an exception)
- `OpenCL`: tries to find available devices in the following order: NVIDIA GPUs 🠦 AMD GPUs 🠦 Intel GPUs 🠦 CPU
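For orientation, here is a minimal sketch of selecting the new backend programmatically, mirroring the `--backend=hpx` option above. It assumes the existing `plssvm::make_csvm` factory and the `plssvm/core.hpp` umbrella header treat `hpx` like every other `plssvm::backend_type` value:

```cpp
#include "plssvm/core.hpp"

#include <memory>  // std::unique_ptr

int main() {
    // equivalent to passing --backend=hpx on the command line;
    // fails if the HPX backend was not enabled at CMake configure time
    const std::unique_ptr<plssvm::csvm> svm = plssvm::make_csvm(plssvm::backend_type::hpx);
    return 0;
}
```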
diff --git a/bindings/Python/CMakeLists.txt b/bindings/Python/CMakeLists.txt
index 5bead042a..f951f77a4 100644
--- a/bindings/Python/CMakeLists.txt
+++ b/bindings/Python/CMakeLists.txt
@@ -1,4 +1,4 @@
-## Authors: Alexander Van Craen, Marcel Breyer
+## Authors: Alexander Van Craen, Marcel Breyer, Alexander Strack
## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved
## License: This file is part of the PLSSVM project which is released under the MIT license.
## See the LICENSE.md file in the project root for full license information.
@@ -68,6 +68,9 @@ endif ()
if (TARGET ${PLSSVM_OPENMP_BACKEND_LIBRARY_NAME})
list(APPEND PLSSVM_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/backends/openmp_csvm.cpp)
endif ()
+if (TARGET ${PLSSVM_HPX_BACKEND_LIBRARY_NAME})
+ list(APPEND PLSSVM_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/backends/hpx_csvm.cpp)
+endif ()
if (TARGET ${PLSSVM_STDPAR_BACKEND_LIBRARY_NAME})
 # AdaptiveCpp stdpar is only supported on the CPU when using our Python bindings
@@ -125,4 +128,4 @@ target_compile_options(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC $<$
diff --git a/bindings/Python/backend_types.cpp b/bindings/Python/backend_types.cpp
--- a/bindings/Python/backend_types.cpp
+++ b/bindings/Python/backend_types.cpp
 py::enum_<plssvm::backend_type>(m, "BackendType")
.value("AUTOMATIC", plssvm::backend_type::automatic, "the default backend; depends on the specified target platform")
.value("OPENMP", plssvm::backend_type::openmp, "OpenMP to target CPUs only (currently no OpenMP target offloading support)")
+ .value("HPX", plssvm::backend_type::hpx, "HPX to target CPUs only (currently no GPU executor support)")
.value("STDPAR", plssvm::backend_type::stdpar, "C++ standard parallelism to target CPUs and GPUs from different vendors based on the used stdpar implementation; supported implementations are: nvhpc (nvc++), roc-stdpar, AdaptiveCpp, Intel LLVM (icpx), and GNU GCC + TBB")
.value("CUDA", plssvm::backend_type::cuda, "CUDA to target NVIDIA GPUs only")
.value("HIP", plssvm::backend_type::hip, "HIP to target AMD and NVIDIA GPUs")
diff --git a/bindings/Python/backends/hpx_csvm.cpp b/bindings/Python/backends/hpx_csvm.cpp
new file mode 100644
index 000000000..92b4fef10
--- /dev/null
+++ b/bindings/Python/backends/hpx_csvm.cpp
@@ -0,0 +1,57 @@
+/**
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "plssvm/backends/HPX/csvm.hpp" // plssvm::hpx::csvm
+#include "plssvm/backends/HPX/exceptions.hpp" // plssvm::hpx::backend_exception
+#include "plssvm/csvm.hpp" // plssvm::csvm
+#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception
+#include "plssvm/parameter.hpp" // plssvm::parameter
+#include "plssvm/target_platforms.hpp" // plssvm::target_platform
+
+#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception
+
+#include "pybind11/pybind11.h" // py::module_, py::class_, py::init
+#include "pybind11/stl.h" // support for STL types
+
+#include <memory> // std::make_unique
+
+namespace py = pybind11;
+
+void init_hpx_csvm(py::module_ &m, const py::exception<plssvm::exception> &base_exception) {
+ // use its own submodule for the HPX CSVM bindings
+ py::module_ hpx_module = m.def_submodule("hpx", "a module containing all HPX backend specific functionality");
+
+ // bind the CSVM using the HPX backend
+ py::class_<plssvm::hpx::csvm, plssvm::csvm>(hpx_module, "CSVM")
+ .def(py::init<>(), "create an SVM with the automatic target platform and default parameter object")
+ .def(py::init<plssvm::parameter>(), "create an SVM with the automatic target platform and provided parameter object")
+ .def(py::init<plssvm::target_platform>(), "create an SVM with the provided target platform and default parameter object")
+ .def(py::init<plssvm::target_platform, plssvm::parameter>(), "create an SVM with the provided target platform and parameter object")
+ .def(py::init([](const py::kwargs &args) {
+ // check for valid keys
+ check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" });
+ // if one of the keyword parameters is provided, set the respective value
+ const plssvm::parameter params = convert_kwargs_to_parameter(args);
+ // create CSVM with the default target platform
+ return std::make_unique<plssvm::hpx::csvm>(params);
+ }),
+ "create an SVM with the default target platform and keyword arguments")
+ .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) {
+ // check for valid keys
+ check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" });
+ // if one of the keyword parameters is provided, set the respective value
+ const plssvm::parameter params = convert_kwargs_to_parameter(args);
+ // create CSVM with the provided target platform
+ return std::make_unique<plssvm::hpx::csvm>(target, params);
+ }),
+ "create an SVM with the provided target platform and keyword arguments");
+
+ // register HPX backend specific exceptions
+ register_py_exception<plssvm::hpx::backend_exception>(hpx_module, "BackendError", base_exception);
+}
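For reference, the kwargs constructors bound above map onto PLSSVM's named-parameter constructors on the C++ side; a hedged sketch, assuming the library's named arguments such as `plssvm::kernel_type` and `plssvm::cost` work for the HPX CSVM exactly as for the other backends:

```cpp
#include "plssvm/backends/HPX/csvm.hpp"  // plssvm::hpx::csvm
#include "plssvm/core.hpp"               // named arguments, plssvm::kernel_function_type

int main() {
    // C++ counterpart of plssvm.hpx.CSVM(kernel_type=..., cost=...) in Python
    const plssvm::hpx::csvm svm{ plssvm::kernel_type = plssvm::kernel_function_type::rbf,
                                 plssvm::cost = 2.0 };
    return 0;
}
```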
diff --git a/bindings/Python/main.cpp b/bindings/Python/main.cpp
index f37bc20db..170afa2c3 100644
--- a/bindings/Python/main.cpp
+++ b/bindings/Python/main.cpp
@@ -1,6 +1,7 @@
/**
* @author Alexander Van Craen
* @author Marcel Breyer
+ * @author Alexander Strack
* @copyright 2018-today The PLSSVM project - All Rights Reserved
* @license This file is part of the PLSSVM project which is released under the MIT license.
* See the LICENSE.md file in the project root for full license information.
@@ -35,6 +36,7 @@ void init_environment(py::module_ &);
 void init_exceptions(py::module_ &, const py::exception<plssvm::exception> &);
 void init_csvm(py::module_ &);
 void init_openmp_csvm(py::module_ &, const py::exception<plssvm::exception> &);
+void init_hpx_csvm(py::module_ &, const py::exception<plssvm::exception> &);
 void init_stdpar_csvm(py::module_ &, const py::exception<plssvm::exception> &);
 void init_cuda_csvm(py::module_ &, const py::exception<plssvm::exception> &);
 void init_hip_csvm(py::module_ &, const py::exception<plssvm::exception> &);
@@ -86,6 +88,9 @@ PYBIND11_MODULE(plssvm, m) {
#if defined(PLSSVM_HAS_OPENMP_BACKEND)
init_openmp_csvm(m, base_exception);
#endif
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ init_hpx_csvm(m, base_exception);
+#endif
#if defined(PLSSVM_HAS_STDPAR_BACKEND)
init_stdpar_csvm(m, base_exception);
#endif
diff --git a/cmake/plssvm/plssvmConfig.cmake.in b/cmake/plssvm/plssvmConfig.cmake.in
index e6be17d15..9636e125e 100644
--- a/cmake/plssvm/plssvmConfig.cmake.in
+++ b/cmake/plssvm/plssvmConfig.cmake.in
@@ -25,7 +25,7 @@ find_dependency(fmt REQUIRED)
include("${CMAKE_CURRENT_LIST_DIR}/plssvmTargets.cmake")
# list all available libraries
-set(PLSSVM_SUPPORTED_COMPONENTS "OpenMP;CUDA;HIP;OpenCL;DPCPP;AdaptiveCpp;stdpar")
+set(PLSSVM_SUPPORTED_COMPONENTS "OpenMP;HPX;CUDA;HIP;OpenCL;DPCPP;AdaptiveCpp;stdpar")
set(PLSSVM_DISABLED_COMPONENTS "${PLSSVM_SUPPORTED_COMPONENTS}")
# check which libraries are available
diff --git a/cmake/plssvm/plssvmHPXTargets.cmake b/cmake/plssvm/plssvmHPXTargets.cmake
new file mode 100644
index 000000000..8fa711790
--- /dev/null
+++ b/cmake/plssvm/plssvmHPXTargets.cmake
@@ -0,0 +1,21 @@
+## Authors: Alexander Van Craen, Marcel Breyer, Alexander Strack
+## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved
+## License: This file is part of the PLSSVM project which is released under the MIT license.
+## See the LICENSE.md file in the project root for full license information.
+########################################################################################################################
+
+include(CMakeFindDependencyMacro)
+
+# check if the HPX backend is available
+if (TARGET plssvm::plssvm-HPX)
+ # enable HPX
+ find_dependency(HPX)
+ # set alias targets
+ add_library(plssvm::HPX ALIAS plssvm::plssvm-HPX)
+ add_library(plssvm::hpx ALIAS plssvm::plssvm-HPX)
+ # set COMPONENT to be found
+ set(plssvm_HPX_FOUND ON)
+else ()
+ # set COMPONENT to be NOT found
+ set(plssvm_HPX_FOUND OFF)
+endif ()
diff --git a/cmake/plssvm/plssvmOpenMPTargets.cmake b/cmake/plssvm/plssvmOpenMPTargets.cmake
index db95e1d1e..d8a6951f2 100644
--- a/cmake/plssvm/plssvmOpenMPTargets.cmake
+++ b/cmake/plssvm/plssvmOpenMPTargets.cmake
@@ -6,7 +6,7 @@
include(CMakeFindDependencyMacro)
-# check if the OpenCL backend is available
+# check if the OpenMP backend is available
if (TARGET plssvm::plssvm-OpenMP)
# enable OpenMP
find_dependency(OpenMP)
@@ -18,4 +18,4 @@ if (TARGET plssvm::plssvm-OpenMP)
else ()
# set COMPONENT to be NOT found
set(plssvm_OpenMP_FOUND OFF)
-endif ()
\ No newline at end of file
+endif ()
diff --git a/cmake/presets/all.json b/cmake/presets/all.json
index 76528069b..a1db4d1bc 100644
--- a/cmake/presets/all.json
+++ b/cmake/presets/all.json
@@ -9,6 +9,7 @@
"cacheVariables": {
"CMAKE_CXX_COMPILER": "clang++",
"PLSSVM_ENABLE_OPENMP_BACKEND": "AUTO",
+ "PLSSVM_ENABLE_HPX_BACKEND": "AUTO",
"PLSSVM_ENABLE_STDPAR_BACKEND": "OFF",
"PLSSVM_ENABLE_CUDA_BACKEND": "AUTO",
"PLSSVM_ENABLE_HIP_BACKEND": "AUTO",
@@ -23,6 +24,7 @@
"cacheVariables": {
"CMAKE_CXX_COMPILER": "clang++",
"PLSSVM_ENABLE_OPENMP_BACKEND": "AUTO",
+ "PLSSVM_ENABLE_HPX_BACKEND": "AUTO",
"PLSSVM_ENABLE_STDPAR_BACKEND": "OFF",
"PLSSVM_ENABLE_CUDA_BACKEND": "AUTO",
"PLSSVM_ENABLE_HIP_BACKEND": "AUTO",
@@ -39,6 +41,7 @@
"cacheVariables": {
"CMAKE_CXX_COMPILER": "clang++",
"PLSSVM_ENABLE_OPENMP_BACKEND": "AUTO",
+ "PLSSVM_ENABLE_HPX_BACKEND": "AUTO",
"PLSSVM_ENABLE_STDPAR_BACKEND": "OFF",
"PLSSVM_ENABLE_CUDA_BACKEND": "AUTO",
"PLSSVM_ENABLE_HIP_BACKEND": "AUTO",
@@ -84,7 +87,7 @@
"inherits": "common",
"filter": {
"include": {
- "name": "OpenMP.*|CUDA.*|HIP.*|OpenCL.*|AdaptiveCpp.*|DPCPP.*"
+ "name": "OpenMP.*|HPX.*|CUDA.*|HIP.*|OpenCL.*|AdaptiveCpp.*|DPCPP.*"
}
}
}
@@ -155,4 +158,4 @@
]
}
]
-}
\ No newline at end of file
+}
diff --git a/cmake/presets/common.json b/cmake/presets/common.json
index fac5b4b22..68da8cd61 100644
--- a/cmake/presets/common.json
+++ b/cmake/presets/common.json
@@ -12,6 +12,7 @@
"binaryDir": "build/${presetName}",
"cacheVariables": {
"PLSSVM_ENABLE_OPENMP_BACKEND": "OFF",
+ "PLSSVM_ENABLE_HPX_BACKEND": "OFF",
"PLSSVM_ENABLE_STDPAR_BACKEND": "OFF",
"PLSSVM_ENABLE_CUDA_BACKEND": "OFF",
"PLSSVM_ENABLE_HIP_BACKEND": "OFF",
@@ -66,4 +67,4 @@
}
}
]
-}
\ No newline at end of file
+}
diff --git a/cmake/presets/hpx.json b/cmake/presets/hpx.json
new file mode 100644
index 000000000..8ca724653
--- /dev/null
+++ b/cmake/presets/hpx.json
@@ -0,0 +1,143 @@
+{
+ "version": 6,
+ "include": ["common.json"],
+ "configurePresets": [
+ {
+ "name": "hpx",
+ "displayName": "HPX backend",
+ "inherits": "build",
+ "cacheVariables": {
+ "PLSSVM_ENABLE_HPX_BACKEND": "ON",
+ "PLSSVM_TARGET_PLATFORMS": "cpu"
+ }
+ },
+ {
+ "name": "hpx_python",
+ "displayName": "HPX backend + Python bindings",
+ "inherits": "build",
+ "cacheVariables": {
+ "PLSSVM_ENABLE_HPX_BACKEND": "ON",
+ "PLSSVM_TARGET_PLATFORMS": "cpu",
+ "PLSSVM_ENABLE_LANGUAGE_BINDINGS": "ON",
+ "PLSSVM_ENABLE_PYTHON_BINDINGS": "ON"
+ }
+ },
+ {
+ "name": "hpx_test",
+ "displayName": "HPX backend tests",
+ "inherits": "test",
+ "cacheVariables": {
+ "PLSSVM_ENABLE_HPX_BACKEND": "ON",
+ "PLSSVM_TARGET_PLATFORMS": "cpu"
+ }
+ }
+ ],
+ "buildPresets": [
+ {
+ "name": "hpx",
+ "displayName": "HPX backend",
+ "configurePreset": "hpx",
+ "configuration": "RelWithDebInfo",
+ "inherits": "common"
+ },
+ {
+ "name": "hpx_python",
+ "displayName": "HPX backend + Python bindings",
+ "configurePreset": "hpx_python",
+ "configuration": "RelWithDebInfo",
+ "inherits": "common"
+ },
+ {
+ "name": "hpx_test",
+ "displayName": "HPX backend tests",
+ "configurePreset": "hpx_test",
+ "configuration": "Debug",
+ "inherits": "common"
+ }
+ ],
+ "testPresets": [
+ {
+ "name": "hpx_test",
+ "displayName": "HPX backend all tests",
+ "configurePreset": "hpx_test",
+ "inherits": "common"
+ },
+ {
+ "name": "hpx_backend_test",
+ "displayName": "HPX backend specific tests",
+ "configurePreset": "hpx_test",
+ "inherits": "common",
+ "filter": {
+ "include": {
+ "name": "HPX.*"
+ }
+ }
+ }
+ ],
+ "workflowPresets": [
+ {
+ "name": "hpx",
+ "displayName": "HPX backend workflow",
+ "steps": [
+ {
+ "name": "hpx",
+ "type": "configure"
+ },
+ {
+ "name": "hpx",
+ "type": "build"
+ }
+ ]
+ },
+ {
+ "name": "hpx_python",
+ "displayName": "HPX backend + Python bindings workflow",
+ "steps": [
+ {
+ "name": "hpx_python",
+ "type": "configure"
+ },
+ {
+ "name": "hpx_python",
+ "type": "build"
+ }
+ ]
+ },
+ {
+ "name": "hpx_test",
+ "displayName": "HPX test workflow",
+ "steps": [
+ {
+ "name": "hpx_test",
+ "type": "configure"
+ },
+ {
+ "name": "hpx_test",
+ "type": "build"
+ },
+ {
+ "name": "hpx_test",
+ "type": "test"
+ }
+ ]
+ },
+ {
+ "name": "hpx_backend_test",
+ "displayName": "HPX backend test workflow",
+ "steps": [
+ {
+ "name": "hpx_test",
+ "type": "configure"
+ },
+ {
+ "name": "hpx_test",
+ "type": "build"
+ },
+ {
+ "name": "hpx_backend_test",
+ "type": "test"
+ }
+ ]
+ }
+ ]
+}
diff --git a/cmake/presets/openmp.json b/cmake/presets/openmp.json
index c4dd27a4d..1031d50d0 100644
--- a/cmake/presets/openmp.json
+++ b/cmake/presets/openmp.json
@@ -7,7 +7,8 @@
"displayName": "OpenMP backend",
"inherits": "build",
"cacheVariables": {
- "PLSSVM_ENABLE_OPENMP_BACKEND": "ON"
+ "PLSSVM_ENABLE_OPENMP_BACKEND": "ON",
+ "PLSSVM_TARGET_PLATFORMS": "cpu"
}
},
{
@@ -16,6 +17,7 @@
"inherits": "build",
"cacheVariables": {
"PLSSVM_ENABLE_OPENMP_BACKEND": "ON",
+ "PLSSVM_TARGET_PLATFORMS": "cpu",
"PLSSVM_ENABLE_LANGUAGE_BINDINGS": "ON",
"PLSSVM_ENABLE_PYTHON_BINDINGS": "ON"
}
@@ -25,7 +27,8 @@
"displayName": "OpenMP backend tests",
"inherits": "test",
"cacheVariables": {
- "PLSSVM_ENABLE_OPENMP_BACKEND": "ON"
+ "PLSSVM_ENABLE_OPENMP_BACKEND": "ON",
+ "PLSSVM_TARGET_PLATFORMS": "cpu"
}
}
],
@@ -137,4 +140,4 @@
]
}
]
-}
\ No newline at end of file
+}
diff --git a/docs/resources/dirs.dox b/docs/resources/dirs.dox
index 8c3119aab..84e561a46 100644
--- a/docs/resources/dirs.dox
+++ b/docs/resources/dirs.dox
@@ -329,6 +329,66 @@
* @brief Directory containing kernel implementations for the implicit CG algorithm using the stdpar backend.
*/
+/**
+ * @dir include/plssvm/backends/HPX
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing the implementation for the HPX backend.
+ */
+
+/**
+ * @dir include/plssvm/backends/HPX/detail
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing implementation details for the HPX backend.
+ */
+
+/**
+ * @dir include/plssvm/backends/HPX/kernel
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing all kernels for the HPX backend.
+ */
+
+/**
+ * @dir include/plssvm/backends/HPX/kernel/cg_explicit
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing kernel implementations for the explicit CG algorithm using the HPX backend.
+ */
+
+/**
+ * @dir include/plssvm/backends/HPX/kernel/cg_implicit
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing kernel implementations for the implicit CG algorithm using the HPX backend.
+ */
+
/**
* @dir include/plssvm/backends/SYCL
* @author Alexander Van Craen
@@ -504,4 +564,4 @@
* See the LICENSE.md file in the project root for full license information.
*
* @brief Directory containing compile-time constant meta data for git specific information.
- */
\ No newline at end of file
+ */
diff --git a/include/plssvm/backend_types.hpp b/include/plssvm/backend_types.hpp
index 7bdbcb9e4..449f5dcdd 100644
--- a/include/plssvm/backend_types.hpp
+++ b/include/plssvm/backend_types.hpp
@@ -2,6 +2,7 @@
* @file
* @author Alexander Van Craen
* @author Marcel Breyer
+ * @author Alexander Strack
* @copyright 2018-today The PLSSVM project - All Rights Reserved
* @license This file is part of the PLSSVM project which is released under the MIT license.
* See the LICENSE.md file in the project root for full license information.
@@ -44,7 +45,9 @@ enum class backend_type {
/** [OpenCL](https://www.khronos.org/opencl/) to target CPUs and GPUs from different vendors. */
opencl,
/** [SYCL](https://www.khronos.org/sycl/) to target CPUs and GPUs from different vendors. Currently tested SYCL implementations are [DPC++](https://github.com/intel/llvm) and [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp) (formerly known as hipSYCL). */
- sycl
+ sycl,
+ /** [HPX](https://hpx.stellar-group.org/) to target CPUs only (currently no GPU support). */
+ hpx
};
/**
@@ -84,6 +87,7 @@ std::istream &operator>>(std::istream &in, backend_type &backend);
// Forward declare all possible C-SVMs.
namespace openmp { class csvm; }
namespace stdpar { class csvm; }
+namespace hpx { class csvm; }
namespace cuda { class csvm; }
namespace hip { class csvm; }
namespace opencl { class csvm; }
@@ -118,6 +122,15 @@ struct csvm_to_backend_type {
constexpr static backend_type value = backend_type::stdpar;
};
+/**
+ * @brief Sets the `value` to `plssvm::backend_type::hpx` for the HPX C-SVM.
+ */
+template <>
+struct csvm_to_backend_type<hpx::csvm> {
+ /// The enum value representing the hpx backend.
+ constexpr static backend_type value = backend_type::hpx;
+};
+
/**
* @brief Sets the `value` to `plssvm::backend_type::cuda` for the CUDA C-SVM.
*/
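A compile-time illustration of what the new trait specialization provides; this assumes the trait lives in `plssvm::detail`, as the neighboring `csvm_backend_exists` trait does:

```cpp
#include "plssvm/backend_types.hpp"
#include "plssvm/backends/HPX/csvm.hpp"

// generic code can map the HPX C-SVM type back to its enum value
static_assert(plssvm::detail::csvm_to_backend_type<plssvm::hpx::csvm>::value == plssvm::backend_type::hpx);
```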
diff --git a/include/plssvm/backends/HPX/csvm.hpp b/include/plssvm/backends/HPX/csvm.hpp
new file mode 100644
index 000000000..d9dba1e6e
--- /dev/null
+++ b/include/plssvm/backends/HPX/csvm.hpp
@@ -0,0 +1,170 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Defines a C-SVM using the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_CSVM_HPP_
+#define PLSSVM_BACKENDS_HPX_CSVM_HPP_
+#pragma once
+
+#include "plssvm/constants.hpp" // plssvm::real_type
+#include "plssvm/csvm.hpp" // plssvm::csvm, plssvm::detail::csvm_backend_exists
+#include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size
+#include "plssvm/detail/move_only_any.hpp" // plssvm::detail::move_only_any
+#include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES
+#include "plssvm/matrix.hpp" // plssvm::aos_matrix
+#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::has_only_parameter_named_args_v
+#include "plssvm/solver_types.hpp" // plssvm::solver_type
+#include "plssvm/target_platforms.hpp" // plssvm::target_platform
+
+#include <cstddef>     // std::size_t
+#include <type_traits> // std::true_type
+#include <utility>     // std::forward, std::pair
+#include <vector>      // std::vector
+
+namespace plssvm {
+
+namespace hpx {
+
+/**
+ * @brief A C-SVM implementation using HPX as backend.
+ */
+class csvm : public ::plssvm::csvm {
+ public:
+ /**
+ * @brief Construct a new C-SVM using the HPX backend with the parameters given through @p params.
+ * @param[in] params struct encapsulating all possible SVM parameters
+ * @throws plssvm::exception all exceptions thrown in the base class constructor
+ * @throws plssvm::hpx::backend_exception if the requested target is not available
+ * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+ */
+ explicit csvm(parameter params = {});
+ /**
+ * @brief Construct a new C-SVM using the HPX backend on the @p target platform with the parameters given through @p params.
+ * @param[in] target the target platform used for this C-SVM
+ * @param[in] params struct encapsulating all possible SVM parameters
+ * @throws plssvm::exception all exceptions thrown in the base class constructor
+ * @throws plssvm::hpx::backend_exception if the requested target is not available
+ * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+ */
+ explicit csvm(target_platform target, parameter params = {});
+
+ /**
+ * @brief Construct a new C-SVM using the HPX backend and the optionally provided @p named_args.
+ * @param[in] named_args the additional optional named-parameters
+ * @throws plssvm::exception all exceptions thrown in the base class constructor
+ * @throws plssvm::hpx::backend_exception if the requested target is not available
+ * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+ */
+ template <typename... Args, PLSSVM_REQUIRES(::plssvm::detail::has_only_parameter_named_args_v<Args...>)>
+ explicit csvm(Args &&...named_args) :
+ ::plssvm::csvm{ std::forward<Args>(named_args)... } {
+ // the default target is the automatic one
+ this->init(plssvm::target_platform::automatic);
+ }
+
+ /**
+ * @brief Construct a new C-SVM using the HPX backend on the @p target platform and the optionally provided @p named_args.
+ * @param[in] target the target platform used for this C-SVM
+ * @param[in] named_args the additional optional named-parameters
+ * @throws plssvm::exception all exceptions thrown in the base class constructor
+ * @throws plssvm::hpx::backend_exception if the requested target is not available
+ * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+ */
+ template <typename... Args, PLSSVM_REQUIRES(::plssvm::detail::has_only_parameter_named_args_v<Args...>)>
+ explicit csvm(const target_platform target, Args &&...named_args) :
+ ::plssvm::csvm{ std::forward<Args>(named_args)... } {
+ this->init(target);
+ }
+
+ /**
+ * @copydoc plssvm::csvm::csvm(const plssvm::csvm &)
+ */
+ csvm(const csvm &) = delete;
+ /**
+ * @copydoc plssvm::csvm::csvm(plssvm::csvm &&) noexcept
+ */
+ csvm(csvm &&) noexcept = default;
+ /**
+ * @copydoc plssvm::csvm::operator=(const plssvm::csvm &)
+ */
+ csvm &operator=(const csvm &) = delete;
+ /**
+ * @copydoc plssvm::csvm::operator=(plssvm::csvm &&) noexcept
+ */
+ csvm &operator=(csvm &&) noexcept = default;
+ /**
+ * @brief Default destructor since the copy and move constructors and copy- and move-assignment operators are defined.
+ */
+ ~csvm() override = default;
+
+ /**
+ * @copydoc plssvm::csvm::num_available_devices
+ * @note We currently only support one device for the HPX backend.
+ */
+ [[nodiscard]] std::size_t num_available_devices() const noexcept override {
+ return 1;
+ }
+
+ protected:
+ /**
+ * @copydoc plssvm::csvm::get_device_memory
+ */
+ [[nodiscard]] std::vector<::plssvm::detail::memory_size> get_device_memory() const final;
+ /**
+ * @copydoc plssvm::csvm::get_max_mem_alloc_size
+ */
+ [[nodiscard]] std::vector<::plssvm::detail::memory_size> get_max_mem_alloc_size() const final;
+
+ //***************************************************//
+ // fit //
+ //***************************************************//
+ /**
+ * @copydoc plssvm::csvm::assemble_kernel_matrix
+ */
+ [[nodiscard]] std::vector<::plssvm::detail::move_only_any> assemble_kernel_matrix(solver_type solver, const parameter &params, const soa_matrix<real_type> &A, const std::vector<real_type> &q_red, real_type QA_cost) const final;
+ /**
+ * @copydoc plssvm::csvm::blas_level_3
+ */
+ void blas_level_3(solver_type solver, real_type alpha, const std::vector<::plssvm::detail::move_only_any> &A, const soa_matrix<real_type> &B, real_type beta, soa_matrix<real_type> &C) const final;
+
+ //***************************************************//
+ // predict, score //
+ //***************************************************//
+ /**
+ * @copydoc plssvm::csvm::predict_values
+ */
+ [[nodiscard]] aos_matrix<real_type> predict_values(const parameter &params, const soa_matrix<real_type> &support_vectors, const aos_matrix<real_type> &alpha, const std::vector<real_type> &rho, soa_matrix<real_type> &w, const soa_matrix<real_type> &predict_points) const final;
+
+ private:
+ /**
+ * @brief Initializes the HPX backend and performs some sanity checks.
+ * @param[in] target the target platform to use
+ * @throws plssvm::hpx::backend_exception if the requested target is not available
+ * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+ */
+ void init(target_platform target);
+};
+
+} // namespace hpx
+
+namespace detail {
+
+/**
+ * @brief Sets the `value` to `true` since C-SVMs using the HPX backend are available.
+ */
+template <>
+struct csvm_backend_exists<hpx::csvm> : std::true_type { };
+
+} // namespace detail
+
+} // namespace plssvm
+
+#endif // PLSSVM_BACKENDS_HPX_CSVM_HPP_
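A short usage sketch for the constructors declared above (illustrative, not part of the patch): constructing on an explicit target and handling the failure path described by the `@throws` clauses:

```cpp
#include "plssvm/backends/HPX/csvm.hpp"
#include "plssvm/backends/HPX/exceptions.hpp"

#include <iostream>

int main() {
    try {
        // the HPX backend only targets CPUs; other targets fail the init() sanity checks
        const plssvm::hpx::csvm svm{ plssvm::target_platform::cpu };
    } catch (const plssvm::hpx::backend_exception &e) {
        std::cerr << e.what() << '\n';
        return 1;
    }
    return 0;
}
```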
diff --git a/include/plssvm/backends/HPX/detail/utility.hpp b/include/plssvm/backends/HPX/detail/utility.hpp
new file mode 100644
index 000000000..3fcdb04d0
--- /dev/null
+++ b/include/plssvm/backends/HPX/detail/utility.hpp
@@ -0,0 +1,38 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Utility functions specific to the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_DETAIL_UTILITY_HPP_
+#define PLSSVM_BACKENDS_HPX_DETAIL_UTILITY_HPP_
+#pragma once
+
+#include "boost/atomic/atomic_ref.hpp" // boost::atomic_ref
+#include <string> // std::string
+
+namespace plssvm::hpx::detail {
+
+// bring boost::atomic_ref into the namespace; used for the atomic updates in the implicit kernels
+using boost::atomic_ref;
+
+/**
+ * @brief Return the number of used CPU threads in the HPX backend.
+ * @return the number of used CPU threads (`[[nodiscard]]`)
+ */
+[[nodiscard]] int get_num_threads();
+
+/**
+ * @brief Return the HPX version used.
+ * @return the HPX version (`[[nodiscard]]`)
+ */
+[[nodiscard]] std::string get_hpx_version();
+
+} // namespace plssvm::hpx::detail
+
+#endif // PLSSVM_BACKENDS_HPX_DETAIL_UTILITY_HPP_
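The matching source file is not part of this excerpt; a minimal sketch of how these helpers could be implemented on top of the public HPX API (`hpx::get_num_worker_threads()` and `hpx::full_version_as_string()` are the assumed entry points):

```cpp
#include "plssvm/backends/HPX/detail/utility.hpp"

#include <hpx/include/runtime.hpp>  // hpx::get_num_worker_threads
#include <hpx/version.hpp>          // hpx::full_version_as_string

#include <string>

namespace plssvm::hpx::detail {

int get_num_threads() {
    // number of worker threads the HPX runtime was started with
    return static_cast<int>(::hpx::get_num_worker_threads());
}

std::string get_hpx_version() {
    return ::hpx::full_version_as_string();
}

}  // namespace plssvm::hpx::detail
```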
diff --git a/include/plssvm/backends/HPX/exceptions.hpp b/include/plssvm/backends/HPX/exceptions.hpp
new file mode 100644
index 000000000..fc7925f24
--- /dev/null
+++ b/include/plssvm/backends/HPX/exceptions.hpp
@@ -0,0 +1,39 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Implements custom exception classes specific to the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_EXCEPTIONS_HPP_
+#define PLSSVM_BACKENDS_HPX_EXCEPTIONS_HPP_
+#pragma once
+
+#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception
+#include "plssvm/exceptions/source_location.hpp" // plssvm::source_location
+
+#include <string> // std::string
+
+namespace plssvm::hpx {
+
+/**
+ * @brief Exception type thrown if a problem with the HPX backend occurs.
+ */
+class backend_exception : public exception {
+ public:
+ /**
+ * @brief Construct a new exception forwarding the exception message and source location to plssvm::exception.
+ * @param[in] msg the exception's `what()` message
+ * @param[in] loc the exception's call site information
+ */
+ explicit backend_exception(const std::string &msg, source_location loc = source_location::current());
+};
+
+} // namespace plssvm::hpx
+
+#endif // PLSSVM_BACKENDS_HPX_EXCEPTIONS_HPP_
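The corresponding definition is expected to follow the pattern of the other backends; a sketch assuming `plssvm::exception` takes the message, a class-name tag, and the source location, as the existing backend exceptions do:

```cpp
#include "plssvm/backends/HPX/exceptions.hpp"

#include <string>

namespace plssvm::hpx {

backend_exception::backend_exception(const std::string &msg, source_location loc) :
    ::plssvm::exception{ msg, "hpx::backend_exception", loc } { }

}  // namespace plssvm::hpx
```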
diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
new file mode 100644
index 000000000..09f6e6358
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
@@ -0,0 +1,107 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Functions for explicitly performing a BLAS GEMM like matrix-matrix multiplication using the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_BLAS_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_BLAS_HPP_
+#pragma once
+
+#include "plssvm/constants.hpp" // plssvm::{real_type, INTERNAL_BLOCK_SIZE, PADDING_SIZE}
+#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT
+#include "plssvm/matrix.hpp" // plssvm::soa_matrix
+#include "plssvm/shape.hpp" // plssvm::shape
+
+#include <array>             // std::array
+#include <cmath>             // std::ceil
+#include <cstddef>           // std::size_t
+#include <hpx/execution.hpp> // hpx::execution::par_unseq
+#include <hpx/algorithm.hpp> // hpx::for_each
+#include <numeric>           // std::iota
+#include <vector>            // std::vector
+
+namespace plssvm::hpx::detail {
+
+/**
+ * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a symmetric matrix (memory optimized), @p B and @p C are matrices, and @p alpha and @p beta are scalars.
+ * @param[in] num_rows the number of rows in @p A and @p C
+ * @param[in] num_rhs the number of columns in @p B and @p C
+ * @param[in] alpha the scalar alpha value
+ * @param[in] A the matrix @p A
+ * @param[in] B the matrix @p B
+ * @param[in] beta the scalar beta value
+ * @param[in,out] C the matrix @p C, also used as result matrix
+ */
+inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num_rhs, const real_type alpha, const std::vector<real_type> &A, const soa_matrix<real_type> &B, const real_type beta, soa_matrix<real_type> &C) {
+ PLSSVM_ASSERT(A.size() == (num_rows + PADDING_SIZE) * (num_rows + PADDING_SIZE + 1) / 2, "A matrix sizes mismatch!: {} != {}", A.size(), (num_rows + PADDING_SIZE) * (num_rows + PADDING_SIZE + 1) / 2);
+ PLSSVM_ASSERT(B.shape() == (plssvm::shape{ num_rhs, num_rows }), "B matrix sizes mismatch!: {} != [{}, {}]", B.shape(), num_rhs, num_rows);
+ PLSSVM_ASSERT(C.shape() == (plssvm::shape{ num_rhs, num_rows }), "C matrix sizes mismatch!: {} != [{}, {}]", C.shape(), num_rhs, num_rows);
+
+ // calculate constants
+ const auto blocked_num_rhs = static_cast<std::size_t>(std::ceil(static_cast<double>(num_rhs) / INTERNAL_BLOCK_SIZE));
+ const auto blocked_num_rows = static_cast<std::size_t>(std::ceil(static_cast<double>(num_rows) / INTERNAL_BLOCK_SIZE));
+
+ // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+ const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+ const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+ // define range over which should be iterated
+ std::vector<std::size_t> range(blocked_num_rhs * blocked_num_rows);
+ std::iota(range.begin(), range.end(), 0);
+
+ ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
+ // calculate the indices used in the current thread
+ const std::size_t rhs = idx / blocked_num_rows;
+ const std::size_t row = idx % blocked_num_rows;
+
+ const std::size_t rhs_idx = rhs * INTERNAL_BLOCK_SIZE_uz;
+ const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
+
+ // create a thread private array used for internal caching
+ std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+ // iterate over all features
+ for (std::size_t dim = 0; dim < num_rows; ++dim) {
+ // perform the dot product calculation
+ for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) {
+ for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) {
+ const std::size_t global_i = rhs_idx + static_cast<std::size_t>(internal_i);
+ const std::size_t global_j = row_idx + static_cast<std::size_t>(internal_j);
+
+ real_type A_val = 0.0;
+ // determine on which side of the diagonal we are located
+ if (dim < global_j) {
+ A_val = A.data()[dim * (num_rows + PADDING_SIZE_uz) + global_j - dim * (dim + std::size_t{ 1 }) / std::size_t{ 2 }];
+ } else {
+ A_val = A.data()[global_j * (num_rows + PADDING_SIZE_uz) + dim - global_j * (global_j + std::size_t{ 1 }) / std::size_t{ 2 }];
+ }
+ temp[internal_i][internal_j] += A_val * B.data()[dim * (num_rhs + PADDING_SIZE_uz) + global_i];
+ }
+ }
+ }
+
+ // apply the (partial) BLAS operation and update C
+ for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) {
+ for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) {
+ const std::size_t global_i = rhs_idx + static_cast<std::size_t>(internal_i);
+ const std::size_t global_j = row_idx + static_cast<std::size_t>(internal_j);
+
+ // be sure to not perform out of bounds accesses
+ if (global_i < num_rhs && global_j < num_rows) {
+ C.data()[global_j * (num_rhs + PADDING_SIZE_uz) + global_i] = alpha * temp[internal_i][internal_j] + beta * C.data()[global_j * (num_rhs + PADDING_SIZE_uz) + global_i];
+ }
+ }
+ }
+ });
+}
+
+} // namespace plssvm::hpx::detail
+
+#endif // PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_BLAS_HPP_
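The `dim * (num_rows + PADDING_SIZE) + j - dim * (dim + 1) / 2` expressions above address a symmetric matrix stored as a packed, padded upper triangle. A tiny standalone check of that layout (illustrative only; `n` and `pad` are made-up values):

```cpp
#include <cstddef>
#include <iostream>

int main() {
    constexpr std::size_t n = 4;    // num_rows
    constexpr std::size_t pad = 2;  // PADDING_SIZE
    // offset of element (i, j) with i <= j in the packed upper triangle
    const auto packed = [](const std::size_t i, const std::size_t j) {
        return i * (n + pad) + j - i * (i + 1) / 2;
    };
    std::cout << packed(0, 0) << '\n';  // 0: row 0 starts at the beginning
    std::cout << packed(1, 1) << '\n';  // 6: row 0 holds n + pad = 6 entries
    // total storage matches the size asserted in device_kernel_symm
    std::cout << (n + pad) * (n + pad + 1) / 2 << '\n';  // 21
    return 0;
}
```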
diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
new file mode 100644
index 000000000..2e59bf078
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
@@ -0,0 +1,111 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Functions for explicitly assembling the kernel matrix using the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_
+#pragma once
+
+#include "plssvm/backends/HPX/kernel/kernel_functions.hpp" // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
+#include "plssvm/constants.hpp" // plssvm::{real_type, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE}
+#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT
+#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type
+#include "plssvm/matrix.hpp" // plssvm::aos_matrix
+
+#include <array>             // std::array
+#include <cmath>             // std::ceil, std::sqrt
+#include <cstddef>           // std::size_t
+#include <hpx/execution.hpp> // hpx::execution::par_unseq
+#include <hpx/algorithm.hpp> // hpx::for_each
+#include <numeric>           // std::iota
+#include <vector>            // std::vector
+
+namespace plssvm::hpx::detail {
+
+/**
+ * @brief Assemble the kernel matrix using the @p kernel function.
+ * @tparam kernel the compile-time kernel function to use
+ * @tparam Args the types of the potential additional arguments for the @p kernel function
+ * @param[in] q the `q` vector
+ * @param[out] kernel_matrix the resulting kernel matrix
+ * @param[in] data the data matrix
+ * @param[in] QA_cost the bottom right matrix entry multiplied by cost
+ * @param[in] cost 1 / the cost parameter in the C-SVM
+ * @param[in] kernel_function_parameter the potential additional arguments for the @p kernel function
+ */
+template <kernel_function_type kernel, typename... Args>
+void device_kernel_assembly(const std::vector<real_type> &q, std::vector<real_type> &kernel_matrix, const soa_matrix<real_type> &data, const real_type QA_cost, const real_type cost, Args... kernel_function_parameter) {
+ PLSSVM_ASSERT(q.size() == data.num_rows() - 1, "Sizes mismatch!: {} != {}", q.size(), data.num_rows() - 1);
+ PLSSVM_ASSERT(kernel_matrix.size() == (q.size() + PADDING_SIZE) * (q.size() + PADDING_SIZE + 1) / 2, "Sizes mismatch (SYMM)!: {} != {}", kernel_matrix.size(), (q.size() + PADDING_SIZE) * (q.size() + PADDING_SIZE + 1) / 2);
+ PLSSVM_ASSERT(cost != real_type{ 0.0 }, "cost must not be 0.0 since it is 1 / plssvm::cost!");
+
+ const std::size_t dept = q.size();
+ const auto blocked_dept = static_cast<std::size_t>(std::ceil(static_cast<double>(dept) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_features = data.num_cols();
+
+ // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+ const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+ const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+ // define range over which should be iterated
+ std::vector<std::size_t> range(blocked_dept * (blocked_dept + 1) / 2);
+ std::iota(range.begin(), range.end(), 0);
+
+ ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
+ // calculate the indices used in the current thread
+ const std::size_t col = static_cast<std::size_t>(static_cast<double>(blocked_dept) + 0.5 - 0.5 * std::sqrt(4 * (blocked_dept * blocked_dept + blocked_dept - 2 * idx) + 1));
+ const std::size_t row = static_cast<std::size_t>(0.5 * static_cast<double>(2 * (idx - col * blocked_dept) + col * col + col));
+
+ const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
+ const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz;
+
+ // only calculate the upper triangular matrix -> done by only iterating over valid row <-> col pairs
+ // create a thread private array used for internal caching
+ std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+ // iterate over all features
+ for (std::size_t dim = 0; dim < num_features; ++dim) {
+ // perform the feature reduction calculation
+ for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) {
+ for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) {
+ const std::size_t global_row = row_idx + static_cast<std::size_t>(internal_row);
+ const std::size_t global_col = col_idx + static_cast<std::size_t>(internal_col);
+
+ temp[internal_row][internal_col] += detail::feature_reduce<kernel>(data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_row], data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_col]);
+ }
+ }
+ }
+
+ // apply the remaining part of the kernel function and store the value in the output kernel matrix
+ for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) {
+ for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) {
+ // calculate the indices to access the kernel matrix (the part stored on the current device)
+ const std::size_t global_row = row_idx + static_cast<std::size_t>(internal_row);
+ const std::size_t global_col = col_idx + static_cast<std::size_t>(internal_col);
+
+ // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix)
+ if (global_row < dept && global_col < dept && global_row >= global_col) {
+ real_type temp_ij = temp[internal_row][internal_col];
+ temp_ij = detail::apply_kernel_function<kernel>(temp_ij, kernel_function_parameter...) + QA_cost - q[global_row] - q[global_col];
+ // apply the cost on the diagonal
+ if (global_row == global_col) {
+ temp_ij += cost;
+ }
+ kernel_matrix[global_col * (dept + PADDING_SIZE_uz) + global_row - global_col * (global_col + std::size_t{ 1 }) / std::size_t{ 2 }] = temp_ij;
+ }
+ }
+ }
+ });
+}
+
+} // namespace plssvm::hpx::detail
+
+#endif // PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_
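The closed-form mapping above flattens the lower triangle of the blocked matrix so that the single parallel `for_each` visits every `(row, col)` block pair with `row >= col` exactly once; a quick standalone check of the arithmetic (illustrative only):

```cpp
#include <cmath>
#include <cstddef>
#include <iostream>

int main() {
    constexpr std::size_t blocked_dept = 3;
    for (std::size_t idx = 0; idx < blocked_dept * (blocked_dept + 1) / 2; ++idx) {
        // same index arithmetic as in device_kernel_assembly
        const auto col = static_cast<std::size_t>(static_cast<double>(blocked_dept) + 0.5 - 0.5 * std::sqrt(4 * (blocked_dept * blocked_dept + blocked_dept - 2 * idx) + 1));
        const auto row = static_cast<std::size_t>(0.5 * static_cast<double>(2 * (idx - col * blocked_dept) + col * col + col));
        std::cout << idx << " -> (" << row << ", " << col << ")\n";  // always row >= col
    }
    return 0;
}
```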
diff --git a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
new file mode 100644
index 000000000..eef6b809d
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
@@ -0,0 +1,133 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Functions for performing a matrix-matrix multiplication using an implicit kernel matrix.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_
+#pragma once
+
+#include "plssvm/backends/HPX/detail/utility.hpp" // plssvm::hpx::detail::atomic_ref
+#include "plssvm/backends/HPX/kernel/kernel_functions.hpp" // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
+#include "plssvm/constants.hpp" // plssvm::real_type
+#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT
+#include "plssvm/detail/operators.hpp" // overloaded arithmetic operations for a plssvm::matrix
+#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type
+#include "plssvm/kernel_functions.hpp" // plssvm::kernel_function
+#include "plssvm/matrix.hpp" // aos_matrix
+
+#include <array>             // std::array
+#include <cmath>             // std::ceil, std::sqrt
+#include <cstddef>           // std::size_t
+#include <hpx/execution.hpp> // hpx::execution::par_unseq
+#include <hpx/algorithm.hpp> // hpx::for_each
+#include <numeric>           // std::iota
+#include <vector>            // std::vector
+
+namespace plssvm::hpx::detail {
+
+/**
+ * @brief Perform an implicit BLAS SYMM-like operation: `C = alpha * A * B + C` where `A` is the implicitly calculated kernel matrix using the @p kernel function (never actually stored, reducing the amount of needed global memory), @p B and @p C are matrices, and @p alpha is a scalar.
+ * @tparam kernel the compile-time kernel function to use
+ * @tparam Args the types of the potential additional arguments for the @p kernel function
+ * @param[in] alpha the scalar alpha value
+ * @param[in] q the `q` vector
+ * @param[in] data the data matrix
+ * @param[in] QA_cost the bottom right matrix entry multiplied by cost
+ * @param[in] cost 1 / the cost parameter in the C-SVM
+ * @param[in] B the matrix @p B
+ * @param[in] beta the scalar beta value
+ * @param[in,out] C the matrix @p C
+ * @param[in] kernel_function_parameter the potential additional arguments for the @p kernel function
+ */
+template <kernel_function_type kernel, typename... Args>
+inline void device_kernel_assembly_symm(const real_type alpha, const std::vector<real_type> &q, const soa_matrix<real_type> &data, const real_type QA_cost, const real_type cost, const soa_matrix<real_type> &B, const real_type beta, soa_matrix<real_type> &C, Args... kernel_function_parameter) {
+ PLSSVM_ASSERT(q.size() == data.num_rows() - 1, "Sizes mismatch!: {} != {}", q.size(), data.num_rows() - 1);
+ PLSSVM_ASSERT(cost != real_type{ 0.0 }, "cost must not be 0.0 since it is 1 / plssvm::cost!");
+ PLSSVM_ASSERT(B.shape() == C.shape(), "The matrices B and C must have the same shape!");
+ PLSSVM_ASSERT(B.num_cols() == q.size(), "The number of columns in B ({}) must be the same as the values in q ({})!", B.num_cols(), q.size());
+
+ using namespace operators;
+
+ // alpha * A * B + beta * C
+ C *= beta;
+
+ // calculate constants
+ const std::size_t dept = q.size();
+ const auto blocked_dept = static_cast<std::size_t>(std::ceil(static_cast<double>(dept) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_features = data.num_cols();
+ const std::size_t num_classes = B.num_rows();
+
+ // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+ const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+ const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+ // define range over which should be iterated
+ std::vector<std::size_t> range(blocked_dept * (blocked_dept + 1) / 2);
+ std::iota(range.begin(), range.end(), 0);
+
+ ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
+ // calculate the indices used in the current thread
+ const std::size_t col = static_cast<std::size_t>(static_cast<double>(blocked_dept) + 0.5 - 0.5 * std::sqrt(4 * (blocked_dept * blocked_dept + blocked_dept - 2 * idx) + 1));
+ const std::size_t row = static_cast<std::size_t>(0.5 * static_cast<double>(2 * (idx - col * blocked_dept) + col * col + col));
+
+ const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
+ const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz;
+
+ // only calculate the upper triangular matrix -> done by only iterating over valid row <-> col pairs
+ // create a thread private array used for internal caching
+ std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+ // iterate over all features
+ for (std::size_t dim = 0; dim < num_features; ++dim) {
+ for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) {
+ for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) {
+ const std::size_t global_row = row_idx + static_cast<std::size_t>(internal_row);
+ const std::size_t global_col = col_idx + static_cast<std::size_t>(internal_col);
+
+ temp[internal_row][internal_col] += detail::feature_reduce<kernel>(data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_row], data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_col]);
+ }
+ }
+ }
+
+ // apply the remaining part of the kernel function and store the value in the output kernel matrix
+ for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) {
+ for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) {
+ const std::size_t global_row = row_idx + static_cast<std::size_t>(internal_row);
+ const std::size_t global_col = col_idx + static_cast<std::size_t>(internal_col);
+
+ // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix)
+ if (global_row < dept && global_col < dept && global_row >= global_col) {
+ real_type temp_ij = temp[internal_row][internal_col];
+ temp_ij = detail::apply_kernel_function<kernel>(temp_ij, kernel_function_parameter...) + QA_cost - q[global_row] - q[global_col];
+ // apply the cost on the diagonal
+ if (global_row == global_col) {
+ temp_ij += cost;
+ // calculate the values of alpha * A * B
+ for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) {
+ atomic_ref<real_type>{ C.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx];
+ }
+ } else {
+ // calculate the values of alpha * A * B
+ for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) {
+ atomic_ref<real_type>{ C.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B.data()[global_col * (num_classes + PADDING_SIZE_uz) + class_idx];
+ // symmetry
+ atomic_ref<real_type>{ C.data()[global_col * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx];
+ }
+ }
+ }
+ }
+ }
+ });
+}
+
+} // namespace plssvm::hpx::detail
+
+#endif // PLSSVM_BACKENDS_HPX_KERNEL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_
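Unlike the explicit kernel, different triangular blocks may update the same rows of `C` concurrently here, hence the `boost::atomic_ref` read-modify-write above. A minimal standalone illustration (Boost.Atomic provides `boost::atomic_ref` with floating-point `+=` since Boost 1.73):

```cpp
#include <boost/atomic/atomic_ref.hpp>

#include <iostream>
#include <vector>

int main() {
    std::vector<double> C(4, 0.0);
    // atomic fetch-add on a plain double; safe even with concurrent writers
    boost::atomic_ref<double>{ C[0] } += 1.5;
    std::cout << C[0] << '\n';  // 1.5
    return 0;
}
```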
diff --git a/include/plssvm/backends/HPX/kernel/kernel_functions.hpp b/include/plssvm/backends/HPX/kernel/kernel_functions.hpp
new file mode 100644
index 000000000..b7be1cb16
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/kernel_functions.hpp
@@ -0,0 +1,159 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Implement the different kernel functions for the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_KERNEL_FUNCTIONS_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_KERNEL_FUNCTIONS_HPP_
+#pragma once
+
+#include "plssvm/constants.hpp" // plssvm::real_type
+#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type
+
+// attribute macro for the HPX kernel functions (currently expands to nothing)
+#define PLSSVM_HPX_KERNEL_FUNCTION
+
+#include <cmath>  // std::abs, std::pow, std::exp, std::tanh
+#include <limits> // std::numeric_limits::min
+
+namespace plssvm::hpx::detail {
+
+//***************************************************//
+// feature reductions //
+//***************************************************//
+
+/**
+ * @brief Compute the default feature reduction, i.e., a simple dot-product.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <kernel_function_type kernel>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) {
+ return val1 * val2;
+}
+
+/**
+ * @brief Compute the feature reduction for the radial basis function kernel function, i.e., the squared Euclidean distance.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce<kernel_function_type::rbf>(const real_type val1, const real_type val2) {
+ const real_type d = val1 - val2;
+ return d * d;
+}
+
+/**
+ * @brief Compute the feature reduction for the laplacian kernel function, i.e., the Manhattan distance.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce<kernel_function_type::laplacian>(const real_type val1, const real_type val2) {
+ return std::abs(val1 - val2);
+}
+
+/**
+ * @brief Compute the feature reduction for the chi-squared kernel function.
+ * @note Be sure that the denominator isn't 0.0 which may be the case for padding values.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce<kernel_function_type::chi_squared>(const real_type val1, const real_type val2) {
+ const real_type d = val1 - val2;
+ return (real_type{ 1.0 } / (val1 + val2 + std::numeric_limits::min())) * d * d;
+}
+
+//***************************************************//
+// kernel functions //
+//***************************************************//
+
+/**
+ * @brief Unimplemented base-template for all kernel functions.
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <kernel_function_type kernel_function, typename... Args>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function(real_type, Args...);
+
+/**
+ * @brief Compute the linear kernel function using @p value.
+ * @param[in] value the value to apply the linear kernel function to
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::linear>(const real_type value) {
+ return value;
+}
+
+/**
+ * @brief Compute the polynomial kernel function using @p value.
+ * @param[in] value the value to apply the polynomial kernel function to
+ * @param[in] degree the degree parameter of the polynomial kernel function
+ * @param[in] gamma the gamma parameter of the polynomial kernel function
+ * @param[in] coef0 the coef0 parameter of the polynomial kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::polynomial>(const real_type value, const int degree, const real_type gamma, const real_type coef0) {
+ return std::pow(gamma * value + coef0, static_cast<real_type>(degree));
+}
+
+/**
+ * @brief Compute the radial basis function kernel function using @p value.
+ * @param[in] value the value to apply the rbf kernel function to
+ * @param[in] gamma the gamma parameter of the rbf kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::rbf>(const real_type value, const real_type gamma) {
+ return std::exp(-gamma * value);
+}
+
+/**
+ * @brief Compute the sigmoid kernel function using @p value.
+ * @param[in] value the value to apply the sigmoid kernel function to
+ * @param[in] gamma the gamma parameter of the sigmoid kernel function
+ * @param[in] coef0 the coef0 parameter of the sigmoid kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::sigmoid>(const real_type value, const real_type gamma, const real_type coef0) {
+ return std::tanh(gamma * value + coef0);
+}
+
+/**
+ * @brief Compute the laplacian kernel function using @p value.
+ * @param[in] value the value to apply the laplacian kernel function to
+ * @param[in] gamma the gamma parameter of the laplacian kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::laplacian>(const real_type value, const real_type gamma) {
+ return std::exp(-gamma * value);
+}
+
+/**
+ * @brief Compute the chi-squared kernel function using @p value.
+ * @param[in] value the value to apply the chi-squared kernel function to
+ * @param[in] gamma the gamma parameter of the chi-squared kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::chi_squared>(const real_type value, const real_type gamma) {
+ return std::exp(-gamma * value);
+}
+
+} // namespace plssvm::hpx::detail
+
+#endif // PLSSVM_BACKENDS_HPX_KERNEL_KERNEL_FUNCTIONS_HPP_
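Taken together, every kernel entry is produced in two phases: `feature_reduce` accumulates one term per feature dimension, and `apply_kernel_function` maps the accumulated sum through the kernel's scalar function. A hand-inlined sketch for the RBF case, k(x, y) = exp(-gamma * ||x - y||^2); `rbf_kernel` is an illustrative stand-in, not a PLSSVM function.

```cpp
#include <cmath>    // std::exp
#include <cstddef>  // std::size_t
#include <vector>   // std::vector

// Two-phase kernel evaluation: reduce over all features, then apply the
// scalar kernel function to the reduced sum.
double rbf_kernel(const std::vector<double> &x, const std::vector<double> &y, const double gamma) {
    double sum = 0.0;
    for (std::size_t dim = 0; dim < x.size(); ++dim) {
        const double d = x[dim] - y[dim];
        sum += d * d;  // feature_reduce<kernel_function_type::rbf>: squared Euclidean distance
    }
    return std::exp(-gamma * sum);  // apply_kernel_function<kernel_function_type::rbf>
}
```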
diff --git a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
new file mode 100644
index 000000000..7b153d889
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
@@ -0,0 +1,250 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Defines the functions used for prediction for the C-SVM using the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_PREDICT_KERNEL_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_PREDICT_KERNEL_HPP_
+#pragma once
+
+#include "plssvm/backends/HPX/detail/utility.hpp" // plssvm::hpx::detail::atomic_ref
+#include "plssvm/backends/HPX/kernel/kernel_functions.hpp" // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
+#include "plssvm/constants.hpp" // plssvm::{real_type, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE}
+#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT
+#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type
+#include "plssvm/matrix.hpp" // plssvm::aos_matrix, plssvm::soa_matrix
+#include "plssvm/shape.hpp" // plssvm::shape
+
+#include <array> // std::array
+#include <cmath> // std::fma
+#include <cstddef> // std::size_t
+#include <hpx/execution.hpp> // hpx::execution::par_unseq
+#include <hpx/parallel/algorithms/for_each.hpp> // hpx::for_each
+#include <numeric> // std::iota
+#include <vector> // std::vector
+
+namespace plssvm::hpx::detail {
+
+/**
+ * @brief Calculate the `w` vector used to speed up the prediction when using the linear kernel function.
+ * @param[out] w the vector to speed up the linear prediction
+ * @param[in] alpha the previously learned weights
+ * @param[in] support_vectors the support vectors
+ */
+inline void device_kernel_w_linear(soa_matrix<real_type> &w, const aos_matrix<real_type> &alpha, const soa_matrix<real_type> &support_vectors) {
+ PLSSVM_ASSERT(alpha.num_cols() == support_vectors.num_rows(), "Size mismatch: {} vs {}!", alpha.num_cols(), support_vectors.num_rows());
+ PLSSVM_ASSERT(w.shape() == (plssvm::shape{ alpha.num_rows(), support_vectors.num_cols() }), "Shape mismatch: {} vs {}!", w.shape(), (plssvm::shape{ alpha.num_rows(), support_vectors.num_cols() }));
+
+ // calculate constants
+ const std::size_t num_features = support_vectors.num_cols();
+ const auto blocked_num_features = static_cast<std::size_t>(std::ceil(static_cast<double>(num_features) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_classes = alpha.num_rows();
+ const auto blocked_num_classes = static_cast<std::size_t>(std::ceil(static_cast<double>(num_classes) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_support_vectors = support_vectors.num_rows();
+
+ // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+ const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+ const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+ // define range over which should be iterated
+ std::vector<std::size_t> range(blocked_num_features * blocked_num_classes);
+ std::iota(range.begin(), range.end(), 0);
+
+ ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
+ // calculate the indices used in the current thread
+ const std::size_t feature = idx / blocked_num_classes;
+ const std::size_t c = idx % blocked_num_classes;
+
+ const std::size_t feature_idx = feature * INTERNAL_BLOCK_SIZE_uz;
+ const std::size_t class_idx = c * INTERNAL_BLOCK_SIZE_uz;
+
+ // create a thread private array used for internal caching
+ std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+ // iterate over all support vectors
+ for (std::size_t sv = 0; sv < num_support_vectors; ++sv) {
+ // perform the feature reduction calculation
+ for (unsigned internal_feature = 0; internal_feature < INTERNAL_BLOCK_SIZE; ++internal_feature) {
+ for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) {
+ const std::size_t global_feature_idx = feature_idx + static_cast<std::size_t>(internal_feature);
+ const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
+
+ temp[internal_feature][internal_class] += alpha.data()[global_class_idx * (num_support_vectors + PADDING_SIZE_uz) + sv] * support_vectors.data()[global_feature_idx * (num_support_vectors + PADDING_SIZE_uz) + sv];
+ }
+ }
+ }
+
+ // update global array with local one
+ for (unsigned internal_feature = 0; internal_feature < INTERNAL_BLOCK_SIZE; ++internal_feature) {
+ for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) {
+ const std::size_t global_feature_idx = feature_idx + static_cast<std::size_t>(internal_feature);
+ const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
+
+ w.data()[global_feature_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_feature][internal_class];
+ }
+ }
+ });
+}
+
+/**
+ * @brief Predict the @p predict_points using the linear kernel, speeding up the calculation using the @p w vector.
+ * @param[out] prediction the predicted values
+ * @param[in] w the vector to speed up the calculations
+ * @param[in] rho the previously learned bias
+ * @param[in] predict_points the data points to predict
+ */
+inline void device_kernel_predict_linear(aos_matrix<real_type> &prediction, const soa_matrix<real_type> &w, const std::vector<real_type> &rho, const soa_matrix<real_type> &predict_points) {
+ PLSSVM_ASSERT(w.num_rows() == rho.size(), "Size mismatch: {} vs {}!", w.num_rows(), rho.size());
+ PLSSVM_ASSERT(w.num_cols() == predict_points.num_cols(), "Size mismatch: {} vs {}!", w.num_cols(), predict_points.num_cols());
+ PLSSVM_ASSERT(prediction.shape() == (plssvm::shape{ predict_points.num_rows(), w.num_rows() }), "Shape mismatch: {} vs {}!", prediction.shape(), (plssvm::shape{ predict_points.num_rows(), w.num_rows() }));
+
+ // calculate constants
+ const std::size_t num_predict_points = predict_points.num_rows();
+ const auto blocked_num_predict_points = static_cast<std::size_t>(std::ceil(static_cast<double>(num_predict_points) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_classes = prediction.num_cols();
+ const auto blocked_num_classes = static_cast<std::size_t>(std::ceil(static_cast<double>(num_classes) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_features = predict_points.num_cols();
+
+ // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+ const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+ const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+ // define range over which should be iterated
+ std::vector<std::size_t> range(blocked_num_predict_points * blocked_num_classes);
+ std::iota(range.begin(), range.end(), 0);
+
+ ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
+ // calculate the indices used in the current thread
+ const std::size_t pp = idx / blocked_num_classes;
+ const std::size_t c = idx % blocked_num_classes;
+
+ const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
+ const std::size_t class_idx = c * INTERNAL_BLOCK_SIZE_uz;
+
+ // create a thread private array used for internal caching
+ std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+ // iterate over all features
+ for (std::size_t dim = 0; dim < num_features; ++dim) {
+ // perform the feature reduction calculation
+ for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+ for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) {
+ const std::size_t global_pp_idx = pp_idx + static_cast<std::size_t>(internal_pp);
+ const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
+
+ temp[internal_pp][internal_class] += w.data()[dim * (num_classes + PADDING_SIZE_uz) + global_class_idx] * predict_points.data()[dim * (num_predict_points + PADDING_SIZE_uz) + global_pp_idx];
+ }
+ }
+ }
+
+ // update the global prediction matrix with the locally cached values and the bias
+ for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+ for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) {
+ const std::size_t global_pp_idx = pp_idx + static_cast<std::size_t>(internal_pp);
+ const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
+
+ if (global_pp_idx < num_predict_points && global_class_idx < num_classes) {
+ prediction.data()[global_pp_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_pp][internal_class] - rho.data()[global_class_idx];
+ }
+ }
+ }
+ });
+}
+
+/**
+ * @brief Predict the @p predict_points using the specified kernel function.
+ * @tparam kernel the type of the used kernel function
+ * @tparam Args the types of the parameters necessary for the specific kernel function
+ * @param[out] prediction the predicted values
+ * @param[in] alpha the previously learned weights
+ * @param[in] rho the previously learned bias
+ * @param[in] support_vectors the support vectors
+ * @param[in] predict_points the data points to predict
+ * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function
+ */
+template <kernel_function_type kernel, typename... Args>
+inline void device_kernel_predict(aos_matrix<real_type> &prediction, const aos_matrix<real_type> &alpha, const std::vector<real_type> &rho, const soa_matrix<real_type> &support_vectors, const soa_matrix<real_type> &predict_points, Args... kernel_function_parameter) {
+ PLSSVM_ASSERT(alpha.num_rows() == rho.size(), "Size mismatch: {} vs {}!", alpha.num_rows(), rho.size());
+ PLSSVM_ASSERT(alpha.num_cols() == support_vectors.num_rows(), "Size mismatch: {} vs {}!", alpha.num_cols(), support_vectors.num_rows());
+ PLSSVM_ASSERT(support_vectors.num_cols() == predict_points.num_cols(), "Size mismatch: {} vs {}!", support_vectors.num_cols(), predict_points.num_cols());
+ PLSSVM_ASSERT(prediction.shape() == (plssvm::shape{ predict_points.num_rows(), alpha.num_rows() }), "Shape mismatch: {} vs {}!", prediction.shape(), (plssvm::shape{ predict_points.num_rows(), alpha.num_rows() }));
+
+ // calculate constants
+ const std::size_t num_classes = alpha.num_rows();
+ const std::size_t num_support_vectors = support_vectors.num_rows();
+ const auto blocked_num_support_vectors = static_cast<std::size_t>(std::ceil(static_cast<double>(num_support_vectors) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_predict_points = predict_points.num_rows();
+ const auto blocked_num_predict_points = static_cast<std::size_t>(std::ceil(static_cast<double>(num_predict_points) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_features = predict_points.num_cols();
+
+ // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+ const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+ const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+ // define range over which should be iterated
+ std::vector<std::size_t> range(blocked_num_predict_points * blocked_num_support_vectors);
+ std::iota(range.begin(), range.end(), 0);
+
+ ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
+ // calculate the indices used in the current thread
+ const std::size_t pp = idx / blocked_num_support_vectors;
+ const std::size_t sv = idx % blocked_num_support_vectors;
+
+ const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
+ const std::size_t sv_idx = sv * INTERNAL_BLOCK_SIZE_uz;
+
+ // create a thread private array used for internal caching
+ std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+ // iterate over all features
+ for (std::size_t dim = 0; dim < num_features; ++dim) {
+ // perform the feature reduction calculation
+ for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+ for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) {
+ const std::size_t global_pp_idx = pp_idx + static_cast<std::size_t>(internal_pp);
+ const std::size_t global_sv_idx = sv_idx + static_cast<std::size_t>(internal_sv);
+
+ temp[internal_pp][internal_sv] += detail::feature_reduce<kernel>(support_vectors.data()[dim * (num_support_vectors + PADDING_SIZE_uz) + global_sv_idx],
+ predict_points.data()[dim * (num_predict_points + PADDING_SIZE_uz) + global_pp_idx]);
+ }
+ }
+ }
+
+ // update temp using the respective kernel function
+ for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+ for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) {
+ temp[internal_pp][internal_sv] = detail::apply_kernel_function<kernel>(temp[internal_pp][internal_sv], kernel_function_parameter...);
+ }
+ }
+
+ // add results to prediction
+ for (std::size_t a = 0; a < num_classes; ++a) {
+ for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+ for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) {
+ const std::size_t global_pp_idx = pp_idx + static_cast<std::size_t>(internal_pp);
+ const std::size_t global_sv_idx = sv_idx + static_cast<std::size_t>(internal_sv);
+
+ // be sure to not perform out of bounds accesses
+ if (global_pp_idx < num_predict_points && global_sv_idx < num_support_vectors) {
+ if (global_sv_idx == 0) {
+ atomic_ref<real_type>{ prediction.data()[global_pp_idx * (num_classes + PADDING_SIZE_uz) + a] } += -rho.data()[a];
+ }
+ atomic_ref<real_type>{ prediction.data()[global_pp_idx * (num_classes + PADDING_SIZE_uz) + a] } +=
+ temp[internal_pp][internal_sv] * alpha.data()[a * (num_support_vectors + PADDING_SIZE_uz) + global_sv_idx];
+ }
+ }
+ }
+ }
+ });
+}
+
+} // namespace plssvm::hpx::detail
+
+#endif // PLSSVM_BACKENDS_HPX_KERNEL_PREDICT_KERNEL_HPP_
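All three predict kernels parallelize the same way: the two-dimensional grid of `INTERNAL_BLOCK_SIZE` blocks is flattened into a one-dimensional iota range so that `hpx::for_each` distributes plain indices, and each work item recovers its block coordinates via division and modulo. A stripped-down sketch of that mapping (sequential here for clarity; all names are illustrative):

```cpp
#include <cstddef>  // std::size_t
#include <numeric>  // std::iota
#include <vector>   // std::vector

// Flatten a (row-block x col-block) grid into a single index range, as the
// predict kernels do before handing it to hpx::for_each(par_unseq, ...).
void for_each_block(const std::size_t blocked_rows, const std::size_t blocked_cols) {
    std::vector<std::size_t> range(blocked_rows * blocked_cols);
    std::iota(range.begin(), range.end(), std::size_t{ 0 });

    for (const std::size_t idx : range) {
        const std::size_t row_block = idx / blocked_cols;  // e.g., pp in device_kernel_predict
        const std::size_t col_block = idx % blocked_cols;  // e.g., sv in device_kernel_predict
        // ... process one INTERNAL_BLOCK_SIZE x INTERNAL_BLOCK_SIZE tile at (row_block, col_block) ...
        (void) row_block;
        (void) col_block;
    }
}
```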
diff --git a/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp
index ee4f6f15e..61729d9b8 100644
--- a/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp
+++ b/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp
@@ -6,7 +6,7 @@
* @license This file is part of the PLSSVM project which is released under the MIT license.
* See the LICENSE.md file in the project root for full license information.
*
- * @brief Functions for explicitly performing a BLAS GEMM like matrix-matrix multiplication using the CUDA backend.
+ * @brief Functions for explicitly performing a BLAS GEMM like matrix-matrix multiplication using the OpenMP backend.
*/
#ifndef PLSSVM_BACKENDS_OPENMP_KERNEL_CG_EXPLICIT_BLAS_HPP_
diff --git a/include/plssvm/core.hpp b/include/plssvm/core.hpp
index 4311a189f..4e1fd1be1 100644
--- a/include/plssvm/core.hpp
+++ b/include/plssvm/core.hpp
@@ -76,6 +76,12 @@ namespace plssvm::openmp { }
/// Namespace containing OpenMP backend specific implementation details. **Should not** directly be used by users.
namespace plssvm::openmp::detail { }
+/// Namespace containing the C-SVM using the HPX backend.
+namespace plssvm::hpx { }
+
+/// Namespace containing HPX backend specific implementation details. **Should not** directly be used by users.
+namespace plssvm::hpx::detail { }
+
/// Namespace containing the C-SVM using the stdpar backend.
namespace plssvm::stdpar { }
diff --git a/include/plssvm/csvm_factory.hpp b/include/plssvm/csvm_factory.hpp
index 01a2769ec..a1272a5e0 100644
--- a/include/plssvm/csvm_factory.hpp
+++ b/include/plssvm/csvm_factory.hpp
@@ -28,6 +28,9 @@
#if defined(PLSSVM_HAS_STDPAR_BACKEND)
#include "plssvm/backends/stdpar/csvm.hpp" // plssvm::stdpar::csvm, plssvm::csvm_backend_exists_v
#endif
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ #include "plssvm/backends/HPX/csvm.hpp" // plssvm::hpx::csvm, plssvm::csvm_backend_exists_v
+#endif
#if defined(PLSSVM_HAS_CUDA_BACKEND)
#include "plssvm/backends/CUDA/csvm.hpp" // plssvm::cuda::csvm, plssvm::csvm_backend_exists_v
#endif
@@ -130,6 +133,8 @@ template
return make_csvm_default_impl<openmp::csvm>(std::forward<Args>(args)...);
case backend_type::stdpar:
return make_csvm_default_impl<stdpar::csvm>(std::forward<Args>(args)...);
+ case backend_type::hpx:
+ return make_csvm_default_impl<hpx::csvm>(std::forward<Args>(args)...);
case backend_type::cuda:
return make_csvm_default_impl(std::forward(args)...);
case backend_type::hip:
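With the new `backend_type::hpx` case in place, an HPX C-SVM is requested through the same factory call as every other backend. A minimal usage sketch, assuming `plssvm/core.hpp` pulls in the factory as the umbrella header (error handling omitted):

```cpp
#include "plssvm/core.hpp"  // assumed umbrella header exposing plssvm::make_csvm and plssvm::backend_type

#include <memory>  // std::unique_ptr

int main() {
    // explicitly request the HPX backend; the factory dispatches to plssvm::hpx::csvm
    const std::unique_ptr<plssvm::csvm> svm = plssvm::make_csvm(plssvm::backend_type::hpx);
    return svm != nullptr ? 0 : 1;
}
```

Note that, as the `environment.hpp` and `main_*.cpp` changes below show, the HPX runtime must be started before such a C-SVM can actually run kernels.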
diff --git a/include/plssvm/detail/cmd/parser_predict.hpp b/include/plssvm/detail/cmd/parser_predict.hpp
index 2a114f0f0..2b96416ae 100644
--- a/include/plssvm/detail/cmd/parser_predict.hpp
+++ b/include/plssvm/detail/cmd/parser_predict.hpp
@@ -37,7 +37,7 @@ struct parser_predict {
*/
parser_predict(int argc, char **argv);
- /// The used backend: automatic (depending on the specified target_platforms), OpenMP, stdpar, CUDA, HIP, OpenCL, or SYCL.
+ /// The used backend: automatic (depending on the specified target_platforms), OpenMP, HPX, stdpar, CUDA, HIP, OpenCL, or SYCL.
backend_type backend{ backend_type::automatic };
/// The target platform: automatic (depending on the used backend), CPUs or GPUs from NVIDIA, AMD, or Intel.
target_platform target{ target_platform::automatic };
diff --git a/include/plssvm/detail/cmd/parser_train.hpp b/include/plssvm/detail/cmd/parser_train.hpp
index 70f0c03e2..a723fa82e 100644
--- a/include/plssvm/detail/cmd/parser_train.hpp
+++ b/include/plssvm/detail/cmd/parser_train.hpp
@@ -53,7 +53,7 @@ struct parser_train {
/// The multi-class classification strategy used.
classification_type classification{ classification_type::oaa };
- /// The used backend: automatic (depending on the specified target_platforms), OpenMP, stdpar, CUDA, HIP, OpenCL, or SYCL.
+ /// The used backend: automatic (depending on the specified target_platforms), OpenMP, HPX, stdpar, CUDA, HIP, OpenCL, or SYCL.
backend_type backend{ backend_type::automatic };
/// The target platform: automatic (depending on the used backend), CPUs or GPUs from NVIDIA, AMD, or Intel.
target_platform target{ target_platform::automatic };
diff --git a/include/plssvm/environment.hpp b/include/plssvm/environment.hpp
index 692c362b5..69a6dab24 100644
--- a/include/plssvm/environment.hpp
+++ b/include/plssvm/environment.hpp
@@ -30,6 +30,12 @@
#include <string> // std::string
#include <vector> // std::vector
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ #include <hpx/post.hpp> // ::hpx::post
+ #include <hpx/hpx_start.hpp> // ::hpx::{start, stop, finalize}
+ #include <hpx/runtime.hpp> // ::hpx::{is_running, is_stopped}
+#endif
+
namespace plssvm::environment {
/**
@@ -100,7 +106,8 @@ namespace detail {
* @return the respective environment status (`[[nodiscard]]`)
*/
[[nodiscard]] inline status determine_status_from_initialized_finalized_flags(const bool is_initialized, const bool is_finalized) {
- if (!is_initialized && !is_finalized) {
+ if (!is_initialized) {
+ // Note: ::hpx::is_stopped returns true even before finalize has been called once
return status::uninitialized;
} else if (is_initialized && !is_finalized) {
return status::initialized;
@@ -148,6 +155,14 @@ template
case backend_type::sycl:
// no environment necessary to manage these backends
return status::unnecessary;
+ case backend_type::hpx:
+ {
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ return detail::determine_status_from_initialized_finalized_functions<::hpx::is_running, ::hpx::is_stopped>();
+#else
+ return status::unnecessary;
+#endif
+ }
}
// should never be reached!
::plssvm::detail::unreachable();
@@ -177,7 +192,12 @@ namespace detail {
inline void initialize_backend([[maybe_unused]] const backend_type backend) {
PLSSVM_ASSERT(backend != backend_type::automatic, "The automatic backend may never be initialized!");
// Note: must be implemented for the backends that need environmental setup
- // nothing to do for all available backends
+ // only have to perform special initialization steps for the HPX backend
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ if (backend == backend_type::hpx) {
+ ::hpx::start(nullptr, 0, nullptr);
+ }
+#endif
}
/**
@@ -189,7 +209,12 @@ inline void initialize_backend([[maybe_unused]] const backend_type backend) {
inline void initialize_backend([[maybe_unused]] const backend_type backend, [[maybe_unused]] int &argc, [[maybe_unused]] char **argv) {
PLSSVM_ASSERT(backend != backend_type::automatic, "The automatic backend may never be initialized!");
// Note: must be implemented for the backends that need environmental setup
- // nothing to do for all available backends
+ // only have to perform special initialization steps for the HPX backend
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ if (backend == backend_type::hpx) {
+ ::hpx::start(nullptr, argc, argv);
+ }
+#endif
}
/**
@@ -199,7 +224,13 @@ inline void initialize_backend([[maybe_unused]] const backend_type backend, [[maybe_unused]] int &argc, [[maybe_unused]] char **argv) {
inline void finalize_backend([[maybe_unused]] const backend_type backend) {
PLSSVM_ASSERT(backend != backend_type::automatic, "The automatic backend may never be finalized!");
// Note: must be implemented for the backends that need environmental setup
- // nothing to do for all available backends
+ // only have to perform special finalization steps for the HPX backend
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ if (backend == backend_type::hpx) {
+ ::hpx::post([] { ::hpx::finalize(); });
+ ::hpx::stop();
+ }
+#endif
}
/**
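The three hooks above encode HPX's external runtime-control protocol: `hpx::start` brings the runtime up without blocking the calling (non-HPX) thread, and a clean shutdown posts `hpx::finalize` onto an HPX worker thread before `hpx::stop` joins the runtime. A standalone sketch of that lifecycle, using only the calls visible in the diff (the header names are assumptions, mirroring the includes reconstructed above):

```cpp
#include <hpx/hpx_start.hpp>  // assumed header for ::hpx::start, ::hpx::stop, ::hpx::finalize
#include <hpx/post.hpp>       // assumed header for ::hpx::post

int main(int argc, char **argv) {
    ::hpx::start(nullptr, argc, argv);  // launch the HPX runtime; returns immediately

    // ... run work that uses HPX (hpx::async, hpx::for_each, ...) ...

    ::hpx::post([] { ::hpx::finalize(); });  // request shutdown from within the runtime
    return ::hpx::stop();                    // block until the runtime has terminated
}
```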
diff --git a/src/main_predict.cpp b/src/main_predict.cpp
index 079b6ca00..ff28028c8 100644
--- a/src/main_predict.cpp
+++ b/src/main_predict.cpp
@@ -75,8 +75,15 @@ int main(int argc, char *argv[]) {
// check whether SYCL is used as backend (it is either requested directly or as automatic backend)
const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::sycl) };
+ // check whether HPX is used as backend (it is either requested directly or as automatic backend)
+ const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
+
// initialize environments if necessary
- environment_guard = std::make_unique<plssvm::environment::scope_guard>();
+ std::vector<plssvm::backend_type> backends_to_initialize{};
+ if (use_hpx_as_backend) {
+ backends_to_initialize.push_back(plssvm::backend_type::hpx);
+ }
+ environment_guard = std::make_unique<plssvm::environment::scope_guard>(backends_to_initialize);
// create default csvm
const std::unique_ptr<plssvm::csvm> svm = use_sycl_as_backend ? plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type)
diff --git a/src/main_train.cpp b/src/main_train.cpp
index 7f99409c7..32ac09d71 100644
--- a/src/main_train.cpp
+++ b/src/main_train.cpp
@@ -72,8 +72,15 @@ int main(int argc, char *argv[]) {
// check whether SYCL is used as backend (it is either requested directly or as automatic backend)
const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::sycl) };
+ // check whether HPX is used as backend (it is either requested directly or as automatic backend)
+ const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
+
// initialize environments if necessary
- environment_guard = std::make_unique<plssvm::environment::scope_guard>();
+ std::vector<plssvm::backend_type> backends_to_initialize{};
+ if (use_hpx_as_backend) {
+ backends_to_initialize.push_back(plssvm::backend_type::hpx);
+ }
+ environment_guard = std::make_unique<plssvm::environment::scope_guard>(backends_to_initialize);
// create SVM
const std::unique_ptr<plssvm::csvm> svm = use_sycl_as_backend ? plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type, plssvm::sycl_kernel_invocation_type = cmd_parser.sycl_kernel_invocation_type)
diff --git a/src/plssvm/backend_types.cpp b/src/plssvm/backend_types.cpp
index 0d01bb837..34789a764 100644
--- a/src/plssvm/backend_types.cpp
+++ b/src/plssvm/backend_types.cpp
@@ -1,6 +1,7 @@
/**
* @author Alexander Van Craen
* @author Marcel Breyer
+ * @author Alexander Strack
* @copyright 2018-today The PLSSVM project - All Rights Reserved
* @license This file is part of the PLSSVM project which is released under the MIT license.
* See the LICENSE.md file in the project root for full license information.
@@ -35,6 +36,9 @@ std::vector list_available_backends() {
#if defined(PLSSVM_HAS_STDPAR_BACKEND)
available_backends.push_back(backend_type::stdpar);
#endif
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ available_backends.push_back(backend_type::hpx);
+#endif
#if defined(PLSSVM_HAS_CUDA_BACKEND)
available_backends.push_back(backend_type::cuda);
#endif
@@ -61,7 +65,7 @@ backend_type determine_default_backend(const std::vector<backend_type> &available_backends, const std::vector<target_platform> &available_target_platforms) {
decision_order_type{ target_platform::gpu_nvidia, { backend_type::cuda, backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::stdpar } },
decision_order_type{ target_platform::gpu_amd, { backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::stdpar } },
decision_order_type{ target_platform::gpu_intel, { backend_type::sycl, backend_type::opencl, backend_type::stdpar } },
- decision_order_type{ target_platform::cpu, { backend_type::sycl, backend_type::opencl, backend_type::openmp, backend_type::stdpar } }
+ decision_order_type{ target_platform::cpu, { backend_type::sycl, backend_type::opencl, backend_type::openmp, backend_type::hpx, backend_type::stdpar } }
};
// return the default backend based on the previously defined decision order
@@ -87,6 +91,8 @@ std::ostream &operator<<(std::ostream &out, const backend_type backend) {
return out << "openmp";
case backend_type::stdpar:
return out << "stdpar";
+ case backend_type::hpx:
+ return out << "hpx";
case backend_type::cuda:
return out << "cuda";
case backend_type::hip:
@@ -110,6 +116,8 @@ std::istream &operator>>(std::istream &in, backend_type &backend) {
backend = backend_type::openmp;
} else if (str == "stdpar") {
backend = backend_type::stdpar;
+ } else if (str == "hpx") {
+ backend = backend_type::hpx;
} else if (str == "cuda") {
backend = backend_type::cuda;
} else if (str == "hip") {
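The stream operators are what make `--backend hpx` work on the command line: `operator>>` parses the string into `backend_type::hpx`, and `operator<<` prints it back, e.g., in the backend summary. A small round-trip sketch:

```cpp
#include "plssvm/backend_types.hpp"  // plssvm::backend_type and its stream operators

#include <iostream>  // std::cout
#include <sstream>   // std::istringstream

int main() {
    std::istringstream in{ "hpx" };
    plssvm::backend_type backend{};
    in >> backend;                 // parses to plssvm::backend_type::hpx
    std::cout << backend << '\n';  // prints "hpx" again
    return 0;
}
```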
diff --git a/src/plssvm/backends/HPX/CMakeLists.txt b/src/plssvm/backends/HPX/CMakeLists.txt
new file mode 100644
index 000000000..8ebde5e46
--- /dev/null
+++ b/src/plssvm/backends/HPX/CMakeLists.txt
@@ -0,0 +1,63 @@
+## Authors: Alexander Van Craen, Marcel Breyer, Alexander Strack
+## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved
+## License: This file is part of the PLSSVM project which is released under the MIT license.
+## See the LICENSE.md file in the project root for full license information.
+########################################################################################################################
+
+list(APPEND CMAKE_MESSAGE_INDENT "HPX: ")
+
+# check if HPX can be enabled
+message(CHECK_START "Checking for HPX backend")
+
+find_package(HPX 1.9.0)
+
+if (NOT HPX_FOUND)
+ message(CHECK_FAIL "not found")
+ if (PLSSVM_ENABLE_HPX_BACKEND MATCHES "ON")
+ message(SEND_ERROR "Cannot find requested backend: HPX!")
+ endif ()
+ return()
+else ()
+ if (NOT DEFINED PLSSVM_CPU_TARGET_ARCHS)
+ if (PLSSVM_ENABLE_HPX_BACKEND MATCHES "ON")
+ message(SEND_ERROR "Found requested HPX backend, but no \"cpu\" targets were specified!")
+ else ()
+ message(STATUS "Found HPX backend, but no \"cpu\" targets were specified!")
+ endif ()
+ message(CHECK_FAIL "skipped")
+ return()
+ endif ()
+endif ()
+message(CHECK_PASS "found")
+
+# explicitly set sources
+set(PLSSVM_HPX_SOURCES
+ ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp
+ ${CMAKE_CURRENT_LIST_DIR}/csvm.cpp
+ ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp
+)
+
+# set target properties
+set_local_and_parent(PLSSVM_HPX_BACKEND_LIBRARY_NAME plssvm-HPX)
+add_library(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} SHARED ${PLSSVM_HPX_SOURCES})
+target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PUBLIC HPX::hpx)
+
+# additional compilation flags
+target_compile_options(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Wconversion>)
+
+# link base library against HPX library
+target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PUBLIC ${PLSSVM_BASE_LIBRARY_NAME})
+
+# set compile definition that the HPX backend is available
+target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_HAS_HPX_BACKEND)
+
+# link against interface library
+target_link_libraries(${PLSSVM_ALL_LIBRARY_NAME} INTERFACE ${PLSSVM_HPX_BACKEND_LIBRARY_NAME})
+
+# mark backend library as install target
+append_local_and_parent(PLSSVM_TARGETS_TO_INSTALL ${PLSSVM_HPX_BACKEND_LIBRARY_NAME})
+
+# generate summary string
+set(PLSSVM_HPX_BACKEND_SUMMARY_STRING " - HPX: cpu " PARENT_SCOPE)
+
+list(POP_BACK CMAKE_MESSAGE_INDENT)
diff --git a/src/plssvm/backends/HPX/csvm.cpp b/src/plssvm/backends/HPX/csvm.cpp
new file mode 100644
index 000000000..c6adff43d
--- /dev/null
+++ b/src/plssvm/backends/HPX/csvm.cpp
@@ -0,0 +1,265 @@
+/**
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "plssvm/backends/HPX/csvm.hpp"
+
+#include "plssvm/backends/HPX/exceptions.hpp" // plssvm::hpx::backend_exception
+#include "plssvm/backends/HPX/kernel/cg_explicit/blas.hpp" // plssvm::hpx::detail::device_kernel_symm
+#include "plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp" // plssvm::hpx::detail::device_kernel_assembly
+#include "plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp" // plssvm::hpx::detail::device_kernel_assembly_symm
+#include "plssvm/backends/HPX/kernel/predict_kernel.hpp" // plssvm::hpx::detail::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict}
+#include "plssvm/constants.hpp" // plssvm::real_type
+#include "plssvm/csvm.hpp" // plssvm::csvm
+#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT
+#include "plssvm/detail/data_distribution.hpp" // plssvm::detail::{data_distribution, triangular_data_distribution, rectangular_data_distribution}
+#include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size
+#include "plssvm/detail/move_only_any.hpp" // plssvm::detail::{move_only_any, move_only_any_cast}
+#include "plssvm/detail/utility.hpp" // plssvm::detail::{get_system_memory, unreachable}
+#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type
+#include "plssvm/matrix.hpp" // plssvm::aos_matrix, plssvm::soa_matrix
+#include "plssvm/parameter.hpp" // plssvm::parameter
+#include "plssvm/shape.hpp" // plssvm::shape
+#include "plssvm/solver_types.hpp" // plssvm::solver_type
+#include "plssvm/target_platforms.hpp" // plssvm::target_platform
+
+#include <cstddef> // std::size_t
+#include <tuple> // std::tuple, std::make_tuple
+#include <utility> // std::move
+#include <vector> // std::vector
+
+namespace plssvm::hpx {
+
+csvm::csvm(parameter params) :
+ csvm{ plssvm::target_platform::automatic, params } { }
+
+csvm::csvm(const target_platform target, parameter params) :
+ ::plssvm::csvm{ params } {
+ this->init(target);
+}
+
+void csvm::init(const target_platform target) {
+ // check if supported target platform has been selected
+ if (target != target_platform::automatic && target != target_platform::cpu) {
+ throw backend_exception{ fmt::format("Invalid target platform '{}' for the HPX backend!", target) };
+ }
+ // the CPU target must be available
+#if !defined(PLSSVM_HAS_CPU_TARGET)
+ throw backend_exception{ "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!" };
+#endif
+
+ plssvm::detail::log(verbosity_level::full,
+ "\nUsing HPX ({}) as backend with {} thread(s).\n\n",
+ plssvm::detail::tracking::tracking_entry{ "dependencies", "hpx_version", detail::get_hpx_version() },
+ plssvm::detail::tracking::tracking_entry{ "backend", "num_threads", detail::get_num_threads() });
+ PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::hpx }));
+ PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", plssvm::target_platform::cpu }));
+
+ // update the target platform
+ target_ = plssvm::target_platform::cpu;
+}
+
+std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const {
+ return { ::plssvm::detail::get_system_memory() };
+}
+
+std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const {
+ return this->get_device_memory();
+}
+
+//***************************************************//
+// fit //
+//***************************************************//
+
+std::vector<::plssvm::detail::move_only_any> csvm::assemble_kernel_matrix(const solver_type solver, const parameter &params, const soa_matrix<real_type> &A, const std::vector<real_type> &q_red, const real_type QA_cost) const {
+ PLSSVM_ASSERT(solver != solver_type::automatic, "An explicit solver type must be provided instead of solver_type::automatic!");
+ PLSSVM_ASSERT(!A.empty(), "The matrix to setup on the devices must not be empty!");
+ PLSSVM_ASSERT(A.is_padded(), "The matrix to setup on the devices must be padded!");
+ PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!");
+ PLSSVM_ASSERT(q_red.size() == A.num_rows() - 1, "The q_red size ({}) mismatches the number of data points after dimensional reduction ({})!", q_red.size(), A.num_rows() - 1);
+
+ std::vector<::plssvm::detail::move_only_any> kernel_matrices_parts(this->num_available_devices());
+ ::hpx::future<void> wait = ::hpx::async([&]() {
+ const real_type cost = real_type{ 1.0 } / params.cost;
+
+ switch (solver) {
+ case solver_type::automatic:
+ // unreachable
+ break;
+ case solver_type::cg_explicit:
+ {
+ const plssvm::detail::triangular_data_distribution dist{ A.num_rows() - 1, this->num_available_devices() };
+ std::vector<real_type> kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0)); // only explicitly store the upper triangular matrix
+ switch (params.kernel_type) {
+ case kernel_function_type::linear:
+ detail::device_kernel_assembly<kernel_function_type::linear>(q_red, kernel_matrix, A, QA_cost, cost);
+ break;
+ case kernel_function_type::polynomial:
+ detail::device_kernel_assembly<kernel_function_type::polynomial>(q_red, kernel_matrix, A, QA_cost, cost, params.degree, std::get<real_type>(params.gamma), params.coef0);
+ break;
+ case kernel_function_type::rbf:
+ detail::device_kernel_assembly<kernel_function_type::rbf>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
+ break;
+ case kernel_function_type::sigmoid:
+ detail::device_kernel_assembly<kernel_function_type::sigmoid>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma), params.coef0);
+ break;
+ case kernel_function_type::laplacian:
+ detail::device_kernel_assembly<kernel_function_type::laplacian>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
+ break;
+ case kernel_function_type::chi_squared:
+ detail::device_kernel_assembly<kernel_function_type::chi_squared>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
+ break;
+ }
+
+ kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::move(kernel_matrix) };
+ }
+ break;
+ case solver_type::cg_implicit:
+ {
+ // simply return the data since, for the implicit solver, we don't assemble the kernel matrix here!
+ kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::make_tuple(std::move(A), params, std::move(q_red), QA_cost) };
+ }
+ break;
+ }
+ });
+ // wait until operation is completed
+ wait.get();
+ return kernel_matrices_parts;
+}
+
+void csvm::blas_level_3(const solver_type solver, const real_type alpha, const std::vector<::plssvm::detail::move_only_any> &A, const soa_matrix<real_type> &B, const real_type beta, soa_matrix<real_type> &C) const {
+ PLSSVM_ASSERT(solver != solver_type::automatic, "An explicit solver type must be provided instead of solver_type::automatic!");
+ PLSSVM_ASSERT(A.size() == 1, "Not enough kernel matrix parts ({}) for the available number of devices (1)!", A.size());
+ PLSSVM_ASSERT(!B.empty(), "The B matrix must not be empty!");
+ PLSSVM_ASSERT(B.is_padded(), "The B matrix must be padded!");
+ PLSSVM_ASSERT(!C.empty(), "The C matrix must not be empty!");
+ PLSSVM_ASSERT(C.is_padded(), "The C matrix must be padded!");
+ PLSSVM_ASSERT(B.shape() == C.shape(), "The B ({}) and C ({}) matrices must have the same shape!", B.shape(), C.shape());
+ PLSSVM_ASSERT(B.padding() == C.padding(), "The B ({}) and C ({}) matrices must have the same padding!", B.padding(), C.padding());
+
+ ::hpx::future<void> wait = ::hpx::async([&]() {
+ switch (solver) {
+ case solver_type::automatic:
+ // unreachable
+ break;
+ case solver_type::cg_explicit:
+ {
+ const std::size_t num_rhs = B.shape().x;
+ const std::size_t num_rows = B.shape().y;
+
+ const auto &explicit_A = ::plssvm::detail::move_only_any_cast<const std::vector<real_type> &>(A.front());
+ PLSSVM_ASSERT(!explicit_A.empty(), "The A matrix must not be empty!");
+
+ detail::device_kernel_symm(num_rows, num_rhs, alpha, explicit_A, B, beta, C);
+ }
+ break;
+ case solver_type::cg_implicit:
+ {
+ const auto &[matr_A, params, q_red, QA_cost] = ::plssvm::detail::move_only_any_cast<const std::tuple<soa_matrix<real_type>, parameter, std::vector<real_type>, real_type> &>(A.front());
+ PLSSVM_ASSERT(!matr_A.empty(), "The A matrix must not be empty!");
+ PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!");
+ const real_type cost = real_type{ 1.0 } / params.cost;
+
+ switch (params.kernel_type) {
+ case kernel_function_type::linear:
+ detail::device_kernel_assembly_symm<kernel_function_type::linear>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C);
+ break;
+ case kernel_function_type::polynomial:
+ detail::device_kernel_assembly_symm<kernel_function_type::polynomial>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, params.degree, std::get<real_type>(params.gamma), params.coef0);
+ break;
+ case kernel_function_type::rbf:
+ detail::device_kernel_assembly_symm<kernel_function_type::rbf>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma));
+ break;
+ case kernel_function_type::sigmoid:
+ detail::device_kernel_assembly_symm<kernel_function_type::sigmoid>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma), params.coef0);
+ break;
+ case kernel_function_type::laplacian:
+ detail::device_kernel_assembly_symm<kernel_function_type::laplacian>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma));
+ break;
+ case kernel_function_type::chi_squared:
+ detail::device_kernel_assembly_symm<kernel_function_type::chi_squared>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get