diff --git a/.clang-format b/.clang-format
index 30a5ef1db..a057d0bef 100644
--- a/.clang-format
+++ b/.clang-format
@@ -79,7 +79,7 @@ IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^"plssvm/'
Priority: 1
- - Regex: '^"(cuda|hip|CL|sycl|omp)'
+ - Regex: '^"(cuda|hip|CL|sycl|omp|hpx)'
Priority: 2
- Regex: '^"(tests|bindings)/'
Priority: 3
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8c7de08b3..593b7b8f5 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-## Authors: Alexander Van Craen, Marcel Breyer
+## Authors: Alexander Van Craen, Marcel Breyer, Alexander Strack
## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved
## License: This file is part of the PLSSVM project which is released under the MIT license.
## See the LICENSE.md file in the project root for full license information.
@@ -376,6 +376,13 @@ if (PLSSVM_ENABLE_STDPAR_BACKEND MATCHES "AUTO" OR PLSSVM_ENABLE_STDPAR_BACKEND)
add_subdirectory(src/plssvm/backends/stdpar)
endif ()
+## check for HPX backend
+set(PLSSVM_ENABLE_HPX_BACKEND AUTO CACHE STRING "Enable HPX Backend")
+set_property(CACHE PLSSVM_ENABLE_HPX_BACKEND PROPERTY STRINGS AUTO ON OFF)
+if (PLSSVM_ENABLE_HPX_BACKEND MATCHES "AUTO" OR PLSSVM_ENABLE_HPX_BACKEND)
+ add_subdirectory(src/plssvm/backends/HPX)
+endif ()
+
## check for CUDA backend
set(PLSSVM_ENABLE_CUDA_BACKEND AUTO CACHE STRING "Enable CUDA Backend")
set_property(CACHE PLSSVM_ENABLE_CUDA_BACKEND PROPERTY STRINGS AUTO ON OFF)
@@ -705,6 +712,10 @@ if (TARGET ${PLSSVM_STDPAR_BACKEND_LIBRARY_NAME})
message(STATUS "${PLSSVM_STDPAR_BACKEND_SUMMARY_STRING}")
list(APPEND PLSSVM_BACKEND_NAME_LIST "stdpar")
endif ()
+if (TARGET ${PLSSVM_HPX_BACKEND_LIBRARY_NAME})
+ message(STATUS "${PLSSVM_HPX_BACKEND_SUMMARY_STRING}")
+ list(APPEND PLSSVM_BACKEND_NAME_LIST "hpx")
+endif ()
if (TARGET ${PLSSVM_CUDA_BACKEND_LIBRARY_NAME})
message(STATUS "${PLSSVM_CUDA_BACKEND_SUMMARY_STRING}")
list(APPEND PLSSVM_BACKEND_NAME_LIST "cuda")
@@ -852,6 +863,7 @@ install(FILES
"${PROJECT_BINARY_DIR}/plssvmHIPTargets.cmake"
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmOpenCLTargets.cmake"
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmOpenMPTargets.cmake"
+ "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmHPXTargets.cmake"
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmAdaptiveCppTargets.cmake"
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmDPCPPTargets.cmake"
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmstdparTargets.cmake"
diff --git a/CMakePresets.json b/CMakePresets.json
index 8e4925dd0..c6bf7373f 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -2,6 +2,7 @@
"version": 6,
"include": [
"cmake/presets/openmp.json",
+ "cmake/presets/hpx.json",
"cmake/presets/stdpar.json",
"cmake/presets/stdpar_gcc.json",
"cmake/presets/stdpar_nvhpc.json",
@@ -15,4 +16,4 @@
"cmake/presets/dpcpp.json",
"cmake/presets/all.json"
]
-}
\ No newline at end of file
+}
diff --git a/README.md b/README.md
index 566ac248a..394dd8e04 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,7 @@ The main highlights of our SVM implementations are:
1. Drop-in replacement for LIBSVM's `svm-train`, `svm-predict`, and `svm-scale` (some features currently not implemented).
2. Support of multiple different programming frameworks for parallelization (also called backends in our PLSSVM implementation) which allows us to target GPUs and CPUs from different vendors like NVIDIA, AMD, or Intel:
- [OpenMP](https://www.openmp.org/)
+ - [HPX](https://hpx.stellar-group.org/)
- [stdpar](https://en.cppreference.com/w/cpp/algorithm) (supported implementations are [nvc++](https://developer.nvidia.com/hpc-sdk) from NVIDIA's HPC SDK, [roc-stdpar](https://github.com/ROCm/roc-stdpar) as a patched LLVM, [icpx](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html) as Intel's oneAPI compiler, [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp), and [GNU GCC](https://gcc.gnu.org/) using TBB).
**Note**: due to the nature of the used USM mechanics in the `stdpar` implementations, the `stdpar` backend **can't** be enabled together with **any** other backend!
   **Note**: since every translation unit needs to be compiled with the same flags, we currently set `CMAKE_CXX_FLAGS` globally although this is discouraged in favor of `target_compile_options`.
@@ -105,6 +106,10 @@ Additional dependencies for the stdpar backend:
- compiler with stdpar support
+Additional dependencies for the HPX backend:
+
+- [HPX ≥ v1.9.0](https://hpx.stellar-group.org/)
+
Additional dependencies for the CUDA backend:
- CUDA SDK
@@ -355,6 +360,9 @@ Available configure presets:
"openmp" - OpenMP backend
"openmp_python" - OpenMP backend + Python bindings
"openmp_test" - OpenMP backend tests
+ "hpx" - HPX backend
+ "hpx_python" - HPX backend + Python bindings
+ "hpx_test" - HPX backend tests
"cuda" - CUDA backend
"cuda_python" - CUDA backend + Python bindings
"cuda_test" - CUDA backend tests
@@ -545,7 +553,7 @@ Usage:
-i, --max_iter arg set the maximum number of CG iterations (default: num_features)
-l, --solver arg choose the solver: automatic|cg_explicit|cg_implicit (default: automatic)
-a, --classification arg the classification strategy to use for multi-class classification: oaa|oao (default: oaa)
- -b, --backend arg choose the backend: automatic|openmp|cuda|hip|opencl|sycl|stdpar (default: automatic)
+ -b, --backend arg choose the backend: automatic|openmp|hpx|cuda|hip|opencl|sycl|stdpar (default: automatic)
-p, --target_platform arg choose the target platform: automatic|cpu|gpu_nvidia|gpu_amd|gpu_intel (default: automatic)
--sycl_kernel_invocation_type arg
choose the kernel invocation type when using SYCL as backend: automatic|nd_range (default: automatic)
@@ -589,13 +597,14 @@ The `--backend=automatic` option works as follows:
- if the `gpu_nvidia` target is available, check for existing backends in order `cuda` 🠦 `hip` 🠦 `opencl` 🠦 `sycl` 🠦 `stdpar`
- otherwise, if the `gpu_amd` target is available, check for existing backends in order `hip` 🠦 `opencl` 🠦 `sycl` 🠦 `stdpar`
- otherwise, if the `gpu_intel` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `stdpar`
-- otherwise, if the `cpu` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `openmp` 🠦 `stdpar`
+- otherwise, if the `cpu` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `openmp` 🠦 `hpx` 🠦 `stdpar`
Note that during CMake configuration it is guaranteed that at least one of the above combinations does exist.
The `--target_platform=automatic` option works for the different backends as follows:
- `OpenMP`: always selects a CPU
+- `HPX`: always selects a CPU
- `CUDA`: always selects an NVIDIA GPU (if no NVIDIA GPU is available, throws an exception)
- `HIP`: always selects an AMD GPU (if no AMD GPU is available, throws an exception)
- `OpenCL`: tries to find available devices in the following order: NVIDIA GPUs 🠦 AMD GPUs 🠦 Intel GPUs 🠦 CPU
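For orientation, here is a minimal sketch of selecting the new backend programmatically, mirroring the `--backend=hpx` option above. It assumes the existing `plssvm::make_csvm` factory and the `plssvm/core.hpp` umbrella header treat `hpx` like every other `plssvm::backend_type` value:

```cpp
#include "plssvm/core.hpp"

#include <memory>  // std::unique_ptr

int main() {
    // equivalent to passing --backend=hpx on the command line;
    // fails if the HPX backend was not enabled at CMake configure time
    const std::unique_ptr<plssvm::csvm> svm = plssvm::make_csvm(plssvm::backend_type::hpx);
    return 0;
}
```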
diff --git a/bindings/Python/CMakeLists.txt b/bindings/Python/CMakeLists.txt
index 5bead042a..f951f77a4 100644
--- a/bindings/Python/CMakeLists.txt
+++ b/bindings/Python/CMakeLists.txt
@@ -1,4 +1,4 @@
-## Authors: Alexander Van Craen, Marcel Breyer
+## Authors: Alexander Van Craen, Marcel Breyer, Alexander Strack
## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved
## License: This file is part of the PLSSVM project which is released under the MIT license.
## See the LICENSE.md file in the project root for full license information.
@@ -68,6 +68,9 @@ endif ()
if (TARGET ${PLSSVM_OPENMP_BACKEND_LIBRARY_NAME})
list(APPEND PLSSVM_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/backends/openmp_csvm.cpp)
endif ()
+if (TARGET ${PLSSVM_HPX_BACKEND_LIBRARY_NAME})
+ list(APPEND PLSSVM_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/backends/hpx_csvm.cpp)
+endif ()
if (TARGET ${PLSSVM_STDPAR_BACKEND_LIBRARY_NAME})
 # AdaptiveCpp stdpar is only supported on the CPU when using our Python bindings
@@ -125,4 +128,4 @@ target_compile_options(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC $<$
diff --git a/bindings/Python/backend_types.cpp b/bindings/Python/backend_types.cpp
--- a/bindings/Python/backend_types.cpp
+++ b/bindings/Python/backend_types.cpp
 py::enum_<plssvm::backend_type>(m, "BackendType")
.value("AUTOMATIC", plssvm::backend_type::automatic, "the default backend; depends on the specified target platform")
.value("OPENMP", plssvm::backend_type::openmp, "OpenMP to target CPUs only (currently no OpenMP target offloading support)")
+ .value("HPX", plssvm::backend_type::hpx, "HPX to target CPUs only (currently no GPU executor support)")
.value("STDPAR", plssvm::backend_type::stdpar, "C++ standard parallelism to target CPUs and GPUs from different vendors based on the used stdpar implementation; supported implementations are: nvhpc (nvc++), roc-stdpar, AdaptiveCpp, Intel LLVM (icpx), and GNU GCC + TBB")
.value("CUDA", plssvm::backend_type::cuda, "CUDA to target NVIDIA GPUs only")
.value("HIP", plssvm::backend_type::hip, "HIP to target AMD and NVIDIA GPUs")
diff --git a/bindings/Python/backends/hpx_csvm.cpp b/bindings/Python/backends/hpx_csvm.cpp
new file mode 100644
index 000000000..92b4fef10
--- /dev/null
+++ b/bindings/Python/backends/hpx_csvm.cpp
@@ -0,0 +1,57 @@
+/**
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "plssvm/backends/HPX/csvm.hpp" // plssvm::hpx::csvm
+#include "plssvm/backends/HPX/exceptions.hpp" // plssvm::hpx::backend_exception
+#include "plssvm/csvm.hpp" // plssvm::csvm
+#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception
+#include "plssvm/parameter.hpp" // plssvm::parameter
+#include "plssvm/target_platforms.hpp" // plssvm::target_platform
+
+#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception
+
+#include "pybind11/pybind11.h" // py::module_, py::class_, py::init
+#include "pybind11/stl.h" // support for STL types
+
+#include <memory> // std::make_unique
+
+namespace py = pybind11;
+
+void init_hpx_csvm(py::module_ &m, const py::exception<plssvm::exception> &base_exception) {
+ // use its own submodule for the HPX CSVM bindings
+ py::module_ hpx_module = m.def_submodule("hpx", "a module containing all HPX backend specific functionality");
+
+ // bind the CSVM using the HPX backend
+ py::class_<plssvm::hpx::csvm, plssvm::csvm>(hpx_module, "CSVM")
+ .def(py::init<>(), "create an SVM with the automatic target platform and default parameter object")
+ .def(py::init<plssvm::parameter>(), "create an SVM with the automatic target platform and provided parameter object")
+ .def(py::init<plssvm::target_platform>(), "create an SVM with the provided target platform and default parameter object")
+ .def(py::init<plssvm::target_platform, plssvm::parameter>(), "create an SVM with the provided target platform and parameter object")
+ .def(py::init([](const py::kwargs &args) {
+ // check for valid keys
+ check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" });
+ // if one of the keyword parameters is provided, set the respective value
+ const plssvm::parameter params = convert_kwargs_to_parameter(args);
+ // create CSVM with the default target platform
+ return std::make_unique<plssvm::hpx::csvm>(params);
+ }),
+ "create an SVM with the default target platform and keyword arguments")
+ .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) {
+ // check for valid keys
+ check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" });
+ // if one of the keyword parameters is provided, set the respective value
+ const plssvm::parameter params = convert_kwargs_to_parameter(args);
+ // create CSVM with the provided target platform
+ return std::make_unique<plssvm::hpx::csvm>(target, params);
+ }),
+ "create an SVM with the provided target platform and keyword arguments");
+
+ // register HPX backend specific exceptions
+ register_py_exception<plssvm::hpx::backend_exception>(hpx_module, "BackendError", base_exception);
+}
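For reference, the kwargs constructors bound above map onto PLSSVM's named-parameter constructors on the C++ side; a hedged sketch, assuming the library's named arguments such as `plssvm::kernel_type` and `plssvm::cost` work for the HPX CSVM exactly as for the other backends:

```cpp
#include "plssvm/backends/HPX/csvm.hpp"  // plssvm::hpx::csvm
#include "plssvm/core.hpp"               // named arguments, plssvm::kernel_function_type

int main() {
    // C++ counterpart of plssvm.hpx.CSVM(kernel_type=..., cost=...) in Python
    const plssvm::hpx::csvm svm{ plssvm::kernel_type = plssvm::kernel_function_type::rbf,
                                 plssvm::cost = 2.0 };
    return 0;
}
```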
diff --git a/bindings/Python/main.cpp b/bindings/Python/main.cpp
index f37bc20db..170afa2c3 100644
--- a/bindings/Python/main.cpp
+++ b/bindings/Python/main.cpp
@@ -1,6 +1,7 @@
/**
* @author Alexander Van Craen
* @author Marcel Breyer
+ * @author Alexander Strack
* @copyright 2018-today The PLSSVM project - All Rights Reserved
* @license This file is part of the PLSSVM project which is released under the MIT license.
* See the LICENSE.md file in the project root for full license information.
@@ -35,6 +36,7 @@ void init_environment(py::module_ &);
 void init_exceptions(py::module_ &, const py::exception<plssvm::exception> &);
 void init_csvm(py::module_ &);
 void init_openmp_csvm(py::module_ &, const py::exception<plssvm::exception> &);
+void init_hpx_csvm(py::module_ &, const py::exception<plssvm::exception> &);
 void init_stdpar_csvm(py::module_ &, const py::exception<plssvm::exception> &);
 void init_cuda_csvm(py::module_ &, const py::exception<plssvm::exception> &);
 void init_hip_csvm(py::module_ &, const py::exception<plssvm::exception> &);
@@ -86,6 +88,9 @@ PYBIND11_MODULE(plssvm, m) {
#if defined(PLSSVM_HAS_OPENMP_BACKEND)
init_openmp_csvm(m, base_exception);
#endif
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ init_hpx_csvm(m, base_exception);
+#endif
#if defined(PLSSVM_HAS_STDPAR_BACKEND)
init_stdpar_csvm(m, base_exception);
#endif
diff --git a/cmake/plssvm/plssvmConfig.cmake.in b/cmake/plssvm/plssvmConfig.cmake.in
index e6be17d15..9636e125e 100644
--- a/cmake/plssvm/plssvmConfig.cmake.in
+++ b/cmake/plssvm/plssvmConfig.cmake.in
@@ -25,7 +25,7 @@ find_dependency(fmt REQUIRED)
include("${CMAKE_CURRENT_LIST_DIR}/plssvmTargets.cmake")
# list all available libraries
-set(PLSSVM_SUPPORTED_COMPONENTS "OpenMP;CUDA;HIP;OpenCL;DPCPP;AdaptiveCpp;stdpar")
+set(PLSSVM_SUPPORTED_COMPONENTS "OpenMP;HPX;CUDA;HIP;OpenCL;DPCPP;AdaptiveCpp;stdpar")
set(PLSSVM_DISABLED_COMPONENTS "${PLSSVM_SUPPORTED_COMPONENTS}")
# check which libraries are available
diff --git a/cmake/plssvm/plssvmHPXTargets.cmake b/cmake/plssvm/plssvmHPXTargets.cmake
new file mode 100644
index 000000000..8fa711790
--- /dev/null
+++ b/cmake/plssvm/plssvmHPXTargets.cmake
@@ -0,0 +1,21 @@
+## Authors: Alexander Van Craen, Marcel Breyer, Alexander Strack
+## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved
+## License: This file is part of the PLSSVM project which is released under the MIT license.
+## See the LICENSE.md file in the project root for full license information.
+########################################################################################################################
+
+include(CMakeFindDependencyMacro)
+
+# check if the HPX backend is available
+if (TARGET plssvm::plssvm-HPX)
+ # enable HPX
+ find_dependency(HPX)
+ # set alias targets
+ add_library(plssvm::HPX ALIAS plssvm::plssvm-HPX)
+ add_library(plssvm::hpx ALIAS plssvm::plssvm-HPX)
+ # set COMPONENT to be found
+ set(plssvm_HPX_FOUND ON)
+else ()
+ # set COMPONENT to be NOT found
+ set(plssvm_HPX_FOUND OFF)
+endif ()
diff --git a/cmake/plssvm/plssvmOpenMPTargets.cmake b/cmake/plssvm/plssvmOpenMPTargets.cmake
index db95e1d1e..d8a6951f2 100644
--- a/cmake/plssvm/plssvmOpenMPTargets.cmake
+++ b/cmake/plssvm/plssvmOpenMPTargets.cmake
@@ -6,7 +6,7 @@
include(CMakeFindDependencyMacro)
-# check if the OpenCL backend is available
+# check if the OpenMP backend is available
if (TARGET plssvm::plssvm-OpenMP)
# enable OpenMP
find_dependency(OpenMP)
@@ -18,4 +18,4 @@ if (TARGET plssvm::plssvm-OpenMP)
else ()
# set COMPONENT to be NOT found
set(plssvm_OpenMP_FOUND OFF)
-endif ()
\ No newline at end of file
+endif ()
diff --git a/cmake/presets/all.json b/cmake/presets/all.json
index 76528069b..a1db4d1bc 100644
--- a/cmake/presets/all.json
+++ b/cmake/presets/all.json
@@ -9,6 +9,7 @@
"cacheVariables": {
"CMAKE_CXX_COMPILER": "clang++",
"PLSSVM_ENABLE_OPENMP_BACKEND": "AUTO",
+ "PLSSVM_ENABLE_HPX_BACKEND": "AUTO",
"PLSSVM_ENABLE_STDPAR_BACKEND": "OFF",
"PLSSVM_ENABLE_CUDA_BACKEND": "AUTO",
"PLSSVM_ENABLE_HIP_BACKEND": "AUTO",
@@ -23,6 +24,7 @@
"cacheVariables": {
"CMAKE_CXX_COMPILER": "clang++",
"PLSSVM_ENABLE_OPENMP_BACKEND": "AUTO",
+ "PLSSVM_ENABLE_HPX_BACKEND": "AUTO",
"PLSSVM_ENABLE_STDPAR_BACKEND": "OFF",
"PLSSVM_ENABLE_CUDA_BACKEND": "AUTO",
"PLSSVM_ENABLE_HIP_BACKEND": "AUTO",
@@ -39,6 +41,7 @@
"cacheVariables": {
"CMAKE_CXX_COMPILER": "clang++",
"PLSSVM_ENABLE_OPENMP_BACKEND": "AUTO",
+ "PLSSVM_ENABLE_HPX_BACKEND": "AUTO",
"PLSSVM_ENABLE_STDPAR_BACKEND": "OFF",
"PLSSVM_ENABLE_CUDA_BACKEND": "AUTO",
"PLSSVM_ENABLE_HIP_BACKEND": "AUTO",
@@ -84,7 +87,7 @@
"inherits": "common",
"filter": {
"include": {
- "name": "OpenMP.*|CUDA.*|HIP.*|OpenCL.*|AdaptiveCpp.*|DPCPP.*"
+ "name": "OpenMP.*|HPX.*|CUDA.*|HIP.*|OpenCL.*|AdaptiveCpp.*|DPCPP.*"
}
}
}
@@ -155,4 +158,4 @@
]
}
]
-}
\ No newline at end of file
+}
diff --git a/cmake/presets/common.json b/cmake/presets/common.json
index fac5b4b22..68da8cd61 100644
--- a/cmake/presets/common.json
+++ b/cmake/presets/common.json
@@ -12,6 +12,7 @@
"binaryDir": "build/${presetName}",
"cacheVariables": {
"PLSSVM_ENABLE_OPENMP_BACKEND": "OFF",
+ "PLSSVM_ENABLE_HPX_BACKEND": "OFF",
"PLSSVM_ENABLE_STDPAR_BACKEND": "OFF",
"PLSSVM_ENABLE_CUDA_BACKEND": "OFF",
"PLSSVM_ENABLE_HIP_BACKEND": "OFF",
@@ -66,4 +67,4 @@
}
}
]
-}
\ No newline at end of file
+}
diff --git a/cmake/presets/hpx.json b/cmake/presets/hpx.json
new file mode 100644
index 000000000..8ca724653
--- /dev/null
+++ b/cmake/presets/hpx.json
@@ -0,0 +1,143 @@
+{
+ "version": 6,
+ "include": ["common.json"],
+ "configurePresets": [
+ {
+ "name": "hpx",
+ "displayName": "HPX backend",
+ "inherits": "build",
+ "cacheVariables": {
+ "PLSSVM_ENABLE_HPX_BACKEND": "ON",
+ "PLSSVM_TARGET_PLATFORMS": "cpu"
+ }
+ },
+ {
+ "name": "hpx_python",
+ "displayName": "HPX backend + Python bindings",
+ "inherits": "build",
+ "cacheVariables": {
+ "PLSSVM_ENABLE_HPX_BACKEND": "ON",
+ "PLSSVM_TARGET_PLATFORMS": "cpu",
+ "PLSSVM_ENABLE_LANGUAGE_BINDINGS": "ON",
+ "PLSSVM_ENABLE_PYTHON_BINDINGS": "ON"
+ }
+ },
+ {
+ "name": "hpx_test",
+ "displayName": "HPX backend tests",
+ "inherits": "test",
+ "cacheVariables": {
+ "PLSSVM_ENABLE_HPX_BACKEND": "ON",
+ "PLSSVM_TARGET_PLATFORMS": "cpu"
+ }
+ }
+ ],
+ "buildPresets": [
+ {
+ "name": "hpx",
+ "displayName": "HPX backend",
+ "configurePreset": "hpx",
+ "configuration": "RelWithDebInfo",
+ "inherits": "common"
+ },
+ {
+ "name": "hpx_python",
+ "displayName": "HPX backend + Python bindings",
+ "configurePreset": "hpx_python",
+ "configuration": "RelWithDebInfo",
+ "inherits": "common"
+ },
+ {
+ "name": "hpx_test",
+ "displayName": "HPX backend tests",
+ "configurePreset": "hpx_test",
+ "configuration": "Debug",
+ "inherits": "common"
+ }
+ ],
+ "testPresets": [
+ {
+ "name": "hpx_test",
+ "displayName": "HPX backend all tests",
+ "configurePreset": "hpx_test",
+ "inherits": "common"
+ },
+ {
+ "name": "hpx_backend_test",
+ "displayName": "HPX backend specific tests",
+ "configurePreset": "hpx_test",
+ "inherits": "common",
+ "filter": {
+ "include": {
+ "name": "HPX.*"
+ }
+ }
+ }
+ ],
+ "workflowPresets": [
+ {
+ "name": "hpx",
+ "displayName": "HPX backend workflow",
+ "steps": [
+ {
+ "name": "hpx",
+ "type": "configure"
+ },
+ {
+ "name": "hpx",
+ "type": "build"
+ }
+ ]
+ },
+ {
+ "name": "hpx_python",
+ "displayName": "HPX backend + Python bindings workflow",
+ "steps": [
+ {
+ "name": "hpx_python",
+ "type": "configure"
+ },
+ {
+ "name": "hpx_python",
+ "type": "build"
+ }
+ ]
+ },
+ {
+ "name": "hpx_test",
+ "displayName": "HPX test workflow",
+ "steps": [
+ {
+ "name": "hpx_test",
+ "type": "configure"
+ },
+ {
+ "name": "hpx_test",
+ "type": "build"
+ },
+ {
+ "name": "hpx_test",
+ "type": "test"
+ }
+ ]
+ },
+ {
+ "name": "hpx_backend_test",
+ "displayName": "HPX backend test workflow",
+ "steps": [
+ {
+ "name": "hpx_test",
+ "type": "configure"
+ },
+ {
+ "name": "hpx_test",
+ "type": "build"
+ },
+ {
+ "name": "hpx_backend_test",
+ "type": "test"
+ }
+ ]
+ }
+ ]
+}
diff --git a/cmake/presets/openmp.json b/cmake/presets/openmp.json
index c4dd27a4d..1031d50d0 100644
--- a/cmake/presets/openmp.json
+++ b/cmake/presets/openmp.json
@@ -7,7 +7,8 @@
"displayName": "OpenMP backend",
"inherits": "build",
"cacheVariables": {
- "PLSSVM_ENABLE_OPENMP_BACKEND": "ON"
+ "PLSSVM_ENABLE_OPENMP_BACKEND": "ON",
+ "PLSSVM_TARGET_PLATFORMS": "cpu"
}
},
{
@@ -16,6 +17,7 @@
"inherits": "build",
"cacheVariables": {
"PLSSVM_ENABLE_OPENMP_BACKEND": "ON",
+ "PLSSVM_TARGET_PLATFORMS": "cpu",
"PLSSVM_ENABLE_LANGUAGE_BINDINGS": "ON",
"PLSSVM_ENABLE_PYTHON_BINDINGS": "ON"
}
@@ -25,7 +27,8 @@
"displayName": "OpenMP backend tests",
"inherits": "test",
"cacheVariables": {
- "PLSSVM_ENABLE_OPENMP_BACKEND": "ON"
+ "PLSSVM_ENABLE_OPENMP_BACKEND": "ON",
+ "PLSSVM_TARGET_PLATFORMS": "cpu"
}
}
],
@@ -137,4 +140,4 @@
]
}
]
-}
\ No newline at end of file
+}
diff --git a/docs/resources/dirs.dox b/docs/resources/dirs.dox
index 8c3119aab..84e561a46 100644
--- a/docs/resources/dirs.dox
+++ b/docs/resources/dirs.dox
@@ -329,6 +329,66 @@
* @brief Directory containing kernel implementations for the implicit CG algorithm using the stdpar backend.
*/
+/**
+ * @dir include/plssvm/backends/HPX
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing the implementation for the HPX backend.
+ */
+
+/**
+ * @dir include/plssvm/backends/HPX/detail
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing implementation details for the HPX backend.
+ */
+
+/**
+ * @dir include/plssvm/backends/HPX/kernel
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing all kernels for the HPX backend.
+ */
+
+/**
+ * @dir include/plssvm/backends/HPX/kernel/cg_explicit
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing kernel implementations for the explicit CG algorithm using the HPX backend.
+ */
+
+/**
+ * @dir include/plssvm/backends/HPX/kernel/cg_implicit
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing kernel implementations for the implicit CG algorithm using the HPX backend.
+ */
+
/**
* @dir include/plssvm/backends/SYCL
* @author Alexander Van Craen
@@ -504,4 +564,4 @@
* See the LICENSE.md file in the project root for full license information.
*
* @brief Directory containing compile-time constant meta data for git specific information.
- */
\ No newline at end of file
+ */
diff --git a/include/plssvm/backend_types.hpp b/include/plssvm/backend_types.hpp
index 7bdbcb9e4..449f5dcdd 100644
--- a/include/plssvm/backend_types.hpp
+++ b/include/plssvm/backend_types.hpp
@@ -2,6 +2,7 @@
* @file
* @author Alexander Van Craen
* @author Marcel Breyer
+ * @author Alexander Strack
* @copyright 2018-today The PLSSVM project - All Rights Reserved
* @license This file is part of the PLSSVM project which is released under the MIT license.
* See the LICENSE.md file in the project root for full license information.
@@ -44,7 +45,9 @@ enum class backend_type {
/** [OpenCL](https://www.khronos.org/opencl/) to target CPUs and GPUs from different vendors. */
opencl,
/** [SYCL](https://www.khronos.org/sycl/) to target CPUs and GPUs from different vendors. Currently tested SYCL implementations are [DPC++](https://github.com/intel/llvm) and [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp) (formerly known as hipSYCL). */
- sycl
+ sycl,
+ /** [HPX](https://hpx.stellar-group.org/) to target CPUs only (currently no GPU support). */
+ hpx
};
/**
@@ -84,6 +87,7 @@ std::istream &operator>>(std::istream &in, backend_type &backend);
// Forward declare all possible C-SVMs.
namespace openmp { class csvm; }
namespace stdpar { class csvm; }
+namespace hpx { class csvm; }
namespace cuda { class csvm; }
namespace hip { class csvm; }
namespace opencl { class csvm; }
@@ -118,6 +122,15 @@ struct csvm_to_backend_type {
constexpr static backend_type value = backend_type::stdpar;
};
+/**
+ * @brief Sets the `value` to `plssvm::backend_type::hpx` for the HPX C-SVM.
+ */
+template <>
+struct csvm_to_backend_type<hpx::csvm> {
+ /// The enum value representing the hpx backend.
+ constexpr static backend_type value = backend_type::hpx;
+};
+
/**
* @brief Sets the `value` to `plssvm::backend_type::cuda` for the CUDA C-SVM.
*/
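A compile-time illustration of what the new trait specialization provides; this assumes the trait lives in `plssvm::detail`, as the neighboring `csvm_backend_exists` trait does:

```cpp
#include "plssvm/backend_types.hpp"
#include "plssvm/backends/HPX/csvm.hpp"

// generic code can map the HPX C-SVM type back to its enum value
static_assert(plssvm::detail::csvm_to_backend_type<plssvm::hpx::csvm>::value == plssvm::backend_type::hpx);
```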
diff --git a/include/plssvm/backends/HPX/csvm.hpp b/include/plssvm/backends/HPX/csvm.hpp
new file mode 100644
index 000000000..d9dba1e6e
--- /dev/null
+++ b/include/plssvm/backends/HPX/csvm.hpp
@@ -0,0 +1,170 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Defines a C-SVM using the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_CSVM_HPP_
+#define PLSSVM_BACKENDS_HPX_CSVM_HPP_
+#pragma once
+
+#include "plssvm/constants.hpp" // plssvm::real_type
+#include "plssvm/csvm.hpp" // plssvm::csvm, plssvm::detail::csvm_backend_exists
+#include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size
+#include "plssvm/detail/move_only_any.hpp" // plssvm::detail::move_only_any
+#include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES
+#include "plssvm/matrix.hpp" // plssvm::aos_matrix
+#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::has_only_parameter_named_args_v
+#include "plssvm/solver_types.hpp" // plssvm::solver_type
+#include "plssvm/target_platforms.hpp" // plssvm::target_platform
+
+#include <cstddef>     // std::size_t
+#include <type_traits> // std::true_type
+#include <utility>     // std::forward, std::pair
+#include <vector>      // std::vector
+
+namespace plssvm {
+
+namespace hpx {
+
+/**
+ * @brief A C-SVM implementation using HPX as backend.
+ */
+class csvm : public ::plssvm::csvm {
+ public:
+ /**
+ * @brief Construct a new C-SVM using the HPX backend with the parameters given through @p params.
+ * @param[in] params struct encapsulating all possible SVM parameters
+ * @throws plssvm::exception all exceptions thrown in the base class constructor
+ * @throws plssvm::hpx::backend_exception if the requested target is not available
+ * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+ */
+ explicit csvm(parameter params = {});
+ /**
+ * @brief Construct a new C-SVM using the HPX backend on the @p target platform with the parameters given through @p params.
+ * @param[in] target the target platform used for this C-SVM
+ * @param[in] params struct encapsulating all possible SVM parameters
+ * @throws plssvm::exception all exceptions thrown in the base class constructor
+ * @throws plssvm::hpx::backend_exception if the requested target is not available
+ * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+ */
+ explicit csvm(target_platform target, parameter params = {});
+
+ /**
+ * @brief Construct a new C-SVM using the HPX backend and the optionally provided @p named_args.
+ * @param[in] named_args the additional optional named-parameters
+ * @throws plssvm::exception all exceptions thrown in the base class constructor
+ * @throws plssvm::hpx::backend_exception if the requested target is not available
+ * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+ */
+ template <typename... Args, PLSSVM_REQUIRES(::plssvm::detail::has_only_parameter_named_args_v<Args...>)>
+ explicit csvm(Args &&...named_args) :
+ ::plssvm::csvm{ std::forward<Args>(named_args)... } {
+ // the default target is the automatic one
+ this->init(plssvm::target_platform::automatic);
+ }
+
+ /**
+ * @brief Construct a new C-SVM using the HPX backend on the @p target platform and the optionally provided @p named_args.
+ * @param[in] target the target platform used for this C-SVM
+ * @param[in] named_args the additional optional named-parameters
+ * @throws plssvm::exception all exceptions thrown in the base class constructor
+ * @throws plssvm::hpx::backend_exception if the requested target is not available
+ * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+ */
+ template <typename... Args, PLSSVM_REQUIRES(::plssvm::detail::has_only_parameter_named_args_v<Args...>)>
+ explicit csvm(const target_platform target, Args &&...named_args) :
+ ::plssvm::csvm{ std::forward<Args>(named_args)... } {
+ this->init(target);
+ }
+
+ /**
+ * @copydoc plssvm::csvm::csvm(const plssvm::csvm &)
+ */
+ csvm(const csvm &) = delete;
+ /**
+ * @copydoc plssvm::csvm::csvm(plssvm::csvm &&) noexcept
+ */
+ csvm(csvm &&) noexcept = default;
+ /**
+ * @copydoc plssvm::csvm::operator=(const plssvm::csvm &)
+ */
+ csvm &operator=(const csvm &) = delete;
+ /**
+ * @copydoc plssvm::csvm::operator=(plssvm::csvm &&) noexcept
+ */
+ csvm &operator=(csvm &&) noexcept = default;
+ /**
+ * @brief Default destructor since the copy and move constructors and copy- and move-assignment operators are defined.
+ */
+ ~csvm() override = default;
+
+ /**
+ * @copydoc plssvm::csvm::num_available_devices
+ * @note We currently only support one device for the HPX backend.
+ */
+ [[nodiscard]] std::size_t num_available_devices() const noexcept override {
+ return 1;
+ }
+
+ protected:
+ /**
+ * @copydoc plssvm::csvm::get_device_memory
+ */
+ [[nodiscard]] std::vector<::plssvm::detail::memory_size> get_device_memory() const final;
+ /**
+ * @copydoc plssvm::csvm::get_max_mem_alloc_size
+ */
+ [[nodiscard]] std::vector<::plssvm::detail::memory_size> get_max_mem_alloc_size() const final;
+
+ //***************************************************//
+ // fit //
+ //***************************************************//
+ /**
+ * @copydoc plssvm::csvm::assemble_kernel_matrix
+ */
+ [[nodiscard]] std::vector<::plssvm::detail::move_only_any> assemble_kernel_matrix(solver_type solver, const parameter &params, const soa_matrix<real_type> &A, const std::vector<real_type> &q_red, real_type QA_cost) const final;
+ /**
+ * @copydoc plssvm::csvm::blas_level_3
+ */
+ void blas_level_3(solver_type solver, real_type alpha, const std::vector<::plssvm::detail::move_only_any> &A, const soa_matrix<real_type> &B, real_type beta, soa_matrix<real_type> &C) const final;
+
+ //***************************************************//
+ // predict, score //
+ //***************************************************//
+ /**
+ * @copydoc plssvm::csvm::predict_values
+ */
+ [[nodiscard]] aos_matrix<real_type> predict_values(const parameter &params, const soa_matrix<real_type> &support_vectors, const aos_matrix<real_type> &alpha, const std::vector<real_type> &rho, soa_matrix<real_type> &w, const soa_matrix<real_type> &predict_points) const final;
+
+ private:
+ /**
+ * @brief Initializes the HPX backend and performs some sanity checks.
+ * @param[in] target the target platform to use
+ * @throws plssvm::hpx::backend_exception if the requested target is not available
+ * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+ */
+ void init(target_platform target);
+};
+
+} // namespace hpx
+
+namespace detail {
+
+/**
+ * @brief Sets the `value` to `true` since C-SVMs using the HPX backend are available.
+ */
+template <>
+struct csvm_backend_exists<hpx::csvm> : std::true_type { };
+
+} // namespace detail
+
+} // namespace plssvm
+
+#endif // PLSSVM_BACKENDS_HPX_CSVM_HPP_
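A short usage sketch for the constructors declared above (illustrative, not part of the patch): constructing on an explicit target and handling the failure path described by the `@throws` clauses:

```cpp
#include "plssvm/backends/HPX/csvm.hpp"
#include "plssvm/backends/HPX/exceptions.hpp"

#include <iostream>

int main() {
    try {
        // the HPX backend only targets CPUs; other targets fail the init() sanity checks
        const plssvm::hpx::csvm svm{ plssvm::target_platform::cpu };
    } catch (const plssvm::hpx::backend_exception &e) {
        std::cerr << e.what() << '\n';
        return 1;
    }
    return 0;
}
```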
diff --git a/include/plssvm/backends/HPX/detail/utility.hpp b/include/plssvm/backends/HPX/detail/utility.hpp
new file mode 100644
index 000000000..3fcdb04d0
--- /dev/null
+++ b/include/plssvm/backends/HPX/detail/utility.hpp
@@ -0,0 +1,38 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Utility functions specific to the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_DETAIL_UTILITY_HPP_
+#define PLSSVM_BACKENDS_HPX_DETAIL_UTILITY_HPP_
+#pragma once
+
+#include "boost/atomic/atomic_ref.hpp" // boost::atomic_ref
+#include <string> // std::string
+
+namespace plssvm::hpx::detail {
+
+// bring boost::atomic_ref into the namespace; used for the atomic updates in the implicit kernels
+using boost::atomic_ref;
+
+/**
+ * @brief Return the number of used CPU threads in the HPX backend.
+ * @return the number of used CPU threads (`[[nodiscard]]`)
+ */
+[[nodiscard]] int get_num_threads();
+
+/**
+ * @brief Return the HPX version used.
+ * @return the HPX version (`[[nodiscard]]`)
+ */
+[[nodiscard]] std::string get_hpx_version();
+
+} // namespace plssvm::hpx::detail
+
+#endif // PLSSVM_BACKENDS_HPX_DETAIL_UTILITY_HPP_
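The matching source file is not part of this excerpt; a minimal sketch of how these helpers could be implemented on top of the public HPX API (`hpx::get_num_worker_threads()` and `hpx::full_version_as_string()` are the assumed entry points):

```cpp
#include "plssvm/backends/HPX/detail/utility.hpp"

#include <hpx/include/runtime.hpp>  // hpx::get_num_worker_threads
#include <hpx/version.hpp>          // hpx::full_version_as_string

#include <string>

namespace plssvm::hpx::detail {

int get_num_threads() {
    // number of worker threads the HPX runtime was started with
    return static_cast<int>(::hpx::get_num_worker_threads());
}

std::string get_hpx_version() {
    return ::hpx::full_version_as_string();
}

}  // namespace plssvm::hpx::detail
```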
diff --git a/include/plssvm/backends/HPX/exceptions.hpp b/include/plssvm/backends/HPX/exceptions.hpp
new file mode 100644
index 000000000..fc7925f24
--- /dev/null
+++ b/include/plssvm/backends/HPX/exceptions.hpp
@@ -0,0 +1,39 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Implements custom exception classes specific to the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_EXCEPTIONS_HPP_
+#define PLSSVM_BACKENDS_HPX_EXCEPTIONS_HPP_
+#pragma once
+
+#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception
+#include "plssvm/exceptions/source_location.hpp" // plssvm::source_location
+
+#include <string> // std::string
+
+namespace plssvm::hpx {
+
+/**
+ * @brief Exception type thrown if a problem with the HPX backend occurs.
+ */
+class backend_exception : public exception {
+ public:
+ /**
+ * @brief Construct a new exception forwarding the exception message and source location to plssvm::exception.
+ * @param[in] msg the exception's `what()` message
+ * @param[in] loc the exception's call site information
+ */
+ explicit backend_exception(const std::string &msg, source_location loc = source_location::current());
+};
+
+} // namespace plssvm::hpx
+
+#endif // PLSSVM_BACKENDS_HPX_EXCEPTIONS_HPP_
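The corresponding definition is expected to follow the pattern of the other backends; a sketch assuming `plssvm::exception` takes the message, a class-name tag, and the source location, as the existing backend exceptions do:

```cpp
#include "plssvm/backends/HPX/exceptions.hpp"

#include <string>

namespace plssvm::hpx {

backend_exception::backend_exception(const std::string &msg, source_location loc) :
    ::plssvm::exception{ msg, "hpx::backend_exception", loc } { }

}  // namespace plssvm::hpx
```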
diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
new file mode 100644
index 000000000..09f6e6358
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
@@ -0,0 +1,107 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Functions for explicitly performing a BLAS GEMM like matrix-matrix multiplication using the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_BLAS_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_BLAS_HPP_
+#pragma once
+
+#include "plssvm/constants.hpp" // plssvm::{real_type, INTERNAL_BLOCK_SIZE, PADDING_SIZE}
+#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT
+#include "plssvm/matrix.hpp" // plssvm::soa_matrix
+#include "plssvm/shape.hpp" // plssvm::shape
+
+#include <array>             // std::array
+#include <cmath>             // std::ceil
+#include <cstddef>           // std::size_t
+#include <hpx/execution.hpp> // hpx::execution::par_unseq
+#include <hpx/algorithm.hpp> // hpx::for_each
+#include <numeric>           // std::iota
+#include <vector>            // std::vector
+
+namespace plssvm::hpx::detail {
+
+/**
+ * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a symmetric matrix (memory optimized), @p B and @p C are matrices, and @p alpha and @p beta are scalars.
+ * @param[in] num_rows the number of rows in @p A and @p C
+ * @param[in] num_rhs the number of columns in @p B and @p C
+ * @param[in] alpha the scalar alpha value
+ * @param[in] A the matrix @p A
+ * @param[in] B the matrix @p B
+ * @param[in] beta the scalar beta value
+ * @param[in,out] C the matrix @p C, also used as result matrix
+ */
+inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num_rhs, const real_type alpha, const std::vector<real_type> &A, const soa_matrix<real_type> &B, const real_type beta, soa_matrix<real_type> &C) {
+ PLSSVM_ASSERT(A.size() == (num_rows + PADDING_SIZE) * (num_rows + PADDING_SIZE + 1) / 2, "A matrix sizes mismatch!: {} != {}", A.size(), (num_rows + PADDING_SIZE) * (num_rows + PADDING_SIZE + 1) / 2);
+ PLSSVM_ASSERT(B.shape() == (plssvm::shape{ num_rhs, num_rows }), "B matrix sizes mismatch!: {} != [{}, {}]", B.shape(), num_rhs, num_rows);
+ PLSSVM_ASSERT(C.shape() == (plssvm::shape{ num_rhs, num_rows }), "C matrix sizes mismatch!: {} != [{}, {}]", C.shape(), num_rhs, num_rows);
+
+ // calculate constants
+ const auto blocked_num_rhs = static_cast<std::size_t>(std::ceil(static_cast<double>(num_rhs) / INTERNAL_BLOCK_SIZE));
+ const auto blocked_num_rows = static_cast<std::size_t>(std::ceil(static_cast<double>(num_rows) / INTERNAL_BLOCK_SIZE));
+
+ // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+ const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+ const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+ // define range over which should be iterated
+ std::vector<std::size_t> range(blocked_num_rhs * blocked_num_rows);
+ std::iota(range.begin(), range.end(), 0);
+
+ ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
+ // calculate the indices used in the current thread
+ const std::size_t rhs = idx / blocked_num_rows;
+ const std::size_t row = idx % blocked_num_rows;
+
+ const std::size_t rhs_idx = rhs * INTERNAL_BLOCK_SIZE_uz;
+ const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
+
+ // create a thread private array used for internal caching
+ std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+ // iterate over all features
+ for (std::size_t dim = 0; dim < num_rows; ++dim) {
+ // perform the dot product calculation
+ for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) {
+ for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) {
+ const std::size_t global_i = rhs_idx + static_cast<std::size_t>(internal_i);
+ const std::size_t global_j = row_idx + static_cast<std::size_t>(internal_j);
+
+ real_type A_val = 0.0;
+ // determine on which side of the diagonal we are located
+ if (dim < global_j) {
+ A_val = A.data()[dim * (num_rows + PADDING_SIZE_uz) + global_j - dim * (dim + std::size_t{ 1 }) / std::size_t{ 2 }];
+ } else {
+ A_val = A.data()[global_j * (num_rows + PADDING_SIZE_uz) + dim - global_j * (global_j + std::size_t{ 1 }) / std::size_t{ 2 }];
+ }
+ temp[internal_i][internal_j] += A_val * B.data()[dim * (num_rhs + PADDING_SIZE_uz) + global_i];
+ }
+ }
+ }
+
+ // apply the (partial) BLAS operation and update C
+ for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) {
+ for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) {
+ const std::size_t global_i = rhs_idx + static_cast<std::size_t>(internal_i);
+ const std::size_t global_j = row_idx + static_cast<std::size_t>(internal_j);
+
+ // be sure to not perform out of bounds accesses
+ if (global_i < num_rhs && global_j < num_rows) {
+ C.data()[global_j * (num_rhs + PADDING_SIZE_uz) + global_i] = alpha * temp[internal_i][internal_j] + beta * C.data()[global_j * (num_rhs + PADDING_SIZE_uz) + global_i];
+ }
+ }
+ }
+ });
+}
+
+} // namespace plssvm::hpx::detail
+
+#endif // PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_BLAS_HPP_
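The `dim * (num_rows + PADDING_SIZE) + j - dim * (dim + 1) / 2` expressions above address a symmetric matrix stored as a packed, padded upper triangle. A tiny standalone check of that layout (illustrative only; `n` and `pad` are made-up values):

```cpp
#include <cstddef>
#include <iostream>

int main() {
    constexpr std::size_t n = 4;    // num_rows
    constexpr std::size_t pad = 2;  // PADDING_SIZE
    // offset of element (i, j) with i <= j in the packed upper triangle
    const auto packed = [](const std::size_t i, const std::size_t j) {
        return i * (n + pad) + j - i * (i + 1) / 2;
    };
    std::cout << packed(0, 0) << '\n';  // 0: row 0 starts at the beginning
    std::cout << packed(1, 1) << '\n';  // 6: row 0 holds n + pad = 6 entries
    // total storage matches the size asserted in device_kernel_symm
    std::cout << (n + pad) * (n + pad + 1) / 2 << '\n';  // 21
    return 0;
}
```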
diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
new file mode 100644
index 000000000..2e59bf078
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
@@ -0,0 +1,111 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Functions for explicitly assembling the kernel matrix using the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_
+#pragma once
+
+#include "plssvm/backends/HPX/kernel/kernel_functions.hpp" // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
+#include "plssvm/constants.hpp" // plssvm::{real_type, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE}
+#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT
+#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type
+#include "plssvm/matrix.hpp" // plssvm::aos_matrix
+
+#include <array>             // std::array
+#include <cmath>             // std::ceil, std::sqrt
+#include <cstddef>           // std::size_t
+#include <hpx/execution.hpp> // hpx::execution::par_unseq
+#include <hpx/algorithm.hpp> // hpx::for_each
+#include <numeric>           // std::iota
+#include <vector>            // std::vector
+
+namespace plssvm::hpx::detail {
+
+/**
+ * @brief Assemble the kernel matrix using the @p kernel function.
+ * @tparam kernel the compile-time kernel function to use
+ * @tparam Args the types of the potential additional arguments for the @p kernel function
+ * @param[in] q the `q` vector
+ * @param[out] kernel_matrix the resulting kernel matrix
+ * @param[in] data the data matrix
+ * @param[in] QA_cost the bottom right matrix entry multiplied by cost
+ * @param[in] cost 1 / the cost parameter in the C-SVM
+ * @param[in] kernel_function_parameter the potential additional arguments for the @p kernel function
+ */
+template <kernel_function_type kernel, typename... Args>
+void device_kernel_assembly(const std::vector<real_type> &q, std::vector<real_type> &kernel_matrix, const soa_matrix<real_type> &data, const real_type QA_cost, const real_type cost, Args... kernel_function_parameter) {
+ PLSSVM_ASSERT(q.size() == data.num_rows() - 1, "Sizes mismatch!: {} != {}", q.size(), data.num_rows() - 1);
+ PLSSVM_ASSERT(kernel_matrix.size() == (q.size() + PADDING_SIZE) * (q.size() + PADDING_SIZE + 1) / 2, "Sizes mismatch (SYMM)!: {} != {}", kernel_matrix.size(), (q.size() + PADDING_SIZE) * (q.size() + PADDING_SIZE + 1) / 2);
+ PLSSVM_ASSERT(cost != real_type{ 0.0 }, "cost must not be 0.0 since it is 1 / plssvm::cost!");
+
+ const std::size_t dept = q.size();
+ const auto blocked_dept = static_cast<std::size_t>(std::ceil(static_cast<double>(dept) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_features = data.num_cols();
+
+ // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+ const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+ const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+ // define range over which should be iterated
+ std::vector<std::size_t> range(blocked_dept * (blocked_dept + 1) / 2);
+ std::iota(range.begin(), range.end(), 0);
+
+ ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
+ // calculate the indices used in the current thread
+ const std::size_t col = static_cast<std::size_t>(static_cast<double>(blocked_dept) + 0.5 - 0.5 * std::sqrt(4 * (blocked_dept * blocked_dept + blocked_dept - 2 * idx) + 1));
+ const std::size_t row = static_cast<std::size_t>(0.5 * static_cast<double>(2 * (idx - col * blocked_dept) + col * col + col));
+
+ const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
+ const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz;
+
+ // only calculate the upper triangular matrix -> done by only iterating over valid row <-> col pairs
+ // create a thread private array used for internal caching
+ std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+ // iterate over all features
+ for (std::size_t dim = 0; dim < num_features; ++dim) {
+ // perform the feature reduction calculation
+ for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) {
+ for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) {
+ const std::size_t global_row = row_idx + static_cast<std::size_t>(internal_row);
+ const std::size_t global_col = col_idx + static_cast<std::size_t>(internal_col);
+
+ temp[internal_row][internal_col] += detail::feature_reduce<kernel>(data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_row], data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_col]);
+ }
+ }
+ }
+
+ // apply the remaining part of the kernel function and store the value in the output kernel matrix
+ for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) {
+ for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) {
+ // calculate the indices to access the kernel matrix (the part stored on the current device)
+ const std::size_t global_row = row_idx + static_cast<std::size_t>(internal_row);
+ const std::size_t global_col = col_idx + static_cast<std::size_t>(internal_col);
+
+ // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix)
+ if (global_row < dept && global_col < dept && global_row >= global_col) {
+ real_type temp_ij = temp[internal_row][internal_col];
+ temp_ij = detail::apply_kernel_function<kernel>(temp_ij, kernel_function_parameter...) + QA_cost - q[global_row] - q[global_col];
+ // apply the cost on the diagonal
+ if (global_row == global_col) {
+ temp_ij += cost;
+ }
+ kernel_matrix[global_col * (dept + PADDING_SIZE_uz) + global_row - global_col * (global_col + std::size_t{ 1 }) / std::size_t{ 2 }] = temp_ij;
+ }
+ }
+ }
+ });
+}
+
+} // namespace plssvm::hpx::detail
+
+#endif // PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_
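The closed-form mapping above flattens the lower triangle of the blocked matrix so that the single parallel `for_each` visits every `(row, col)` block pair with `row >= col` exactly once; a quick standalone check of the arithmetic (illustrative only):

```cpp
#include <cmath>
#include <cstddef>
#include <iostream>

int main() {
    constexpr std::size_t blocked_dept = 3;
    for (std::size_t idx = 0; idx < blocked_dept * (blocked_dept + 1) / 2; ++idx) {
        // same index arithmetic as in device_kernel_assembly
        const auto col = static_cast<std::size_t>(static_cast<double>(blocked_dept) + 0.5 - 0.5 * std::sqrt(4 * (blocked_dept * blocked_dept + blocked_dept - 2 * idx) + 1));
        const auto row = static_cast<std::size_t>(0.5 * static_cast<double>(2 * (idx - col * blocked_dept) + col * col + col));
        std::cout << idx << " -> (" << row << ", " << col << ")\n";  // always row >= col
    }
    return 0;
}
```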
diff --git a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
new file mode 100644
index 000000000..eef6b809d
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
@@ -0,0 +1,133 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Functions for performing a matrix-matrix multiplication using an implicit kernel matrix.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_
+#pragma once
+
+#include "plssvm/backends/HPX/detail/utility.hpp" // plssvm::hpx::detail::atomic_ref
+#include "plssvm/backends/HPX/kernel/kernel_functions.hpp" // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
+#include "plssvm/constants.hpp" // plssvm::real_type
+#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT
+#include "plssvm/detail/operators.hpp" // overloaded arithmetic operations for a plssvm::matrix
+#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type
+#include "plssvm/kernel_functions.hpp" // plssvm::kernel_function
+#include "plssvm/matrix.hpp" // aos_matrix
+
+#include <array>             // std::array
+#include <cmath>             // std::ceil, std::sqrt
+#include <cstddef>           // std::size_t
+#include <hpx/execution.hpp> // hpx::execution::par_unseq
+#include <hpx/algorithm.hpp> // hpx::for_each
+#include <numeric>           // std::iota
+#include <vector>            // std::vector
+
+namespace plssvm::hpx::detail {
+
+/**
+ * @brief Perform an implicit BLAS SYMM-like operation: `C = alpha * A * B + C` where `A` is the implicitly calculated kernel matrix using the @p kernel function (never actually stored, reducing the amount of needed global memory), @p B and @p C are matrices, and @p alpha is a scalar.
+ * @tparam kernel the compile-time kernel function to use
+ * @tparam Args the types of the potential additional arguments for the @p kernel function
+ * @param[in] alpha the scalar alpha value
+ * @param[in] q the `q` vector
+ * @param[in] data the data matrix
+ * @param[in] QA_cost the bottom right matrix entry multiplied by cost
+ * @param[in] cost 1 / the cost parameter in the C-SVM
+ * @param[in] B the matrix @p B
+ * @param[in] beta the scalar beta value
+ * @param[in,out] C the matrix @p C
+ * @param[in] kernel_function_parameter the potential additional arguments for the @p kernel function
+ */
+template <kernel_function_type kernel, typename... Args>
+inline void device_kernel_assembly_symm(const real_type alpha, const std::vector<real_type> &q, const soa_matrix<real_type> &data, const real_type QA_cost, const real_type cost, const soa_matrix<real_type> &B, const real_type beta, soa_matrix<real_type> &C, Args... kernel_function_parameter) {
+ PLSSVM_ASSERT(q.size() == data.num_rows() - 1, "Sizes mismatch!: {} != {}", q.size(), data.num_rows() - 1);
+ PLSSVM_ASSERT(cost != real_type{ 0.0 }, "cost must not be 0.0 since it is 1 / plssvm::cost!");
+ PLSSVM_ASSERT(B.shape() == C.shape(), "The matrices B and C must have the same shape!");
+ PLSSVM_ASSERT(B.num_cols() == q.size(), "The number of columns in B ({}) must be the same as the values in q ({})!", B.num_cols(), q.size());
+
+ using namespace operators;
+
+ // alpha * A * B + beta * C
+ C *= beta;
+
+ // calculate constants
+ const std::size_t dept = q.size();
+ const auto blocked_dept = static_cast<std::size_t>(std::ceil(static_cast<double>(dept) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_features = data.num_cols();
+ const std::size_t num_classes = B.num_rows();
+
+ // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+ const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+ const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+ // define range over which should be iterated
+ std::vector<std::size_t> range(blocked_dept * (blocked_dept + 1) / 2);
+ std::iota(range.begin(), range.end(), 0);
+
+ ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
+ // calculate the indices used in the current thread
+ const std::size_t col = static_cast<std::size_t>(static_cast<double>(blocked_dept) + 0.5 - 0.5 * std::sqrt(4 * (blocked_dept * blocked_dept + blocked_dept - 2 * idx) + 1));
+ const std::size_t row = static_cast<std::size_t>(0.5 * static_cast<double>(2 * (idx - col * blocked_dept) + col * col + col));
+
+ const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
+ const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz;
+
+ // only calculate the upper triangular matrix -> done by only iterating over valid row <-> col pairs
+ // create a thread private array used for internal caching
+ std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+ // iterate over all features
+ for (std::size_t dim = 0; dim < num_features; ++dim) {
+ for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) {
+ for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) {
+ const std::size_t global_row = row_idx + static_cast<std::size_t>(internal_row);
+ const std::size_t global_col = col_idx + static_cast<std::size_t>(internal_col);
+
+ temp[internal_row][internal_col] += detail::feature_reduce<kernel>(data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_row], data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_col]);
+ }
+ }
+ }
+
+ // apply the remaining part of the kernel function and store the value in the output kernel matrix
+ for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) {
+ for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) {
+ const std::size_t global_row = row_idx + static_cast<std::size_t>(internal_row);
+ const std::size_t global_col = col_idx + static_cast<std::size_t>(internal_col);
+
+ // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix)
+ if (global_row < dept && global_col < dept && global_row >= global_col) {
+ real_type temp_ij = temp[internal_row][internal_col];
+ temp_ij = detail::apply_kernel_function<kernel>(temp_ij, kernel_function_parameter...) + QA_cost - q[global_row] - q[global_col];
+ // apply the cost on the diagonal
+ if (global_row == global_col) {
+ temp_ij += cost;
+ // calculate the values of alpha * A * B
+ for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) {
+ atomic_ref<real_type>{ C.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx];
+ }
+ } else {
+ // calculate the values of alpha * A * B
+ for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) {
+ atomic_ref<real_type>{ C.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B.data()[global_col * (num_classes + PADDING_SIZE_uz) + class_idx];
+ // symmetry
+ atomic_ref<real_type>{ C.data()[global_col * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx];
+ }
+ }
+ }
+ }
+ }
+ });
+}
+
+} // namespace plssvm::hpx::detail
+
+#endif // PLSSVM_BACKENDS_HPX_KERNEL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_
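Unlike the explicit kernel, different triangular blocks may update the same rows of `C` concurrently here, hence the `boost::atomic_ref` read-modify-write above. A minimal standalone illustration (Boost.Atomic provides `boost::atomic_ref` with floating-point `+=` since Boost 1.73):

```cpp
#include <boost/atomic/atomic_ref.hpp>

#include <iostream>
#include <vector>

int main() {
    std::vector<double> C(4, 0.0);
    // atomic fetch-add on a plain double; safe even with concurrent writers
    boost::atomic_ref<double>{ C[0] } += 1.5;
    std::cout << C[0] << '\n';  // 1.5
    return 0;
}
```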
diff --git a/include/plssvm/backends/HPX/kernel/kernel_functions.hpp b/include/plssvm/backends/HPX/kernel/kernel_functions.hpp
new file mode 100644
index 000000000..b7be1cb16
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/kernel_functions.hpp
@@ -0,0 +1,159 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Implement the different kernel functions for the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_KERNEL_FUNCTIONS_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_KERNEL_FUNCTIONS_HPP_
+#pragma once
+
+#include "plssvm/constants.hpp" // plssvm::real_type
+#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type
+
+// attribute macro for the HPX kernel functions (currently expands to nothing)
+#define PLSSVM_HPX_KERNEL_FUNCTION
+
+#include <cmath>  // std::abs, std::pow, std::exp, std::tanh
+#include <limits> // std::numeric_limits::min
+
+namespace plssvm::hpx::detail {
+
+//***************************************************//
+// feature reductions //
+//***************************************************//
+
+/**
+ * @brief Compute the default feature reduction, i.e., a simple dot-product.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <kernel_function_type kernel>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) {
+ return val1 * val2;
+}
+
+/**
+ * @brief Compute the feature reduction for the radial basis function kernel function, i.e., the squared Euclidean distance.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce<kernel_function_type::rbf>(const real_type val1, const real_type val2) {
+ const real_type d = val1 - val2;
+ return d * d;
+}
+
+/**
+ * @brief Compute the feature reduction for the laplacian kernel function, i.e., the Manhattan distance.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce<kernel_function_type::laplacian>(const real_type val1, const real_type val2) {
+ return std::abs(val1 - val2);
+}
+
+/**
+ * @brief Compute the feature reduction for the chi-squared kernel function.
+ * @note Be sure that the denominator isn't 0.0 which may be the case for padding values.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce<kernel_function_type::chi_squared>(const real_type val1, const real_type val2) {
+ const real_type d = val1 - val2;
+ return (real_type{ 1.0 } / (val1 + val2 + std::numeric_limits::min())) * d * d;
+}
+
+//***************************************************//
+// kernel functions //
+//***************************************************//
+
+/**
+ * @brief Unimplemented base-template for all kernel functions.
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <kernel_function_type kernel_function, typename... Args>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function(real_type, Args...);
+
+/**
+ * @brief Compute the linear kernel function using @p value.
+ * @param[in] value the value to apply the linear kernel function to
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::linear>(const real_type value) {
+ return value;
+}
+
+/**
+ * @brief Compute the polynomial kernel function using @p value.
+ * @param[in] value the value to apply the polynomial kernel function to
+ * @param[in] degree the degree parameter of the polynomial kernel function
+ * @param[in] gamma the gamma parameter of the polynomial kernel function
+ * @param[in] coef0 the coef0 parameter of the polynomial kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::polynomial>(const real_type value, const int degree, const real_type gamma, const real_type coef0) {
+ return std::pow(gamma * value + coef0, static_cast<real_type>(degree));
+}
+
+/**
+ * @brief Compute the radial basis function kernel function using @p value.
+ * @param[in] value the value to apply the rbf kernel function to
+ * @param[in] gamma the gamma parameter of the rbf kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::rbf>(const real_type value, const real_type gamma) {
+ return std::exp(-gamma * value);
+}
+
+/**
+ * @brief Compute the sigmoid kernel function using @p value.
+ * @param[in] value the value to apply the sigmoid kernel function to
+ * @param[in] gamma the gamma parameter of the sigmoid kernel function
+ * @param[in] coef0 the coef0 parameter of the sigmoid kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::sigmoid>(const real_type value, const real_type gamma, const real_type coef0) {
+ return std::tanh(gamma * value + coef0);
+}
+
+/**
+ * @brief Compute the laplacian kernel function using @p value.
+ * @param[in] value the value to apply the laplacian kernel function to
+ * @param[in] gamma the gamma parameter of the laplacian kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::laplacian>(const real_type value, const real_type gamma) {
+ return std::exp(-gamma * value);
+}
+
+/**
+ * @brief Compute the chi-squared kernel function using @p value.
+ * @param[in] value the value to apply the chi-squared kernel function to
+ * @param[in] gamma the gamma parameter of the chi-squared kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::chi_squared>(const real_type value, const real_type gamma) {
+ return std::exp(-gamma * value);
+}
+
+} // namespace plssvm::hpx::detail
+
+#endif // PLSSVM_BACKENDS_HPX_KERNEL_KERNEL_FUNCTIONS_HPP_
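Taken together, every kernel entry is produced in two phases: `feature_reduce` accumulates one term per feature dimension, and `apply_kernel_function` maps the accumulated sum through the kernel's scalar function. A hand-inlined sketch for the RBF case, k(x, y) = exp(-gamma * ||x - y||^2); `rbf_kernel` is an illustrative stand-in, not a PLSSVM function.

```cpp
#include <cmath>    // std::exp
#include <cstddef>  // std::size_t
#include <vector>   // std::vector

// Two-phase kernel evaluation: reduce over all features, then apply the
// scalar kernel function to the reduced sum.
double rbf_kernel(const std::vector<double> &x, const std::vector<double> &y, const double gamma) {
    double sum = 0.0;
    for (std::size_t dim = 0; dim < x.size(); ++dim) {
        const double d = x[dim] - y[dim];
        sum += d * d;  // feature_reduce<kernel_function_type::rbf>: squared Euclidean distance
    }
    return std::exp(-gamma * sum);  // apply_kernel_function<kernel_function_type::rbf>
}
```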
diff --git a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
new file mode 100644
index 000000000..7b153d889
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
@@ -0,0 +1,250 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Defines the functions used for prediction for the C-SVM using the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_PREDICT_KERNEL_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_PREDICT_KERNEL_HPP_
+#pragma once
+
+#include "plssvm/backends/HPX/detail/utility.hpp" // plssvm::hpx::detail::atomic_ref
+#include "plssvm/backends/HPX/kernel/kernel_functions.hpp" // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
+#include "plssvm/constants.hpp" // plssvm::{real_type, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE}
+#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT
+#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type
+#include "plssvm/matrix.hpp" // plssvm::aos_matrix, plssvm::soa_matrix
+#include "plssvm/shape.hpp" // plssvm::shape
+
+#include <array> // std::array
+#include <cmath> // std::fma
+#include <cstddef> // std::size_t
+#include <hpx/execution.hpp> // hpx::execution::par_unseq
+#include <hpx/parallel/algorithms/for_each.hpp> // hpx::for_each
+#include <numeric> // std::iota
+#include <vector> // std::vector
+
+namespace plssvm::hpx::detail {
+
+/**
+ * @brief Calculate the `w` vector used to speed up the prediction when using the linear kernel function.
+ * @param[out] w the vector to speed up the linear prediction
+ * @param[in] alpha the previously learned weights
+ * @param[in] support_vectors the support vectors
+ */
+inline void device_kernel_w_linear(soa_matrix<real_type> &w, const aos_matrix<real_type> &alpha, const soa_matrix<real_type> &support_vectors) {
+ PLSSVM_ASSERT(alpha.num_cols() == support_vectors.num_rows(), "Size mismatch: {} vs {}!", alpha.num_cols(), support_vectors.num_rows());
+ PLSSVM_ASSERT(w.shape() == (plssvm::shape{ alpha.num_rows(), support_vectors.num_cols() }), "Shape mismatch: {} vs {}!", w.shape(), (plssvm::shape{ alpha.num_rows(), support_vectors.num_cols() }));
+
+ // calculate constants
+ const std::size_t num_features = support_vectors.num_cols();
+ const auto blocked_num_features = static_cast<std::size_t>(std::ceil(static_cast<double>(num_features) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_classes = alpha.num_rows();
+ const auto blocked_num_classes = static_cast<std::size_t>(std::ceil(static_cast<double>(num_classes) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_support_vectors = support_vectors.num_rows();
+
+ // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+ const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+ const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+ // define range over which should be iterated
+ std::vector<std::size_t> range(blocked_num_features * blocked_num_classes);
+ std::iota(range.begin(), range.end(), 0);
+
+ ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
+ // calculate the indices used in the current thread
+ const std::size_t feature = idx / blocked_num_classes;
+ const std::size_t c = idx % blocked_num_classes;
+
+ const std::size_t feature_idx = feature * INTERNAL_BLOCK_SIZE_uz;
+ const std::size_t class_idx = c * INTERNAL_BLOCK_SIZE_uz;
+
+ // create a thread private array used for internal caching
+ std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+ // iterate over all support vectors
+ for (std::size_t sv = 0; sv < num_support_vectors; ++sv) {
+ // perform the feature reduction calculation
+ for (unsigned internal_feature = 0; internal_feature < INTERNAL_BLOCK_SIZE; ++internal_feature) {
+ for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) {
+ const std::size_t global_feature_idx = feature_idx + static_cast<std::size_t>(internal_feature);
+ const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
+
+ temp[internal_feature][internal_class] += alpha.data()[global_class_idx * (num_support_vectors + PADDING_SIZE_uz) + sv] * support_vectors.data()[global_feature_idx * (num_support_vectors + PADDING_SIZE_uz) + sv];
+ }
+ }
+ }
+
+ // update global array with local one
+ for (unsigned internal_feature = 0; internal_feature < INTERNAL_BLOCK_SIZE; ++internal_feature) {
+ for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) {
+ const std::size_t global_feature_idx = feature_idx + static_cast<std::size_t>(internal_feature);
+ const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
+
+ w.data()[global_feature_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_feature][internal_class];
+ }
+ }
+ });
+}
+
+/**
+ * @brief Predict the @p predict_points using the linear kernel, speeding up the calculation using the @p w vector.
+ * @param[out] prediction the predicted values
+ * @param[in] w the vector to speed up the calculations
+ * @param[in] rho the previously learned bias
+ * @param[in] predict_points the data points to predict
+ */
+inline void device_kernel_predict_linear(aos_matrix<real_type> &prediction, const soa_matrix<real_type> &w, const std::vector<real_type> &rho, const soa_matrix<real_type> &predict_points) {
+ PLSSVM_ASSERT(w.num_rows() == rho.size(), "Size mismatch: {} vs {}!", w.num_rows(), rho.size());
+ PLSSVM_ASSERT(w.num_cols() == predict_points.num_cols(), "Size mismatch: {} vs {}!", w.num_cols(), predict_points.num_cols());
+ PLSSVM_ASSERT(prediction.shape() == (plssvm::shape{ predict_points.num_rows(), w.num_rows() }), "Shape mismatch: {} vs {}!", prediction.shape(), (plssvm::shape{ predict_points.num_rows(), w.num_rows() }));
+
+ // calculate constants
+ const std::size_t num_predict_points = predict_points.num_rows();
+ const auto blocked_num_predict_points = static_cast<std::size_t>(std::ceil(static_cast<double>(num_predict_points) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_classes = prediction.num_cols();
+ const auto blocked_num_classes = static_cast<std::size_t>(std::ceil(static_cast<double>(num_classes) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_features = predict_points.num_cols();
+
+ // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+ const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+ const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+ // define range over which should be iterated
+ std::vector<std::size_t> range(blocked_num_predict_points * blocked_num_classes);
+ std::iota(range.begin(), range.end(), 0);
+
+ ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
+ // calculate the indices used in the current thread
+ const std::size_t pp = idx / blocked_num_classes;
+ const std::size_t c = idx % blocked_num_classes;
+
+ const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
+ const std::size_t class_idx = c * INTERNAL_BLOCK_SIZE_uz;
+
+ // create a thread private array used for internal caching
+ std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+ // iterate over all features
+ for (std::size_t dim = 0; dim < num_features; ++dim) {
+ // perform the feature reduction calculation
+ for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+ for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) {
+ const std::size_t global_pp_idx = pp_idx + static_cast<std::size_t>(internal_pp);
+ const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
+
+ temp[internal_pp][internal_class] += w.data()[dim * (num_classes + PADDING_SIZE_uz) + global_class_idx] * predict_points.data()[dim * (num_predict_points + PADDING_SIZE_uz) + global_pp_idx];
+ }
+ }
+ }
+
+ // update the global prediction matrix with the locally cached values and the bias
+ for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+ for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) {
+ const std::size_t global_pp_idx = pp_idx + static_cast<std::size_t>(internal_pp);
+ const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
+
+ if (global_pp_idx < num_predict_points && global_class_idx < num_classes) {
+ prediction.data()[global_pp_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_pp][internal_class] - rho.data()[global_class_idx];
+ }
+ }
+ }
+ });
+}
+
+/**
+ * @brief Predict the @p predict_points using the specified kernel function.
+ * @tparam kernel the type of the used kernel function
+ * @tparam Args the types of the parameters necessary for the specific kernel function
+ * @param[out] prediction the predicted values
+ * @param[in] alpha the previously learned weights
+ * @param[in] rho the previously learned bias
+ * @param[in] support_vectors the support vectors
+ * @param[in] predict_points the data points to predict
+ * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function
+ */
+template <kernel_function_type kernel, typename... Args>
+inline void device_kernel_predict(aos_matrix<real_type> &prediction, const aos_matrix<real_type> &alpha, const std::vector<real_type> &rho, const soa_matrix<real_type> &support_vectors, const soa_matrix<real_type> &predict_points, Args... kernel_function_parameter) {
+ PLSSVM_ASSERT(alpha.num_rows() == rho.size(), "Size mismatch: {} vs {}!", alpha.num_rows(), rho.size());
+ PLSSVM_ASSERT(alpha.num_cols() == support_vectors.num_rows(), "Size mismatch: {} vs {}!", alpha.num_cols(), support_vectors.num_rows());
+ PLSSVM_ASSERT(support_vectors.num_cols() == predict_points.num_cols(), "Size mismatch: {} vs {}!", support_vectors.num_cols(), predict_points.num_cols());
+ PLSSVM_ASSERT(prediction.shape() == (plssvm::shape{ predict_points.num_rows(), alpha.num_rows() }), "Shape mismatch: {} vs {}!", prediction.shape(), (plssvm::shape{ predict_points.num_rows(), alpha.num_rows() }));
+
+ // calculate constants
+ const std::size_t num_classes = alpha.num_rows();
+ const std::size_t num_support_vectors = support_vectors.num_rows();
+ const auto blocked_num_support_vectors = static_cast<std::size_t>(std::ceil(static_cast<double>(num_support_vectors) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_predict_points = predict_points.num_rows();
+ const auto blocked_num_predict_points = static_cast<std::size_t>(std::ceil(static_cast<double>(num_predict_points) / INTERNAL_BLOCK_SIZE));
+ const std::size_t num_features = predict_points.num_cols();
+
+ // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+ const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+ const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+ // define range over which should be iterated
+ std::vector<std::size_t> range(blocked_num_predict_points * blocked_num_support_vectors);
+ std::iota(range.begin(), range.end(), 0);
+
+ ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
+ // calculate the indices used in the current thread
+ const std::size_t pp = idx / blocked_num_support_vectors;
+ const std::size_t sv = idx % blocked_num_support_vectors;
+
+ const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
+ const std::size_t sv_idx = sv * INTERNAL_BLOCK_SIZE_uz;
+
+ // create a thread private array used for internal caching
+ std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+ // iterate over all features
+ for (std::size_t dim = 0; dim < num_features; ++dim) {
+ // perform the feature reduction calculation
+ for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+ for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) {
+ const std::size_t global_pp_idx = pp_idx + static_cast<std::size_t>(internal_pp);
+ const std::size_t global_sv_idx = sv_idx + static_cast<std::size_t>(internal_sv);
+
+ temp[internal_pp][internal_sv] += detail::feature_reduce<kernel>(support_vectors.data()[dim * (num_support_vectors + PADDING_SIZE_uz) + global_sv_idx],
+ predict_points.data()[dim * (num_predict_points + PADDING_SIZE_uz) + global_pp_idx]);
+ }
+ }
+ }
+
+ // update temp using the respective kernel function
+ for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+ for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) {
+ temp[internal_pp][internal_sv] = detail::apply_kernel_function<kernel>(temp[internal_pp][internal_sv], kernel_function_parameter...);
+ }
+ }
+
+ // add results to prediction
+ for (std::size_t a = 0; a < num_classes; ++a) {
+ for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+ for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) {
+ const std::size_t global_pp_idx = pp_idx + static_cast<std::size_t>(internal_pp);
+ const std::size_t global_sv_idx = sv_idx + static_cast<std::size_t>(internal_sv);
+
+ // be sure to not perform out of bounds accesses
+ if (global_pp_idx < num_predict_points && global_sv_idx < num_support_vectors) {
+ if (global_sv_idx == 0) {
+ atomic_ref<real_type>{ prediction.data()[global_pp_idx * (num_classes + PADDING_SIZE_uz) + a] } += -rho.data()[a];
+ }
+ atomic_ref<real_type>{ prediction.data()[global_pp_idx * (num_classes + PADDING_SIZE_uz) + a] } +=
+ temp[internal_pp][internal_sv] * alpha.data()[a * (num_support_vectors + PADDING_SIZE_uz) + global_sv_idx];
+ }
+ }
+ }
+ }
+ });
+}
+
+} // namespace plssvm::hpx::detail
+
+#endif // PLSSVM_BACKENDS_HPX_KERNEL_PREDICT_KERNEL_HPP_
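All three predict kernels parallelize the same way: the two-dimensional grid of `INTERNAL_BLOCK_SIZE` blocks is flattened into a one-dimensional iota range so that `hpx::for_each` distributes plain indices, and each work item recovers its block coordinates via division and modulo. A stripped-down sketch of that mapping (sequential here for clarity; all names are illustrative):

```cpp
#include <cstddef>  // std::size_t
#include <numeric>  // std::iota
#include <vector>   // std::vector

// Flatten a (row-block x col-block) grid into a single index range, as the
// predict kernels do before handing it to hpx::for_each(par_unseq, ...).
void for_each_block(const std::size_t blocked_rows, const std::size_t blocked_cols) {
    std::vector<std::size_t> range(blocked_rows * blocked_cols);
    std::iota(range.begin(), range.end(), std::size_t{ 0 });

    for (const std::size_t idx : range) {
        const std::size_t row_block = idx / blocked_cols;  // e.g., pp in device_kernel_predict
        const std::size_t col_block = idx % blocked_cols;  // e.g., sv in device_kernel_predict
        // ... process one INTERNAL_BLOCK_SIZE x INTERNAL_BLOCK_SIZE tile at (row_block, col_block) ...
        (void) row_block;
        (void) col_block;
    }
}
```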
diff --git a/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp
index ee4f6f15e..61729d9b8 100644
--- a/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp
+++ b/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp
@@ -6,7 +6,7 @@
* @license This file is part of the PLSSVM project which is released under the MIT license.
* See the LICENSE.md file in the project root for full license information.
*
- * @brief Functions for explicitly performing a BLAS GEMM like matrix-matrix multiplication using the CUDA backend.
+ * @brief Functions for explicitly performing a BLAS GEMM like matrix-matrix multiplication using the OpenMP backend.
*/
#ifndef PLSSVM_BACKENDS_OPENMP_KERNEL_CG_EXPLICIT_BLAS_HPP_
diff --git a/include/plssvm/core.hpp b/include/plssvm/core.hpp
index 4311a189f..4e1fd1be1 100644
--- a/include/plssvm/core.hpp
+++ b/include/plssvm/core.hpp
@@ -76,6 +76,12 @@ namespace plssvm::openmp { }
/// Namespace containing OpenMP backend specific implementation details. **Should not** directly be used by users.
namespace plssvm::openmp::detail { }
+/// Namespace containing the C-SVM using the HPX backend.
+namespace plssvm::hpx { }
+
+/// Namespace containing HPX backend specific implementation details. **Should not** directly be used by users.
+namespace plssvm::hpx::detail { }
+
/// Namespace containing the C-SVM using the stdpar backend.
namespace plssvm::stdpar { }
diff --git a/include/plssvm/csvm_factory.hpp b/include/plssvm/csvm_factory.hpp
index 01a2769ec..a1272a5e0 100644
--- a/include/plssvm/csvm_factory.hpp
+++ b/include/plssvm/csvm_factory.hpp
@@ -28,6 +28,9 @@
#if defined(PLSSVM_HAS_STDPAR_BACKEND)
#include "plssvm/backends/stdpar/csvm.hpp" // plssvm::stdpar::csvm, plssvm::csvm_backend_exists_v
#endif
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ #include "plssvm/backends/HPX/csvm.hpp" // plssvm::hpx::csvm, plssvm::csvm_backend_exists_v
+#endif
#if defined(PLSSVM_HAS_CUDA_BACKEND)
#include "plssvm/backends/CUDA/csvm.hpp" // plssvm::cuda::csvm, plssvm::csvm_backend_exists_v
#endif
@@ -130,6 +133,8 @@ template
return make_csvm_default_impl<openmp::csvm>(std::forward<Args>(args)...);
case backend_type::stdpar:
return make_csvm_default_impl<stdpar::csvm>(std::forward<Args>(args)...);
+ case backend_type::hpx:
+ return make_csvm_default_impl<hpx::csvm>(std::forward<Args>(args)...);
case backend_type::cuda:
return make_csvm_default_impl(std::forward(args)...);
case backend_type::hip:
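With the new `backend_type::hpx` case in place, an HPX C-SVM is requested through the same factory call as every other backend. A minimal usage sketch, assuming `plssvm/core.hpp` pulls in the factory as the umbrella header (error handling omitted):

```cpp
#include "plssvm/core.hpp"  // assumed umbrella header exposing plssvm::make_csvm and plssvm::backend_type

#include <memory>  // std::unique_ptr

int main() {
    // explicitly request the HPX backend; the factory dispatches to plssvm::hpx::csvm
    const std::unique_ptr<plssvm::csvm> svm = plssvm::make_csvm(plssvm::backend_type::hpx);
    return svm != nullptr ? 0 : 1;
}
```

Note that, as the `environment.hpp` and `main_*.cpp` changes below show, the HPX runtime must be started before such a C-SVM can actually run kernels.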
diff --git a/include/plssvm/detail/cmd/parser_predict.hpp b/include/plssvm/detail/cmd/parser_predict.hpp
index 2a114f0f0..2b96416ae 100644
--- a/include/plssvm/detail/cmd/parser_predict.hpp
+++ b/include/plssvm/detail/cmd/parser_predict.hpp
@@ -37,7 +37,7 @@ struct parser_predict {
*/
parser_predict(int argc, char **argv);
- /// The used backend: automatic (depending on the specified target_platforms), OpenMP, stdpar, CUDA, HIP, OpenCL, or SYCL.
+ /// The used backend: automatic (depending on the specified target_platforms), OpenMP, HPX, stdpar, CUDA, HIP, OpenCL, or SYCL.
backend_type backend{ backend_type::automatic };
/// The target platform: automatic (depending on the used backend), CPUs or GPUs from NVIDIA, AMD, or Intel.
target_platform target{ target_platform::automatic };
diff --git a/include/plssvm/detail/cmd/parser_train.hpp b/include/plssvm/detail/cmd/parser_train.hpp
index 70f0c03e2..a723fa82e 100644
--- a/include/plssvm/detail/cmd/parser_train.hpp
+++ b/include/plssvm/detail/cmd/parser_train.hpp
@@ -53,7 +53,7 @@ struct parser_train {
/// The multi-class classification strategy used.
classification_type classification{ classification_type::oaa };
- /// The used backend: automatic (depending on the specified target_platforms), OpenMP, stdpar, CUDA, HIP, OpenCL, or SYCL.
+ /// The used backend: automatic (depending on the specified target_platforms), OpenMP, HPX, stdpar, CUDA, HIP, OpenCL, or SYCL.
backend_type backend{ backend_type::automatic };
/// The target platform: automatic (depending on the used backend), CPUs or GPUs from NVIDIA, AMD, or Intel.
target_platform target{ target_platform::automatic };
diff --git a/include/plssvm/environment.hpp b/include/plssvm/environment.hpp
index 692c362b5..69a6dab24 100644
--- a/include/plssvm/environment.hpp
+++ b/include/plssvm/environment.hpp
@@ -30,6 +30,12 @@
#include <string> // std::string
#include <vector> // std::vector
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ #include <hpx/post.hpp> // ::hpx::post
+ #include <hpx/hpx_start.hpp> // ::hpx::{start, stop, finalize}
+ #include <hpx/runtime.hpp> // ::hpx::{is_running, is_stopped}
+#endif
+
namespace plssvm::environment {
/**
@@ -100,7 +106,8 @@ namespace detail {
* @return the respective environment status (`[[nodiscard]]`)
*/
[[nodiscard]] inline status determine_status_from_initialized_finalized_flags(const bool is_initialized, const bool is_finalized) {
- if (!is_initialized && !is_finalized) {
+ if (!is_initialized) {
+ // Note: ::hpx::is_stopped returns true even before finalize has been called once
return status::uninitialized;
} else if (is_initialized && !is_finalized) {
return status::initialized;
@@ -148,6 +155,14 @@ template
case backend_type::sycl:
// no environment necessary to manage these backends
return status::unnecessary;
+ case backend_type::hpx:
+ {
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ return detail::determine_status_from_initialized_finalized_functions<::hpx::is_running, ::hpx::is_stopped>();
+#else
+ return status::unnecessary;
+#endif
+ }
}
// should never be reached!
::plssvm::detail::unreachable();
@@ -177,7 +192,12 @@ namespace detail {
inline void initialize_backend([[maybe_unused]] const backend_type backend) {
PLSSVM_ASSERT(backend != backend_type::automatic, "The automatic backend may never be initialized!");
// Note: must be implemented for the backends that need environmental setup
- // nothing to do for all available backends
+ // only have to perform special initialization steps for the HPX backend
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ if (backend == backend_type::hpx) {
+ ::hpx::start(nullptr, 0, nullptr);
+ }
+#endif
}
/**
@@ -189,7 +209,12 @@ inline void initialize_backend([[maybe_unused]] const backend_type backend) {
inline void initialize_backend([[maybe_unused]] const backend_type backend, [[maybe_unused]] int &argc, [[maybe_unused]] char **argv) {
PLSSVM_ASSERT(backend != backend_type::automatic, "The automatic backend may never be initialized!");
// Note: must be implemented for the backends that need environmental setup
- // nothing to do for all available backends
+ // only have to perform special initialization steps for the HPX backend
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ if (backend == backend_type::hpx) {
+ ::hpx::start(nullptr, argc, argv);
+ }
+#endif
}
/**
@@ -199,7 +224,13 @@ inline void initialize_backend([[maybe_unused]] const backend_type backend, [[maybe_unused]] int &argc, [[maybe_unused]] char **argv) {
inline void finalize_backend([[maybe_unused]] const backend_type backend) {
PLSSVM_ASSERT(backend != backend_type::automatic, "The automatic backend may never be finalized!");
// Note: must be implemented for the backends that need environmental setup
- // nothing to do for all available backends
+ // only have to perform special finalization steps for the HPX backend
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ if (backend == backend_type::hpx) {
+ ::hpx::post([] { ::hpx::finalize(); });
+ ::hpx::stop();
+ }
+#endif
}
/**
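The three hooks above encode HPX's external runtime-control protocol: `hpx::start` brings the runtime up without blocking the calling (non-HPX) thread, and a clean shutdown posts `hpx::finalize` onto an HPX worker thread before `hpx::stop` joins the runtime. A standalone sketch of that lifecycle, using only the calls visible in the diff (the header names are assumptions, mirroring the includes reconstructed above):

```cpp
#include <hpx/hpx_start.hpp>  // assumed header for ::hpx::start, ::hpx::stop, ::hpx::finalize
#include <hpx/post.hpp>       // assumed header for ::hpx::post

int main(int argc, char **argv) {
    ::hpx::start(nullptr, argc, argv);  // launch the HPX runtime; returns immediately

    // ... run work that uses HPX (hpx::async, hpx::for_each, ...) ...

    ::hpx::post([] { ::hpx::finalize(); });  // request shutdown from within the runtime
    return ::hpx::stop();                    // block until the runtime has terminated
}
```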
diff --git a/src/main_predict.cpp b/src/main_predict.cpp
index 079b6ca00..ff28028c8 100644
--- a/src/main_predict.cpp
+++ b/src/main_predict.cpp
@@ -75,8 +75,15 @@ int main(int argc, char *argv[]) {
// check whether SYCL is used as backend (it is either requested directly or as automatic backend)
const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::sycl) };
+ // check whether HPX is used as backend (it is either requested directly or as automatic backend)
+ const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
+
// initialize environments if necessary
- environment_guard = std::make_unique<plssvm::environment::scope_guard>();
+ std::vector<plssvm::backend_type> backends_to_initialize{};
+ if (use_hpx_as_backend) {
+ backends_to_initialize.push_back(plssvm::backend_type::hpx);
+ }
+ environment_guard = std::make_unique<plssvm::environment::scope_guard>(backends_to_initialize);
// create default csvm
const std::unique_ptr<plssvm::csvm> svm = use_sycl_as_backend ? plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type)
diff --git a/src/main_train.cpp b/src/main_train.cpp
index 7f99409c7..32ac09d71 100644
--- a/src/main_train.cpp
+++ b/src/main_train.cpp
@@ -72,8 +72,15 @@ int main(int argc, char *argv[]) {
// check whether SYCL is used as backend (it is either requested directly or as automatic backend)
const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::sycl) };
+ // check whether HPX is used as backend (it is either requested directly or as automatic backend)
+ const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
+
// initialize environments if necessary
- environment_guard = std::make_unique<plssvm::environment::scope_guard>();
+ std::vector<plssvm::backend_type> backends_to_initialize{};
+ if (use_hpx_as_backend) {
+ backends_to_initialize.push_back(plssvm::backend_type::hpx);
+ }
+ environment_guard = std::make_unique<plssvm::environment::scope_guard>(backends_to_initialize);
// create SVM
const std::unique_ptr<plssvm::csvm> svm = use_sycl_as_backend ? plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type, plssvm::sycl_kernel_invocation_type = cmd_parser.sycl_kernel_invocation_type)
diff --git a/src/plssvm/backend_types.cpp b/src/plssvm/backend_types.cpp
index 0d01bb837..34789a764 100644
--- a/src/plssvm/backend_types.cpp
+++ b/src/plssvm/backend_types.cpp
@@ -1,6 +1,7 @@
/**
* @author Alexander Van Craen
* @author Marcel Breyer
+ * @author Alexander Strack
* @copyright 2018-today The PLSSVM project - All Rights Reserved
* @license This file is part of the PLSSVM project which is released under the MIT license.
* See the LICENSE.md file in the project root for full license information.
@@ -35,6 +36,9 @@ std::vector list_available_backends() {
#if defined(PLSSVM_HAS_STDPAR_BACKEND)
available_backends.push_back(backend_type::stdpar);
#endif
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+ available_backends.push_back(backend_type::hpx);
+#endif
#if defined(PLSSVM_HAS_CUDA_BACKEND)
available_backends.push_back(backend_type::cuda);
#endif
@@ -61,7 +65,7 @@ backend_type determine_default_backend(const std::vector<backend_type> &available_backends, const std::vector<target_platform> &available_target_platforms) {
decision_order_type{ target_platform::gpu_nvidia, { backend_type::cuda, backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::stdpar } },
decision_order_type{ target_platform::gpu_amd, { backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::stdpar } },
decision_order_type{ target_platform::gpu_intel, { backend_type::sycl, backend_type::opencl, backend_type::stdpar } },
- decision_order_type{ target_platform::cpu, { backend_type::sycl, backend_type::opencl, backend_type::openmp, backend_type::stdpar } }
+ decision_order_type{ target_platform::cpu, { backend_type::sycl, backend_type::opencl, backend_type::openmp, backend_type::hpx, backend_type::stdpar } }
};
// return the default backend based on the previously defined decision order
@@ -87,6 +91,8 @@ std::ostream &operator<<(std::ostream &out, const backend_type backend) {
return out << "openmp";
case backend_type::stdpar:
return out << "stdpar";
+ case backend_type::hpx:
+ return out << "hpx";
case backend_type::cuda:
return out << "cuda";
case backend_type::hip:
@@ -110,6 +116,8 @@ std::istream &operator>>(std::istream &in, backend_type &backend) {
backend = backend_type::openmp;
} else if (str == "stdpar") {
backend = backend_type::stdpar;
+ } else if (str == "hpx") {
+ backend = backend_type::hpx;
} else if (str == "cuda") {
backend = backend_type::cuda;
} else if (str == "hip") {
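The stream operators are what make `--backend hpx` work on the command line: `operator>>` parses the string into `backend_type::hpx`, and `operator<<` prints it back, e.g., in the backend summary. A small round-trip sketch:

```cpp
#include "plssvm/backend_types.hpp"  // plssvm::backend_type and its stream operators

#include <iostream>  // std::cout
#include <sstream>   // std::istringstream

int main() {
    std::istringstream in{ "hpx" };
    plssvm::backend_type backend{};
    in >> backend;                 // parses to plssvm::backend_type::hpx
    std::cout << backend << '\n';  // prints "hpx" again
    return 0;
}
```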
diff --git a/src/plssvm/backends/HPX/CMakeLists.txt b/src/plssvm/backends/HPX/CMakeLists.txt
new file mode 100644
index 000000000..8ebde5e46
--- /dev/null
+++ b/src/plssvm/backends/HPX/CMakeLists.txt
@@ -0,0 +1,63 @@
+## Authors: Alexander Van Craen, Marcel Breyer, Alexander Strack
+## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved
+## License: This file is part of the PLSSVM project which is released under the MIT license.
+## See the LICENSE.md file in the project root for full license information.
+########################################################################################################################
+
+list(APPEND CMAKE_MESSAGE_INDENT "HPX: ")
+
+# check if HPX can be enabled
+message(CHECK_START "Checking for HPX backend")
+
+find_package(HPX 1.9.0)
+
+if (NOT HPX_FOUND)
+ message(CHECK_FAIL "not found")
+ if (PLSSVM_ENABLE_HPX_BACKEND MATCHES "ON")
+ message(SEND_ERROR "Cannot find requested backend: HPX!")
+ endif ()
+ return()
+else ()
+ if (NOT DEFINED PLSSVM_CPU_TARGET_ARCHS)
+ if (PLSSVM_ENABLE_HPX_BACKEND MATCHES "ON")
+ message(SEND_ERROR "Found requested HPX backend, but no \"cpu\" targets were specified!")
+ else ()
+ message(STATUS "Found HPX backend, but no \"cpu\" targets were specified!")
+ endif ()
+ message(CHECK_FAIL "skipped")
+ return()
+ endif ()
+endif ()
+message(CHECK_PASS "found")
+
+# explicitly set sources
+set(PLSSVM_HPX_SOURCES
+ ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp
+ ${CMAKE_CURRENT_LIST_DIR}/csvm.cpp
+ ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp
+)
+
+# set target properties
+set_local_and_parent(PLSSVM_HPX_BACKEND_LIBRARY_NAME plssvm-HPX)
+add_library(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} SHARED ${PLSSVM_HPX_SOURCES})
+target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PUBLIC HPX::hpx)
+
+# additional compilation flags
+target_compile_options(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Wconversion>)
+
+# link base library against HPX library
+target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PUBLIC ${PLSSVM_BASE_LIBRARY_NAME})
+
+# set compile definition that the HPX backend is available
+target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_HAS_HPX_BACKEND)
+
+# link against interface library
+target_link_libraries(${PLSSVM_ALL_LIBRARY_NAME} INTERFACE ${PLSSVM_HPX_BACKEND_LIBRARY_NAME})
+
+# mark backend library as install target
+append_local_and_parent(PLSSVM_TARGETS_TO_INSTALL ${PLSSVM_HPX_BACKEND_LIBRARY_NAME})
+
+# generate summary string
+set(PLSSVM_HPX_BACKEND_SUMMARY_STRING " - HPX: cpu " PARENT_SCOPE)
+
+list(POP_BACK CMAKE_MESSAGE_INDENT)
diff --git a/src/plssvm/backends/HPX/csvm.cpp b/src/plssvm/backends/HPX/csvm.cpp
new file mode 100644
index 000000000..c6adff43d
--- /dev/null
+++ b/src/plssvm/backends/HPX/csvm.cpp
@@ -0,0 +1,265 @@
+/**
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "plssvm/backends/HPX/csvm.hpp"
+
+#include "plssvm/backends/HPX/exceptions.hpp" // plssvm::hpx::backend_exception
+#include "plssvm/backends/HPX/kernel/cg_explicit/blas.hpp" // plssvm::hpx::detail::device_kernel_symm
+#include "plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp" // plssvm::hpx::detail::device_kernel_assembly
+#include "plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp" // plssvm::hpx::detail::device_kernel_assembly_symm
+#include "plssvm/backends/HPX/kernel/predict_kernel.hpp" // plssvm::hpx::detail::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict}
+#include "plssvm/constants.hpp" // plssvm::real_type
+#include "plssvm/csvm.hpp" // plssvm::csvm
+#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT
+#include "plssvm/detail/data_distribution.hpp" // plssvm::detail::{data_distribution, triangular_data_distribution, rectangular_data_distribution}
+#include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size
+#include "plssvm/detail/move_only_any.hpp" // plssvm::detail::{move_only_any, move_only_any_cast}
+#include "plssvm/detail/utility.hpp" // plssvm::detail::{get_system_memory, unreachable}
+#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type
+#include "plssvm/matrix.hpp" // plssvm::aos_matrix, plssvm::soa_matrix
+#include "plssvm/parameter.hpp" // plssvm::parameter
+#include "plssvm/shape.hpp" // plssvm::shape
+#include "plssvm/solver_types.hpp" // plssvm::solver_type
+#include "plssvm/target_platforms.hpp" // plssvm::target_platform
+
+#include <cstddef> // std::size_t
+#include <tuple> // std::tuple, std::make_tuple
+#include <utility> // std::move
+#include <vector> // std::vector
+
+namespace plssvm::hpx {
+
+csvm::csvm(parameter params) :
+ csvm{ plssvm::target_platform::automatic, params } { }
+
+csvm::csvm(const target_platform target, parameter params) :
+ ::plssvm::csvm{ params } {
+ this->init(target);
+}
+
+void csvm::init(const target_platform target) {
+ // check if supported target platform has been selected
+ if (target != target_platform::automatic && target != target_platform::cpu) {
+ throw backend_exception{ fmt::format("Invalid target platform '{}' for the HPX backend!", target) };
+ }
+ // the CPU target must be available
+#if !defined(PLSSVM_HAS_CPU_TARGET)
+ throw backend_exception{ "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!" };
+#endif
+
+ plssvm::detail::log(verbosity_level::full,
+ "\nUsing HPX ({}) as backend with {} thread(s).\n\n",
+ plssvm::detail::tracking::tracking_entry{ "dependencies", "hpx_version", detail::get_hpx_version() },
+ plssvm::detail::tracking::tracking_entry{ "backend", "num_threads", detail::get_num_threads() });
+ PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::hpx }));
+ PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", plssvm::target_platform::cpu }));
+
+ // update the target platform
+ target_ = plssvm::target_platform::cpu;
+}
+
+std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const {
+ return { ::plssvm::detail::get_system_memory() };
+}
+
+std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const {
+ return this->get_device_memory();
+}
+
+//***************************************************//
+// fit //
+//***************************************************//
+
+std::vector<::plssvm::detail::move_only_any> csvm::assemble_kernel_matrix(const solver_type solver, const parameter &params, const soa_matrix<real_type> &A, const std::vector<real_type> &q_red, const real_type QA_cost) const {
+ PLSSVM_ASSERT(solver != solver_type::automatic, "An explicit solver type must be provided instead of solver_type::automatic!");
+ PLSSVM_ASSERT(!A.empty(), "The matrix to setup on the devices must not be empty!");
+ PLSSVM_ASSERT(A.is_padded(), "The matrix to setup on the devices must be padded!");
+ PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!");
+ PLSSVM_ASSERT(q_red.size() == A.num_rows() - 1, "The q_red size ({}) mismatches the number of data points after dimensional reduction ({})!", q_red.size(), A.num_rows() - 1);
+
+ std::vector<::plssvm::detail::move_only_any> kernel_matrices_parts(this->num_available_devices());
+ ::hpx::future<void> wait = ::hpx::async([&]() {
+ const real_type cost = real_type{ 1.0 } / params.cost;
+
+ switch (solver) {
+ case solver_type::automatic:
+ // unreachable
+ break;
+ case solver_type::cg_explicit:
+ {
+ const plssvm::detail::triangular_data_distribution dist{ A.num_rows() - 1, this->num_available_devices() };
+ std::vector<real_type> kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0)); // only explicitly store the upper triangular matrix
+ switch (params.kernel_type) {
+ case kernel_function_type::linear:
+ detail::device_kernel_assembly<kernel_function_type::linear>(q_red, kernel_matrix, A, QA_cost, cost);
+ break;
+ case kernel_function_type::polynomial:
+ detail::device_kernel_assembly<kernel_function_type::polynomial>(q_red, kernel_matrix, A, QA_cost, cost, params.degree, std::get<real_type>(params.gamma), params.coef0);
+ break;
+ case kernel_function_type::rbf:
+ detail::device_kernel_assembly<kernel_function_type::rbf>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
+ break;
+ case kernel_function_type::sigmoid:
+ detail::device_kernel_assembly<kernel_function_type::sigmoid>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma), params.coef0);
+ break;
+ case kernel_function_type::laplacian:
+ detail::device_kernel_assembly<kernel_function_type::laplacian>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
+ break;
+ case kernel_function_type::chi_squared:
+ detail::device_kernel_assembly<kernel_function_type::chi_squared>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
+ break;
+ }
+
+ kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::move(kernel_matrix) };
+ }
+ break;
+ case solver_type::cg_implicit:
+ {
+ // simply return the data since, for the implicit solver, we don't assemble the kernel matrix here!
+ kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::make_tuple(std::move(A), params, std::move(q_red), QA_cost) };
+ }
+ break;
+ }
+ });
+ // wait until operation is completed
+ wait.get();
+ return kernel_matrices_parts;
+}
+
+void csvm::blas_level_3(const solver_type solver, const real_type alpha, const std::vector<::plssvm::detail::move_only_any> &A, const soa_matrix<real_type> &B, const real_type beta, soa_matrix<real_type> &C) const {
+ PLSSVM_ASSERT(solver != solver_type::automatic, "An explicit solver type must be provided instead of solver_type::automatic!");
+ PLSSVM_ASSERT(A.size() == 1, "Not enough kernel matrix parts ({}) for the available number of devices (1)!", A.size());
+ PLSSVM_ASSERT(!B.empty(), "The B matrix must not be empty!");
+ PLSSVM_ASSERT(B.is_padded(), "The B matrix must be padded!");
+ PLSSVM_ASSERT(!C.empty(), "The C matrix must not be empty!");
+ PLSSVM_ASSERT(C.is_padded(), "The C matrix must be padded!");
+ PLSSVM_ASSERT(B.shape() == C.shape(), "The B ({}) and C ({}) matrices must have the same shape!", B.shape(), C.shape());
+ PLSSVM_ASSERT(B.padding() == C.padding(), "The B ({}) and C ({}) matrices must have the same padding!", B.padding(), C.padding());
+
+ ::hpx::future<void> wait = ::hpx::async([&]() {
+ switch (solver) {
+ case solver_type::automatic:
+ // unreachable
+ break;
+ case solver_type::cg_explicit:
+ {
+ const std::size_t num_rhs = B.shape().x;
+ const std::size_t num_rows = B.shape().y;
+
+ const auto &explicit_A = ::plssvm::detail::move_only_any_cast<const std::vector<real_type> &>(A.front());
+ PLSSVM_ASSERT(!explicit_A.empty(), "The A matrix must not be empty!");
+
+ detail::device_kernel_symm(num_rows, num_rhs, alpha, explicit_A, B, beta, C);
+ }
+ break;
+ case solver_type::cg_implicit:
+ {
+ const auto &[matr_A, params, q_red, QA_cost] = ::plssvm::detail::move_only_any_cast<const std::tuple<soa_matrix<real_type>, parameter, std::vector<real_type>, real_type> &>(A.front());
+ PLSSVM_ASSERT(!matr_A.empty(), "The A matrix must not be empty!");
+ PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!");
+ const real_type cost = real_type{ 1.0 } / params.cost;
+
+ switch (params.kernel_type) {
+ case kernel_function_type::linear:
+ detail::device_kernel_assembly_symm<kernel_function_type::linear>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C);
+ break;
+ case kernel_function_type::polynomial:
+ detail::device_kernel_assembly_symm<kernel_function_type::polynomial>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, params.degree, std::get<real_type>(params.gamma), params.coef0);
+ break;
+ case kernel_function_type::rbf:
+ detail::device_kernel_assembly_symm<kernel_function_type::rbf>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma));
+ break;
+ case kernel_function_type::sigmoid:
+ detail::device_kernel_assembly_symm<kernel_function_type::sigmoid>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma), params.coef0);
+ break;
+ case kernel_function_type::laplacian:
+ detail::device_kernel_assembly_symm<kernel_function_type::laplacian>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma));
+ break;
+ case kernel_function_type::chi_squared:
+ detail::device_kernel_assembly_symm<kernel_function_type::chi_squared>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get