diff --git a/.clang-format b/.clang-format index a057d0bef..29924594b 100644 --- a/.clang-format +++ b/.clang-format @@ -1,4 +1,7 @@ --- +Language: Json +DisableFormat: true +--- Language: Cpp AccessModifierOffset: -2 AlignAfterOpenBracket: Align @@ -79,7 +82,7 @@ IncludeBlocks: Regroup IncludeCategories: - Regex: '^"plssvm/' Priority: 1 - - Regex: '^"(cuda|hip|CL|sycl|omp|hpx)' + - Regex: '^"(cuda|hip|CL|sycl|omp|hpx|Kokkos)' Priority: 2 - Regex: '^"(tests|bindings)/' Priority: 3 diff --git a/CMakeLists.txt b/CMakeLists.txt index 593b7b8f5..4993f4ae4 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,6 +72,7 @@ endif () ######################################################################################################################## ## set base sources set(PLSSVM_BASE_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/Kokkos/execution_space.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/SYCL/implementation_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/SYCL/kernel_invocation_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/stdpar/implementation_types.cpp @@ -411,6 +412,13 @@ if (PLSSVM_ENABLE_SYCL_BACKEND MATCHES "AUTO" OR PLSSVM_ENABLE_SYCL_BACKEND) add_subdirectory(src/plssvm/backends/SYCL) endif () +## check for Kokkos backend +set(PLSSVM_ENABLE_KOKKOS_BACKEND AUTO CACHE STRING "Enable Kokkos Backend") +set_property(CACHE PLSSVM_ENABLE_KOKKOS_BACKEND PROPERTY STRINGS AUTO ON OFF) +if (PLSSVM_ENABLE_KOKKOS_BACKEND MATCHES "AUTO" OR PLSSVM_ENABLE_KOKKOS_BACKEND) + add_subdirectory(src/plssvm/backends/Kokkos) +endif () + ## check if ANY backend is available/has been enabled get_target_property(PLSSVM_LINKED_BACKENDS ${PLSSVM_ALL_LIBRARY_NAME} INTERFACE_LINK_LIBRARIES) if (NOT PLSSVM_LINKED_BACKENDS) @@ -642,7 +650,7 @@ if (PLSSVM_ENABLE_FORMATTING) list(APPEND CMAKE_MESSAGE_INDENT "Formatting: ") ## install library to add a clang-format target - set(PLSSVM_format_VERSION 7021abbf066e2e577926731c3fa4141f456c5024) + set(PLSSVM_format_VERSION d22c36043bea6ef85f3eb68b823f50703bd1cc21) find_package(format QUIET) if (format_FOUND) message(STATUS "Found package format.") @@ -734,6 +742,10 @@ if (TARGET ${PLSSVM_SYCL_BACKEND_LIBRARY_NAME}) endforeach () list(APPEND PLSSVM_BACKEND_NAME_LIST "sycl") endif () +if (TARGET ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) + message(STATUS "${PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING}") + list(APPEND PLSSVM_BACKEND_NAME_LIST "kokkos") +endif () message(STATUS "") ######################################################################################################################## @@ -758,8 +770,8 @@ message(STATUS "Generating manpage files.") string(TIMESTAMP PLSSVM_CURRENT_BUILD_TIME "%d. 
%B %Y") string(REPLACE ";" "|" PLSSVM_PLATFORM_NAME_LIST "${PLSSVM_PLATFORM_NAME_LIST}") string(REPLACE ";" "|" PLSSVM_BACKEND_NAME_LIST "${PLSSVM_BACKEND_NAME_LIST}") -string(REPLACE ";" "|" PLSSVM_SYCL_BACKEND_NAME_LIST "${PLSSVM_SYCL_BACKEND_NAME_LIST}") if (TARGET ${PLSSVM_SYCL_BACKEND_LIBRARY_NAME}) + string(REPLACE ";" "|" PLSSVM_SYCL_BACKEND_NAME_LIST "${PLSSVM_SYCL_BACKEND_NAME_LIST}") set(PLSSVM_SYCL_IMPLEMENTATION_TYPE_MANPAGE_ENTRY " .TP .B --sycl_implementation_type @@ -772,6 +784,15 @@ choose the kernel invocation type when using SYCL as backend: automatic|nd_range ") endif () set(PLSSVM_SYCL_MANPAGE_ENTRY "${PLSSVM_SYCL_KERNEL_INVOCATION_TYPE_MANPAGE_ENTRY}${PLSSVM_SYCL_IMPLEMENTATION_TYPE_MANPAGE_ENTRY}") +## assemble the Kokkos manpage entry +if (TARGET ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) + string(REPLACE ";" "|" PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "${PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES}") + set(PLSSVM_KOKKOS_MANPAGE_ENTRY " +.TP +.B --kokkos_execution_space +choose the Kokkos execution space to be used in the Kokkos backend: automatic|${PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES} (default: automatic) +") +endif () ## assemble the performance tracker manpage entry if (PLSSVM_ENABLE_PERFORMANCE_TRACKING) set(PLSSVM_PERFORMANCE_TRACKER_MANPAGE_ENTRY " @@ -787,6 +808,7 @@ configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/docs/plssvm-train.1 @ONLY ) +# update manpage entry since plssvm-predict can't recognize the SYCL kernel invocation type set(PLSSVM_SYCL_MANPAGE_ENTRY "${PLSSVM_SYCL_IMPLEMENTATION_TYPE_MANPAGE_ENTRY}") configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/docs/plssvm-predict.1.in @@ -866,6 +888,7 @@ install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmHPXTargets.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmAdaptiveCppTargets.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmDPCPPTargets.cmake" + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmKokkosTargets.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmstdparTargets.cmake" DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/plssvm/cmake ) diff --git a/CMakePresets.json b/CMakePresets.json index c6bf7373f..e226c44fd 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -14,6 +14,7 @@ "cmake/presets/opencl.json", "cmake/presets/acpp.json", "cmake/presets/dpcpp.json", + "cmake/presets/kokkos.json", "cmake/presets/all.json" ] } diff --git a/README.md b/README.md index 394dd8e04..0091ad85c 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,7 @@ The main highlights of our SVM implementations are: - [HIP](https://github.com/ROCm-Developer-Tools/HIP) - [OpenCL](https://www.khronos.org/opencl/) - [SYCL](https://www.khronos.org/sycl/) (supported implementations are [DPC++](https://github.com/intel/llvm) and [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp) (formerly known as hipSYCL); specifically the versions [sycl-nightly/20231201](https://github.com/intel/llvm/tree/sycl-nightly/20230110) and AdaptiveCpp release [v24.06.0](https://github.com/AdaptiveCpp/AdaptiveCpp/releases/tag/v23.10.0)) + - [Kokkos](https://github.com/kokkos/kokkos) (all execution spaces supported except `OpenMPTarget` and `OpenACC`); specifically the version [4.5.00](https://github.com/kokkos/kokkos/releases/tag/4.5.00) 3. 
Six different kernel functions to be able to classify a large variety of different problems: - linear: $\vec{u}^T$ $\cdot$ $\vec{v}$ - polynomial: $(\gamma$ $\cdot$ $\vec{u}^T$ $\cdot$ $\vec{v}$ $+$ $coef0)^{d}$ @@ -128,6 +129,10 @@ Additional dependencies for the SYCL backend: - the code must be compiled with a SYCL capable compiler; currently supported are [DPC++](https://github.com/intel/llvm) and [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp) +Additional dependencies for the Kokkos backend: + +- a Kokkos installation with the respective execution spaces enabled; currently all execution spaces are supported except `OpenMPTarget` and `OpenACC` + Additional dependencies for the stdpar backend: - the code must be compiled with a stdpar capable compiler; currently supported are [nvc++](https://developer.nvidia.com/hpc-sdk), [roc-stdpar](https://github.com/ROCm/roc-stdpar), [icpx](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html), [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp), and [GNU GCC](https://gcc.gnu.org/)) @@ -243,6 +248,11 @@ The `[optional_options]` can be one or multiple of: - `AUTO`: check for the OpenMP backend but **do not** fail if not available - `OFF`: do not check for the OpenMP backend +- `PLSSVM_ENABLE_HPX_BACKEND=ON|OFF|AUTO` (default: `AUTO`): + - `ON`: check for the HPX backend and fail if not available + - `AUTO`: check for the HPX backend but **do not** fail if not available + - `OFF`: do not check for the HPX backend + - `PLSSVM_ENABLE_STDPAR_BACKEND=ON|OFF|AUTO` (default: `AUTO`): - `ON`: check for the stdpar backend and fail if not available - `AUTO`: check for the stdpar backend but **do not** fail if not available @@ -268,6 +278,11 @@ The `[optional_options]` can be one or multiple of: - `AUTO`: check for the SYCL backend but **do not** fail if not available - `OFF`: do not check for the SYCL backend +- `PLSSVM_ENABLE_KOKKOS_BACKEND=ON|OFF|AUTO` (default: `AUTO`): + - `ON`: check for the Kokkos backend and fail if not available + - `AUTO`: check for the Kokkos backend but **do not** fail if not available + - `OFF`: do not check for the Kokkos backend + **Attention:** at least one backend must be enabled and available! - `PLSSVM_ENABLE_FAST_MATH=ON|OFF` (default depending on `CMAKE_BUILD_TYPE`: `ON` for Release or RelWithDebInfo, `OFF` otherwise): enable `fast-math` compiler flags for all backends @@ -344,6 +359,10 @@ If more than one SYCL implementation is available the environment variables `PLS - `PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION` (`dpcpp`|`adaptivecpp`): specify the preferred SYCL implementation if the `sycl_implementation_type` option is set to `automatic`; additional the specified SYCL implementation is used in the `plssvm::sycl` namespace, the other implementations are available in the `plssvm::dpcpp` and `plssvm::adaptivecpp` namespace respectively +If the Kokkos backend is available the following additional option is available (**note**: this option takes only effect if the Kokkos SYCL execution space is available): + +- `PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT` (default: `ON`): enable Ahead-of-Time (AOT) compilation for the specified target platforms + If the stdpar backend is available, an additional options can be set. - `PLSSVM_STDPAR_BACKEND_IMPLEMENTATION` (default: `AUTO`): explicitly specify the used stdpar implementation; must be one of: `AUTO`, `NVHPC`, `roc-stdpar`, `IntelLLVM`, `ACPP`, `GNU_TBB`. 
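A hedged illustration (not part of this changeset): the new `PLSSVM_ENABLE_KOKKOS_BACKEND` option documented above follows the same `ON`/`OFF`/`AUTO` pattern as the other backend switches, so a build that must have the Kokkos backend could be configured as sketched below; the build directory name and the `[optional_options]` placeholder are illustrative only.

```bash
# sketch: require the Kokkos backend explicitly (configuration fails if no usable Kokkos installation is found)
cmake -S . -B build -DPLSSVM_ENABLE_KOKKOS_BACKEND=ON [optional_options]
cmake --build build
```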
@@ -363,24 +382,6 @@ Available configure presets: "hpx" - HPX backend "hpx_python" - HPX backend + Python bindings "hpx_test" - HPX backend tests - "cuda" - CUDA backend - "cuda_python" - CUDA backend + Python bindings - "cuda_test" - CUDA backend tests - "hip" - HIP backend - "hip_python" - HIP backend + Python bindings - "hip_test" - HIP backend tests - "opencl" - OpenCL backend - "opencl_python" - OpenCL backend + Python bindings - "opencl_test" - OpenCL backend tests - "acpp" - AdaptiveCpp SYCL backend - "acpp_python" - AdaptiveCpp SYCL backend + Python bindings - "acpp_test" - AdaptiveCpp SYCL backend tests - "dpcpp" - DPC++/icpx SYCL backend - "dpcpp_python" - DPC++/icpx backend + Python bindings - "dpcpp_test" - DPC++/icpx backend tests - "all" - All available backends - "all_python" - All available backends + Python bindings - "all_test" - All available backends tests "stdpar" - stdpar backend "stdpar_python" - stdpar backend + Python bindings "stdpar_test" - stdpar backend tests @@ -399,6 +400,27 @@ Available configure presets: "stdpar_intelllvm" - stdpar IntelLLVM (icpx) backend "stdpar_intelllvm_python" - stdpar IntelLLVM (icpx) backend + Python bindings "stdpar_intelllvm_test" - stdpar IntelLLVM (icpx) backend tests + "cuda" - CUDA backend + "cuda_python" - CUDA backend + Python bindings + "cuda_test" - CUDA backend tests + "hip" - HIP backend + "hip_python" - HIP backend + Python bindings + "hip_test" - HIP backend tests + "opencl" - OpenCL backend + "opencl_python" - OpenCL backend + Python bindings + "opencl_test" - OpenCL backend tests + "acpp" - AdaptiveCpp SYCL backend + "acpp_python" - AdaptiveCpp SYCL backend + Python bindings + "acpp_test" - AdaptiveCpp SYCL backend tests + "dpcpp" - DPC++/icpx SYCL backend + "dpcpp_python" - DPC++/icpx backend + Python bindings + "dpcpp_test" - DPC++/icpx backend tests + "kokkos" - Kokkos backend + "kokkos_python" - Kokkos backend + Python bindings + "kokkos_test" - Kokkos backend tests + "all" - All available backends + "all_python" - All available backends + Python bindings + "all_test" - All available backends tests ``` With these presets, building and testing, e.g., our CUDA backend is as simple as typing (in the PLSSVM root directory): @@ -553,12 +575,14 @@ Usage: -i, --max_iter arg set the maximum number of CG iterations (default: num_features) -l, --solver arg choose the solver: automatic|cg_explicit|cg_implicit (default: automatic) -a, --classification arg the classification strategy to use for multi-class classification: oaa|oao (default: oaa) - -b, --backend arg choose the backend: automatic|openmp|hpx|cuda|hip|opencl|sycl|stdpar (default: automatic) + -b, --backend arg choose the backend: automatic|openmp|hpx|cuda|hip|opencl|sycl|kokkos|stdpar (default: automatic) -p, --target_platform arg choose the target platform: automatic|cpu|gpu_nvidia|gpu_amd|gpu_intel (default: automatic) --sycl_kernel_invocation_type arg choose the kernel invocation type when using SYCL as backend: automatic|nd_range (default: automatic) --sycl_implementation_type arg choose the SYCL implementation to be used in the SYCL backend: automatic|dpcpp|adaptivecpp (default: automatic) + --kokkos_execution_space arg + choose the Kokkos execution space to be used in the Kokkos backend: automatic|Cuda|OpenMP|Serial (default: automatic) --performance_tracking arg the output YAML file where the performance tracking results are written to; if not provided, the results are dumped to stderr --use_strings_as_labels use strings as labels instead of plane numbers @@ 
-594,10 +618,10 @@ Another example targeting NVIDIA GPUs using the SYCL backend looks like: The `--backend=automatic` option works as follows: -- if the `gpu_nvidia` target is available, check for existing backends in order `cuda` 🠦 `hip` 🠦 `opencl` 🠦 `sycl` 🠦 `stdpar` -- otherwise, if the `gpu_amd` target is available, check for existing backends in order `hip` 🠦 `opencl` 🠦 `sycl` 🠦 `stdpar` -- otherwise, if the `gpu_intel` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `stdpar` -- otherwise, if the `cpu` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `openmp` 🠦 `hpx` 🠦 `stdpar` +- if the `gpu_nvidia` target is available, check for existing backends in order `cuda` 🠦 `hip` 🠦 `opencl` 🠦 `sycl` 🠦 `kokkos` 🠦 `stdpar` +- otherwise, if the `gpu_amd` target is available, check for existing backends in order `hip` 🠦 `opencl` 🠦 `sycl` 🠦 `kokkos` 🠦 `stdpar` +- otherwise, if the `gpu_intel` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `kokkos` 🠦 `stdpar` +- otherwise, if the `cpu` target is available, check for existing backends in order `sycl` 🠦 `kokkos` 🠦 `opencl` 🠦 `openmp` 🠦 `hpx` 🠦 `stdpar` Note that during CMake configuration it is guaranteed that at least one of the above combinations does exist. @@ -609,11 +633,13 @@ The `--target_platform=automatic` option works for the different backends as fol - `HIP`: always selects an AMD GPU (if no AMD GPU is available, throws an exception) - `OpenCL`: tries to find available devices in the following order: NVIDIA GPUs 🠦 AMD GPUs 🠦 Intel GPUs 🠦 CPU - `SYCL`: tries to find available devices in the following order: NVIDIA GPUs 🠦 AMD GPUs 🠦 Intel GPUs 🠦 CPU +- `Kokkos`: checks which execution spaces are available and which target platforms they support and then tries to find available devices in the following order: NVIDIA GPUs 🠦 AMD GPUs 🠦 Intel GPUs 🠦 CPU - `stdpar`: target device must be selected at compile time (using `PLSSVM_TARGET_PLATFORMS`) or using environment variables at runtime The `--sycl_kernel_invocation_type` and `--sycl_implementation_type` flags are only used if the `--backend` is `sycl`, otherwise a warning is emitted on `stderr`. If the `--sycl_kernel_invocation_type` is `automatic`, the `nd_range` invocation type is currently always used. If the `--sycl_implementation_type` is `automatic`, the used SYCL implementation is determined by the `PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION` CMake flag. +If the `--kokkos_execution_space` is `automatic`, the best fitting execution space is selected based on the provided and/or available target platforms. ### Predicting using `plssvm-predict` @@ -628,10 +654,12 @@ LS-SVM with multiple (GPU-)backends Usage: ./plssvm-predict [OPTION...] 
test_file model_file [output_file] - -b, --backend arg choose the backend: automatic|openmp|cuda|hip|opencl|sycl|stdpar (default: automatic) + -b, --backend arg choose the backend: automatic|openmp|hpx|cuda|hip|opencl|sycl|kokkos|stdpar (default: automatic) -p, --target_platform arg choose the target platform: automatic|cpu|gpu_nvidia|gpu_amd|gpu_intel (default: automatic) --sycl_implementation_type arg choose the SYCL implementation to be used in the SYCL backend: automatic|dpcpp|adaptivecpp (default: automatic) + --kokkos_execution_space arg + choose the Kokkos execution space to be used in the Kokkos backend: automatic|Cuda|OpenMP|Serial (default: automatic) --performance_tracking arg the output YAML file where the performance tracking results are written to; if not provided, the results are dumped to stderr --use_strings_as_labels use strings as labels instead of plane numbers @@ -777,9 +805,6 @@ Roughly the same can be achieved using our Python bindings with the following Py import plssvm from sklearn.metrics import classification_report -# correctly initialize and finalize environments -environment_guard = plssvm.environment.ScopeGuard() - try: # create a new C-SVM parameter set, explicitly overriding the default kernel function params = plssvm.Parameter(kernel_type=plssvm.KernelFunctionType.POLYNOMIAL) diff --git a/bindings/Python/CMakeLists.txt b/bindings/Python/CMakeLists.txt index f951f77a4..f7d4e571d 100644 --- a/bindings/Python/CMakeLists.txt +++ b/bindings/Python/CMakeLists.txt @@ -41,7 +41,6 @@ set(PLSSVM_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/classification_types.cpp ${CMAKE_CURRENT_LIST_DIR}/csvm.cpp ${CMAKE_CURRENT_LIST_DIR}/data_set.cpp - ${CMAKE_CURRENT_LIST_DIR}/environment.cpp ${CMAKE_CURRENT_LIST_DIR}/file_format_types.cpp ${CMAKE_CURRENT_LIST_DIR}/gamma.cpp ${CMAKE_CURRENT_LIST_DIR}/kernel_function_types.cpp @@ -98,6 +97,9 @@ endif () if (TARGET ${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME}) list(APPEND PLSSVM_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/backends/dpcpp_csvm.cpp) endif () +if (TARGET ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) + list(APPEND PLSSVM_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/backends/kokkos_csvm.cpp) +endif () # create pybind11 module set(PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME plssvm) diff --git a/bindings/Python/README.md b/bindings/Python/README.md index afe9d6bb4..04d0cee14 100644 --- a/bindings/Python/README.md +++ b/bindings/Python/README.md @@ -10,10 +10,9 @@ - [plssvm.Parameter](#plssvmparameter) - [plssvm.DataSet](#plssvmdataset) - [plssvm.CSVM](#plssvmcsvm) - - [plssvm.openmp.CSVM, plssvm.hpx.CSVM, plssvm.stdpar.CSVM, plssvm.cuda.CSVM, plssvm.hip.CSVM, plssvm.opencl.CSVM, plssvm.sycl.CSVM, plssvm.dpcpp.CSVM, plssvm.adaptivecpp.CSVM](#plssvmopenmpcsvm-plssvmhpxcsvm-plssvmcudacsvm-plssvmhipcsvm-plssvmopenclcsvm-plssvmsyclcsvm-plssvmdpcppcsvm-plssvmadaptivecppcsvm) + - [plssvm.openmp.CSVM, plssvm.hpx.CSVM, plssvm.stdpar.CSVM, plssvm.cuda.CSVM, plssvm.hip.CSVM, plssvm.opencl.CSVM, plssvm.sycl.CSVM, plssvm.dpcpp.CSVM, plssvm.adaptivecpp.CSVM, plssvm.kokkos.CSVM](#plssvmopenmpcsvm-plssvmhpxcsvm-plssvmcudacsvm-plssvmhipcsvm-plssvmopenclcsvm-plssvmsyclcsvm-plssvmdpcppcsvm-plssvmadaptivecppcsvm-plssvmkokkoscsvm) - [plssvm.Model](#plssvmmodel) - [plssvm.Version](#plssvmversion) - - [plssvm.environment.ScopeGuard](#plssvmenvironmentscopeguard) - [plssvm.detail.tracking.PerformanceTracker](#plssvmdetailtrackingperformancetracker) - [plssvm.detail.tracking.Events](#plssvmdetailtrackingevent-plssvmdetailtrackingevents) - [Free 
functions](#free-functions) @@ -188,17 +187,16 @@ More detailed description of the class methods: The following table lists all PLSSVM enumerations exposed on the Python side: -| enumeration | values | description | -|------------------------|----------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `TargetPlatform` | `AUTOMATIC`, `CPU`, `GPU_NVIDIA`, `GPU_AMD`, `GPU_INTEL` | The different supported target platforms (default: `AUTOMATIC`). If `AUTOMATIC` is provided, checks for available devices in the following order: NVIDIA GPUs -> AMD GPUs -> Intel GPUs -> CPUs. | -| `SolverType` | `AUTOMATIC`, `CG_EXPLICIT`, `CG_IMPLICIT` | The different supported solver types (default: `AUTOMATIC`). If `AUTOMATIC` is provided, the used solver types depends on the available device and system memory. | -| `KernelFunctionType` | `LINEAR`, `POLYNOMIAL`, `RBF`, `SIGMOID`, `LAPLACIAN`, `CHI_SQUARED` | The different supported kernel functions (default: `LINEAR`). | -| `FileFormatType` | `LIBSVM`, `ARFF` | The different supported file format types (default: `LIBSVM`). | -| `GammaCoefficientType` | `AUTOMATIC`, `SCALE` | The different modes for the dynamic gamma calculation (default: `AUTOMATIC`). | -| `ClassificationType` | `OAA`, `OAO` | The different supported multi-class classification strategies (default: `LIBSVM`). | -| `BackendType` | `AUTOMATIC`, `OPENMP`, `HPX`, `CUDA`, `HIP`, `OPENCL`, `SYCL` | The different supported backends (default: `AUTOMATIC`). If `AUTOMATIC` is provided, the selected backend depends on the used target platform. | -| `VerbosityLevel` | `QUIET`, `LIBSVM`, `TIMING`, `FULL` | The different supported log levels (default: `FULL`). `QUIET` means no output, `LIBSVM` output that is as conformant as possible with LIBSVM's output, `TIMING` all timing related outputs, and `FULL` everything. Can be combined via bit-wise operations. | -| `Status` | `UNINITIALIZED`, `INITIALIZED`, `FINALIZED`, `UNNECESSARY` | The different environment status values. **Note**: located in the `plssvm.environment` module. | | +| enumeration | values | description | +|------------------------|-------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `TargetPlatform` | `AUTOMATIC`, `CPU`, `GPU_NVIDIA`, `GPU_AMD`, `GPU_INTEL` | The different supported target platforms (default: `AUTOMATIC`). If `AUTOMATIC` is provided, checks for available devices in the following order: NVIDIA GPUs -> AMD GPUs -> Intel GPUs -> CPUs. | +| `SolverType` | `AUTOMATIC`, `CG_EXPLICIT`, `CG_IMPLICIT` | The different supported solver types (default: `AUTOMATIC`). If `AUTOMATIC` is provided, the used solver types depends on the available device and system memory. | +| `KernelFunctionType` | `LINEAR`, `POLYNOMIAL`, `RBF`, `SIGMOID`, `LAPLACIAN`, `CHI_SQUARED` | The different supported kernel functions (default: `LINEAR`). | +| `FileFormatType` | `LIBSVM`, `ARFF` | The different supported file format types (default: `LIBSVM`). 
| +| `GammaCoefficientType` | `AUTOMATIC`, `SCALE` | The different modes for the dynamic gamma calculation (default: `AUTOMATIC`). | +| `ClassificationType` | `OAA`, `OAO` | The different supported multi-class classification strategies (default: `OAA`). | +| `BackendType` | `AUTOMATIC`, `OPENMP`, `HPX`, `CUDA`, `HIP`, `OPENCL`, `SYCL`, `KOKKOS` | The different supported backends (default: `AUTOMATIC`). If `AUTOMATIC` is provided, the selected backend depends on the used target platform. | +| `VerbosityLevel` | `QUIET`, `LIBSVM`, `TIMING`, `FULL` | The different supported log levels (default: `FULL`). `QUIET` means no output, `LIBSVM` output that is as conformant as possible with LIBSVM's output, `TIMING` all timing related outputs, and `FULL` everything. Can be combined via bit-wise operations. | If a SYCL implementation is available, additional enumerations are available: @@ -213,6 +211,12 @@ If the stdpar backend is available, an additional enumeration is available: |----------------------|---------------------------------------------------------------|-------------------------------------------------| | `ImplementationType` | `NVHPC`, `ROC_STDPAR`, `INTEL_LLVM`, `ADAPTIVECPP`, `GNU_TBB` | The different supported stdpar implementations. | +If the Kokkos backend is available, an additional enumeration is available: + +| enumeration | values | description | +|------------------|------------------------------------------------------------------------------------------|--------------------------------------------------| +| `ExecutionSpace` | `CUDA`, `HIP`, `SYCL`, `HPX`, `OPENMP`, `OPENMPTARGET`, `OPENACC`, `THREADS`, `SERIAL` | The different supported Kokkos execution spaces. | + ### Classes and submodules The following tables list all PLSSVM classes exposed on the Python side: @@ -337,9 +341,8 @@ If the most performant backend should be used, it is sufficient to use `plssvm.C `sycl_implementation_type` to choose between DPC++ and AdaptiveCpp as SYCL implementations and `sycl_kernel_invocation_type` to choose between the two different SYCL kernel invocation types. -**Note**: if the backend type is `plssvm.BackendType.HPX` it is necessary to initialize and finalize the HPX runtime. -The runtime can be manually managed using `plssvm.environment.initialize()` and `plssvm.environment.finalize()`. -We recommend utilizing `plssvm.environment.ScopeGuard()` to manage the lifetime of the HPX runtime automatically. +**Note**: if the backend type is `plssvm.BackendType.HPX` or `plssvm.BackendType.KOKKOS`, special initialization and finalization functions must be called. +However, this is **automatically** handled by our Python bindings on module import and cleanup. | methods | description | |----------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `score(model)` | Score the model with respect to itself returning its accuracy. | | `score(model, data_set)` | Score the model given the provided data set returning its accuracy. 
| -#### `plssvm.openmp.CSVM`, `plssvm.hpx.CSVM`, `plssvm.stdpar.CSVM`, plssvm.cuda.CSVM`, `plssvm.hip.CSVM`, `plssvm.opencl.CSVM`, `plssvm.sycl.CSVM`, `plssvm.dpcpp.CSVM`, `plssvm.adaptivecpp.CSVM` +#### `plssvm.openmp.CSVM`, `plssvm.hpx.CSVM`, `plssvm.stdpar.CSVM`, plssvm.cuda.CSVM`, `plssvm.hip.CSVM`, `plssvm.opencl.CSVM`, `plssvm.sycl.CSVM`, `plssvm.dpcpp.CSVM`, `plssvm.adaptivecpp.CSVM`, `plssvm.kokkos.CSVM` These classes represent the backend specific CSVMs. **Note**: they are only available if the respective backend has been enabled during PLSSVM's build step. @@ -391,6 +394,14 @@ CSVM. |-----------------------------|---------------------------------------------| | `get_implementation_type()` | Return the used stdpar implementation type. | +In case of the Kokkos CSVM (`plssvm.kokkos.CSVM`) the following method is additional available for the backend specific +CSVM. + + +| methods | description | +|-------------------------|-----------------------------------------| +| `get_execution_space()` | Return the used Kokkos execution space. | + #### `plssvm.Model` A class encapsulating a model learned during a call to `plssvm.CSVM.fit()`. @@ -429,19 +440,6 @@ A class encapsulating the version information of the used PLSSVM installation. | `minor : int` | The minor PLSSVM version. | | `patch : int` | The patch PLSSVM version. | -#### `plssvm.environment.ScopeGuard` - -The environmental scope guard can be used to automatically finalize all necessary backend environments when it goes out of scope. - -| constructors | description | -|-----------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `ScopeGuard([backends={}])` | Create a new scope guard initializing all available backend environments. If a list of backends is provided, only initializes these backends. | -| `ScopeGuard(argc, argv, [backends={}])` | Create a new scope guard initializing all available backend environments using the provided command line arguments. If a list of backends is provided, only initializes these backends. | - -| methods | description | -|--------------|-----------------------------------------------------------------------------------------------------------------------------------| -| `backends()` | Return all initialized backends. All backends returned by this function will be finalized when the scope guard goes out of scope. | - #### `plssvm.detail.tracking.PerformanceTracker` A submodule used to track various performance statistics like runtimes, but also the used setup and hyperparameters. @@ -535,15 +533,6 @@ If a stdpar implementation is available, additional free functions are available |-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------| | `list_available_stdpar_implementations()` | List all available stdpar implementations (determined during PLSSVM's build step; currently always guaranteed to be only one implementation). | -Additional free functions are available under `plssvm.environment.`. 
- -| function | description | -|-----------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `get_backend_status(backend)` | Return the current environment status of the provided backend. | -| `initialize([backends={}])` | Initialize all available backend environments. If a list of backends is provided, only initializes these backends. | -| `initialize(argc, argv, [backends={}])` | Initialize all available backend environments using the provided command line arguments. If a list of backends is provided, only initializes these backends. | -| `finalize([backends={}])` | Finalize all available backend environments. If a list of backends is provided, only finalizes these backends. | - ### Exceptions The PLSSVM Python3 bindings define a few new exception types: @@ -562,6 +551,5 @@ The PLSSVM Python3 bindings define a few new exception types: | `MatrixError` | If something went wrong in the internal matrix class. **Note**: shouldn't occur in user code. | | `KernelLaunchResourcesError` | If something went wrong during a kernel launch due to insufficient ressources. | | `ClassificationReportError` | If something in the classification report went wrong. **Note**: shouldn't occur in user code. | -| `EnvironmentError` | If something during environment initialization or finalization went wrong. | Depending on the available backends, additional `BackendError`s are also available (e.g., `plssvm.cuda.BackendError`). diff --git a/bindings/Python/backend_types.cpp b/bindings/Python/backend_types.cpp index f88f8c2e2..5664cf360 100644 --- a/bindings/Python/backend_types.cpp +++ b/bindings/Python/backend_types.cpp @@ -26,7 +26,8 @@ void init_backend_types(py::module_ &m) { .value("CUDA", plssvm::backend_type::cuda, "CUDA to target NVIDIA GPUs only") .value("HIP", plssvm::backend_type::hip, "HIP to target AMD and NVIDIA GPUs") .value("OPENCL", plssvm::backend_type::opencl, "OpenCL to target CPUs and GPUs from different vendors") - .value("SYCL", plssvm::backend_type::sycl, "SYCL to target CPUs and GPUs from different vendors; currently tested SYCL implementations are DPC++ and AdaptiveCpp"); + .value("SYCL", plssvm::backend_type::sycl, "SYCL to target CPUs and GPUs from different vendors; currently tested SYCL implementations are DPC++ and AdaptiveCpp") + .value("KOKKOS", plssvm::backend_type::kokkos, "Kokkos to target CPUs and GPUs from different vendors; currently all Kokkos execution spaces except Kokkos::Experimental::OpenMPTarget and Kokkos::Experimental::OpenACC are supported"); // bind free functions m.def("list_available_backends", &plssvm::list_available_backends, "list the available backends (as found during CMake configuration)"); diff --git a/bindings/Python/backends/adaptivecpp_csvm.cpp b/bindings/Python/backends/adaptivecpp_csvm.cpp index 767853757..bf81b11ae 100644 --- a/bindings/Python/backends/adaptivecpp_csvm.cpp +++ b/bindings/Python/backends/adaptivecpp_csvm.cpp @@ -11,7 +11,7 @@ #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/csvm.hpp" // plssvm::csvm #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::sycl_kernel_invocation_type #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "bindings/Python/utility.hpp" // 
check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception diff --git a/bindings/Python/backends/dpcpp_csvm.cpp b/bindings/Python/backends/dpcpp_csvm.cpp index 882d6ea37..906cb5979 100644 --- a/bindings/Python/backends/dpcpp_csvm.cpp +++ b/bindings/Python/backends/dpcpp_csvm.cpp @@ -11,7 +11,7 @@ #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/csvm.hpp" // plssvm::csvm #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::sycl_kernel_invocation_type #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception diff --git a/bindings/Python/backends/kokkos_csvm.cpp b/bindings/Python/backends/kokkos_csvm.cpp new file mode 100644 index 000000000..ea6c4af80 --- /dev/null +++ b/bindings/Python/backends/kokkos_csvm.cpp @@ -0,0 +1,77 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/backends/Kokkos/csvm.hpp" // plssvm::kokkos::csvm +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/csvm.hpp" // plssvm::csvm +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::kokkos_execution_space +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception + +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init +#include "pybind11/stl.h" // support for STL types + +#include // std::make_unique + +namespace py = pybind11; + +void init_kokkos_csvm(py::module_ &m, const py::exception &base_exception) { + // use its own submodule for the Kokkos CSVM bindings + py::module_ kokkos_module = m.def_submodule("kokkos", "a module containing all Kokkos backend specific functionality"); + + // bind the CSVM using the Kokkos backend + py::class_(kokkos_module, "CSVM") + .def(py::init<>(), "create an SVM with the automatic target platform and default parameter object") + .def(py::init(), "create an SVM with the automatic target platform and provided parameter object") + .def(py::init(), "create an SVM with the provided target platform and default parameter object") + .def(py::init(), "create an SVM with the provided target platform and parameter object") + .def(py::init([](const py::kwargs &args) { + // check for valid keys + check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "kokkos_execution_space" }); + // if one of the value keyword parameter is provided, set the respective value + const plssvm::parameter params = convert_kwargs_to_parameter(args); + // set Kokkos execution space + const plssvm::kokkos::execution_space space = args.contains("kokkos_execution_space") ? 
args["kokkos_execution_space"].cast() : plssvm::kokkos::execution_space::automatic; + // create CSVM with the default target platform + return std::make_unique(params, plssvm::kokkos_execution_space = space); + }), + "create an SVM with the default target platform and keyword arguments") + .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { + // check for valid keys + check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "kokkos_execution_space" }); + // if one of the value keyword parameter is provided, set the respective value + const plssvm::parameter params = convert_kwargs_to_parameter(args); + // set Kokkos execution space + const plssvm::kokkos::execution_space space = args.contains("kokkos_execution_space") ? args["kokkos_execution_space"].cast() : plssvm::kokkos::execution_space::automatic; + // create CSVM with the provided target platform + return std::make_unique(target, params, plssvm::kokkos_execution_space = space); + }), + "create an SVM with the provided target platform and keyword arguments") + .def("get_execution_space", &plssvm::kokkos::csvm::get_execution_space, "get the Kokkos execution space used in this Kokkos SVM"); + + // register Kokkos backend specific exceptions + register_py_exception(kokkos_module, "BackendError", base_exception); + + // bind the execution space enum classes + py::enum_(kokkos_module, "ExecutionSpace") + .value("AUTOMATIC", plssvm::kokkos::execution_space::cuda, "automatically determine the used Kokkos execution space (note: this does not necessarily correspond to Kokkos::DefaultExecutionSpace)") + .value("CUDA", plssvm::kokkos::execution_space::cuda, "execution space representing execution on a CUDA device") + .value("HIP", plssvm::kokkos::execution_space::hip, "execution space representing execution on a device supported by HIP") + .value("SYCL", plssvm::kokkos::execution_space::sycl, "execution space representing execution on a device supported by SYCL") + .value("HPX", plssvm::kokkos::execution_space::hpx, "execution space representing execution with the HPX runtime system") + .value("OPENMP", plssvm::kokkos::execution_space::openmp, "execution space representing execution with the OpenMP runtime system") + .value("OPENMPTARGET", plssvm::kokkos::execution_space::openmp_target, "execution space representing execution using the target offloading feature of the OpenMP runtime system") + .value("OPENACC", plssvm::kokkos::execution_space::openacc, "execution space representing execution with the OpenACC runtime system") + .value("THREADS", plssvm::kokkos::execution_space::threads, "execution space representing parallel execution with std::threads") + .value("SERIAL", plssvm::kokkos::execution_space::serial, "execution space representing serial execution on the CPU; should always be available"); + + kokkos_module.def("list_available_execution_spaces", &plssvm::kokkos::list_available_execution_spaces, "list all available Kokkos execution spaces"); +} diff --git a/bindings/Python/environment.cpp b/bindings/Python/environment.cpp deleted file mode 100644 index c9a467187..000000000 --- a/bindings/Python/environment.cpp +++ /dev/null @@ -1,105 +0,0 @@ -/** - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today The PLSSVM project - All Rights Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. 
- */ - -#include "plssvm/environment.hpp" - -#include "plssvm/backend_types.hpp" // plssvm::backend_type, plssvm::list_available_backends - -#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness - -#include "pybind11/pybind11.h" // py::module_, py::enum_ -#include "pybind11/pytypes.h" // py::kwargs -#include "pybind11/stl.h" // support for STL types: std::variant - -#include // std::size_t -#include // std::make_unique -#include // std::vector - -namespace py = pybind11; - -void init_environment(py::module_ &m) { - // use its own submodule for the environment related bindings - py::module_ env_module = m.def_submodule("environment", "a module containing all environment initialization and finalization functionality"); - - // bind enum class - py::enum_(m, "Status") - .value("UNINITIALIZED", plssvm::environment::status::uninitialized, "the backend environment hasn't been initialized or finalized yet") - .value("INITIALIZED", plssvm::environment::status::initialized, "the backend environment has been initialized but not finalized yet") - .value("FINALIZED", plssvm::environment::status::finalized, "the backend environment has already been initialized and finalized") - .value("UNNECESSARY", plssvm::environment::status::unnecessary, "no backend environment initialization or finalization necessary"); - - // bind free functions - env_module.def("get_backend_status", &plssvm::environment::get_backend_status, "get the environment status for the provided backend"); - env_module.def("is_initialization_necessary", &plssvm::environment::is_initialization_necessary, "check if the provided backend needs a special environment initialization"); - - env_module.def("initialize", [](const py::kwargs &args) { - // check for valid keys - check_kwargs_for_correctness(args, { "backends" }); - if (args.contains("backends")) { - plssvm::environment::initialize(args["backends"].cast>()); - } else { - plssvm::environment::initialize(); - } }, "initialize all available backends or only the optionally provided once"); - env_module.def("initialize", [](std::vector cmd_args, const py::kwargs &args) { - std::vector cmd_args_ptr(cmd_args.size()); - for (std::size_t i = 0; i < cmd_args.size(); ++i) { - cmd_args_ptr[i] = cmd_args[i].data(); - } - // assemble command line arguments - int argc = static_cast(cmd_args_ptr.size()); - char **argv = cmd_args_ptr.data(); - - // check for valid keys - check_kwargs_for_correctness(args, { "backends" }); - if (args.contains("backends")) { - plssvm::environment::initialize(argc, argv, args["backends"].cast>()); - } else { - plssvm::environment::initialize(argc, argv); - } }, "initialize all available backends or only the optionally provided once using the provided command line parameters"); - - env_module.def("finalize", [](const py::kwargs &args) { - // check for valid keys - check_kwargs_for_correctness(args, { "backends" }); - if (args.contains("backends")) { - plssvm::environment::finalize(args["backends"].cast>()); - } else { - plssvm::environment::finalize(); - } }, "finalize all available backends or only the optionally provided once"); - - // bind plssvm::environment::scope_guard - py::class_(env_module, "ScopeGuard") - .def(py::init([](const py::kwargs &args) { - // check for valid keys - check_kwargs_for_correctness(args, { "backends" }); - if (args.contains("backends")) { - return std::make_unique(args["backends"].cast>()); - } else { - return std::make_unique(); - } - }), - "create a new scope_guard and initialize all available backends or only the optionally 
provided once") - .def(py::init([](std::vector cmd_args, const py::kwargs &args) { - std::vector cmd_args_ptr(cmd_args.size()); - for (std::size_t i = 0; i < cmd_args.size(); ++i) { - cmd_args_ptr[i] = cmd_args[i].data(); - } - // assemble command line arguments - int argc = static_cast(cmd_args_ptr.size()); - char **argv = cmd_args_ptr.data(); - - // check for valid keys - check_kwargs_for_correctness(args, { "backends" }); - if (args.contains("backends")) { - return std::make_unique(argc, argv, args["backends"].cast>()); - } else { - return std::make_unique(argc, argv); - } - }), - "create a new scope_guard and initialize all available backends or only the optionally provided once using the provided command line parameters") - .def("backends", &plssvm::environment::scope_guard::backends, "return all initialized backends"); -} diff --git a/bindings/Python/main.cpp b/bindings/Python/main.cpp index 170afa2c3..1c1248fb2 100644 --- a/bindings/Python/main.cpp +++ b/bindings/Python/main.cpp @@ -7,6 +7,7 @@ * See the LICENSE.md file in the project root for full license information. */ +#include "plssvm/environment.hpp" // plssvm::environment::{initialize, finalize} #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "pybind11/pybind11.h" // PYBIND11_MODULE, py::module_, py::exception, py::register_exception_translator @@ -32,7 +33,6 @@ void init_parameter(py::module_ &); void init_model(py::module_ &); void init_data_set(py::module_ &); void init_version(py::module_ &); -void init_environment(py::module_ &); void init_exceptions(py::module_ &, const py::exception &); void init_csvm(py::module_ &); void init_openmp_csvm(py::module_ &, const py::exception &); @@ -42,11 +42,20 @@ void init_cuda_csvm(py::module_ &, const py::exception &); void init_hip_csvm(py::module_ &, const py::exception &); void init_opencl_csvm(py::module_ &, const py::exception &); void init_sycl(py::module_ &, const py::exception &); +void init_kokkos_csvm(py::module_ &, const py::exception &); void init_sklearn(py::module_ &); PYBIND11_MODULE(plssvm, m) { m.doc() = "Parallel Least Squares Support Vector Machine"; + // automatically initialize the environments + plssvm::environment::initialize(); + + // automatically finalize the environments + m.add_object("_cleanup", py::capsule([]() { + plssvm::environment::finalize(); + })); + // register PLSSVM base exception static py::exception base_exception(m, "PLSSVMError"); py::register_exception_translator([](std::exception_ptr p) { @@ -80,7 +89,6 @@ PYBIND11_MODULE(plssvm, m) { init_model(m); init_data_set(m); init_version(m); - init_environment(m); init_exceptions(m, base_exception); init_csvm(m); @@ -106,6 +114,9 @@ PYBIND11_MODULE(plssvm, m) { #if defined(PLSSVM_HAS_SYCL_BACKEND) init_sycl(m, base_exception); #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + init_kokkos_csvm(m, base_exception); +#endif init_sklearn(m); } diff --git a/cmake/plssvm/plssvmConfig.cmake.in b/cmake/plssvm/plssvmConfig.cmake.in index 9636e125e..beb6801bc 100644 --- a/cmake/plssvm/plssvmConfig.cmake.in +++ b/cmake/plssvm/plssvmConfig.cmake.in @@ -25,7 +25,7 @@ find_dependency(fmt REQUIRED) include("${CMAKE_CURRENT_LIST_DIR}/plssvmTargets.cmake") # list all available libraries -set(PLSSVM_SUPPORTED_COMPONENTS "OpenMP;HPX;CUDA;HIP;OpenCL;DPCPP;AdaptiveCpp;stdpar") +set(PLSSVM_SUPPORTED_COMPONENTS "OpenMP;HPX;CUDA;HIP;OpenCL;DPCPP;AdaptiveCpp;Kokkos;stdpar") set(PLSSVM_DISABLED_COMPONENTS "${PLSSVM_SUPPORTED_COMPONENTS}") # check which libraries are available diff --git 
a/cmake/plssvm/plssvmKokkosTargets.cmake b/cmake/plssvm/plssvmKokkosTargets.cmake new file mode 100644 index 000000000..7ec32069a --- /dev/null +++ b/cmake/plssvm/plssvmKokkosTargets.cmake @@ -0,0 +1,21 @@ +## Authors: Alexander Van Craen, Marcel Breyer +## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved +## License: This file is part of the PLSSVM project which is released under the MIT license. +## See the LICENSE.md file in the project root for full license information. +######################################################################################################################## + +include(CMakeFindDependencyMacro) + +# check if the Kokkos backend is available +if (TARGET plssvm::plssvm-Kokkos) + # enable Kokkos + find_dependency(Kokkos CONFIG) + # set alias targets + add_library(plssvm::Kokkos ALIAS plssvm::plssvm-Kokkos) + add_library(plssvm::kokkos ALIAS plssvm::plssvm-Kokkos) + # set COMPONENT to be found + set(plssvm_Kokkos_FOUND ON) +else () + # set COMPONENT to be NOT found + set(plssvm_Kokkos_FOUND OFF) +endif () \ No newline at end of file diff --git a/cmake/presets/all.json b/cmake/presets/all.json index 52d77a14c..978922667 100644 --- a/cmake/presets/all.json +++ b/cmake/presets/all.json @@ -13,7 +13,8 @@ "PLSSVM_ENABLE_CUDA_BACKEND": "AUTO", "PLSSVM_ENABLE_HIP_BACKEND": "AUTO", "PLSSVM_ENABLE_OPENCL_BACKEND": "AUTO", - "PLSSVM_ENABLE_SYCL_BACKEND": "AUTO" + "PLSSVM_ENABLE_SYCL_BACKEND": "AUTO", + "PLSSVM_ENABLE_KOKKOS_BACKEND": "AUTO" } }, { @@ -28,6 +29,7 @@ "PLSSVM_ENABLE_HIP_BACKEND": "AUTO", "PLSSVM_ENABLE_OPENCL_BACKEND": "AUTO", "PLSSVM_ENABLE_SYCL_BACKEND": "AUTO", + "PLSSVM_ENABLE_KOKKOS_BACKEND": "AUTO", "PLSSVM_ENABLE_LANGUAGE_BINDINGS": "ON", "PLSSVM_ENABLE_PYTHON_BINDINGS": "ON" } @@ -43,7 +45,8 @@ "PLSSVM_ENABLE_CUDA_BACKEND": "AUTO", "PLSSVM_ENABLE_HIP_BACKEND": "AUTO", "PLSSVM_ENABLE_OPENCL_BACKEND": "AUTO", - "PLSSVM_ENABLE_SYCL_BACKEND": "AUTO" + "PLSSVM_ENABLE_SYCL_BACKEND": "AUTO", + "PLSSVM_ENABLE_KOKKOS_BACKEND": "AUTO" } } ], @@ -84,7 +87,7 @@ "inherits": "common", "filter": { "include": { - "name": "OpenMP.*|HPX.*|CUDA.*|HIP.*|OpenCL.*|AdaptiveCpp.*|DPCPP.*" + "name": "OpenMP.*|HPX.*|CUDA.*|HIP.*|OpenCL.*|AdaptiveCpp.*|DPCPP.*|Kokkos.*" } } } diff --git a/cmake/presets/common.json b/cmake/presets/common.json index 68da8cd61..82bbea9e9 100644 --- a/cmake/presets/common.json +++ b/cmake/presets/common.json @@ -17,7 +17,8 @@ "PLSSVM_ENABLE_CUDA_BACKEND": "OFF", "PLSSVM_ENABLE_HIP_BACKEND": "OFF", "PLSSVM_ENABLE_OPENCL_BACKEND": "OFF", - "PLSSVM_ENABLE_SYCL_BACKEND": "OFF" + "PLSSVM_ENABLE_SYCL_BACKEND": "OFF", + "PLSSVM_ENABLE_KOKKOS_BACKEND": "OFF" } }, { diff --git a/cmake/presets/kokkos.json b/cmake/presets/kokkos.json new file mode 100644 index 000000000..620e940e5 --- /dev/null +++ b/cmake/presets/kokkos.json @@ -0,0 +1,142 @@ +{ + "version": 6, + "include": [ + "common.json" + ], + "configurePresets": [ + { + "name": "kokkos", + "displayName": "Kokkos backend", + "inherits": "build", + "cacheVariables": { + "PLSSVM_ENABLE_KOKKOS_BACKEND": "ON" + } + }, + { + "name": "kokkos_python", + "displayName": "Kokkos backend + Python bindings", + "inherits": "build", + "cacheVariables": { + "PLSSVM_ENABLE_KOKKOS_BACKEND": "ON", + "PLSSVM_ENABLE_LANGUAGE_BINDINGS": "ON", + "PLSSVM_ENABLE_PYTHON_BINDINGS": "ON" + } + }, + { + "name": "kokkos_test", + "displayName": "Kokkos backend tests", + "inherits": "test", + "cacheVariables": { + "PLSSVM_ENABLE_KOKKOS_BACKEND": "ON" + } + } + ], + "buildPresets": [ + { + "name": "kokkos", + 
"displayName": "Kokkos backend", + "configurePreset": "kokkos", + "configuration": "RelWithDebInfo", + "inherits": "common" + }, + { + "name": "kokkos_python", + "displayName": "Kokkos backend + Python bindings", + "configurePreset": "kokkos_python", + "configuration": "RelWithDebInfo", + "inherits": "common" + }, + { + "name": "kokkos_test", + "displayName": "Kokkos backend tests", + "configurePreset": "kokkos_test", + "configuration": "Debug", + "inherits": "common" + } + ], + "testPresets": [ + { + "name": "kokkos_test", + "displayName": "Kokkos backend all tests", + "configurePreset": "kokkos_test", + "inherits": "common" + }, + { + "name": "kokkos_backend_test", + "displayName": "Kokkos backend specific tests", + "configurePreset": "kokkos_test", + "inherits": "common", + "filter": { + "include": { + "name": "Kokkos.*" + } + } + } + ], + "workflowPresets": [ + { + "name": "kokkos", + "displayName": "Kokkos backend workflow", + "steps": [ + { + "name": "kokkos", + "type": "configure" + }, + { + "name": "kokkos", + "type": "build" + } + ] + }, + { + "name": "kokkos_python", + "displayName": "Kokkos backend + Python bindings workflow", + "steps": [ + { + "name": "kokkos_python", + "type": "configure" + }, + { + "name": "kokkos_python", + "type": "build" + } + ] + }, + { + "name": "kokkos_test", + "displayName": "Kokkos test workflow", + "steps": [ + { + "name": "kokkos_test", + "type": "configure" + }, + { + "name": "kokkos_test", + "type": "build" + }, + { + "name": "kokkos_test", + "type": "test" + } + ] + }, + { + "name": "kokkos_backend_test", + "displayName": "Kokkos backend test workflow", + "steps": [ + { + "name": "kokkos_test", + "type": "configure" + }, + { + "name": "kokkos_test", + "type": "build" + }, + { + "name": "kokkos_backend_test", + "type": "test" + } + ] + } + ] +} \ No newline at end of file diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 3bf366b62..ec8c0c40f 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -32,6 +32,7 @@ set(DOXYGEN_PROJECT_LOGO "${PROJECT_SOURCE_DIR}/docs/resources/logo_90x55.png") set(DOXYGEN_EXCLUDE_SYMBOLS "*_HPP_") set(DOXYGEN_DOT_IMAGE_FORMAT "svg") +set(DOXYGEN_DOT_GRAPH_MAX_NODES "100") set(DOXYGEN_INTERACTIVE_SVG "YES") set(DOXYGEN_INCLUDE_GRAPH "NO") set(DOXYGEN_EXTRACT_PRIVATE "YES") diff --git a/docs/plssvm-predict.1.in b/docs/plssvm-predict.1.in index bb9e29b6b..17d6081fa 100644 --- a/docs/plssvm-predict.1.in +++ b/docs/plssvm-predict.1.in @@ -22,6 +22,8 @@ choose the target platform: @PLSSVM_PLATFORM_NAME_LIST@ (default: automatic) @PLSSVM_SYCL_MANPAGE_ENTRY@ +@PLSSVM_KOKKOS_MANPAGE_ENTRY@ + @PLSSVM_PERFORMANCE_TRACKER_MANPAGE_ENTRY@ .TP diff --git a/docs/plssvm-train.1.in b/docs/plssvm-train.1.in index b52853dac..fad2e4fba 100644 --- a/docs/plssvm-train.1.in +++ b/docs/plssvm-train.1.in @@ -17,7 +17,10 @@ plssvm-train is a utility to train an LS-SVM using different backends to target set type of kernel function. 
0 -- linear: u'*v 1 -- polynomial: (gamma*u'*v + coef0)^degree - 2 -- radial basis function: exp(-gamma*|u-v|^2) (default: 2) + 2 -- radial basis function: exp(-gamma*|u-v|^2) + 3 -- sigmoid: tanh(gamma*u'*v+coef0) + 4 -- laplacian: exp(-gamma*|u-v|_1) + 5 -- chi_squared: exp(-gamma*sum_i((x[i]-y[i])^2/(x[i]+y[i]))) (default: 2) .TP .B -d, --degree arg @@ -25,7 +28,7 @@ set degree in kernel function (default: 3) .TP .B -g, --gamma arg -set gamma in kernel function (default: 1 / num_features) +set gamma in kernel function (default: automatic) .TP .B -r, --coef0 arg @@ -61,6 +64,8 @@ choose the target platform: @PLSSVM_PLATFORM_NAME_LIST@ (default: automatic) @PLSSVM_SYCL_MANPAGE_ENTRY@ +@PLSSVM_KOKKOS_MANPAGE_ENTRY@ + @PLSSVM_PERFORMANCE_TRACKER_MANPAGE_ENTRY@ .TP diff --git a/docs/resources/dirs.dox b/docs/resources/dirs.dox index 84e561a46..fd23efcbc 100644 --- a/docs/resources/dirs.dox +++ b/docs/resources/dirs.dox @@ -153,6 +153,61 @@ * @brief Directory containing kernel implementations for utility functions using the HIP backend. */ +/** + * @dir include/plssvm/backends/Kokkos + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing the implementation for the Kokkos backend. + */ + +/** + * @dir include/plssvm/backends/Kokkos/detail + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing implementation details for the Kokkos backend. + */ + +/** + * @dir include/plssvm/backends/Kokkos/kernel + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing all kernels for the Kokkos backend. + */ + +/** + * @dir include/plssvm/backends/Kokkos/kernel/cg_explicit + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing kernel implementations for the explicit CG algorithm using the Kokkos backend. + */ + +/** + * @dir include/plssvm/backends/Kokkos/kernel/cg_implicit + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing kernel implementations for the implicit CG algorithm using the Kokkos backend. 
+ */ + /** * @dir include/plssvm/backends/OpenCL * @author Alexander Van Craen @@ -345,7 +400,7 @@ * @dir include/plssvm/backends/HPX/detail * @author Alexander Van Craen * @author Marcel Breyer - * @authir Alexander Strack + * @author Alexander Strack * @copyright 2018-today The PLSSVM project - All Rights Reserved * @license This file is part of the PLSSVM project which is released under the MIT license. * See the LICENSE.md file in the project root for full license information. @@ -381,7 +436,7 @@ * @dir include/plssvm/backends/HPX/kernel/cg_implicit * @author Alexander Van Craen * @author Marcel Breyer - * @author Alexander Strack + * @author Alexander Strack * @copyright 2018-today The PLSSVM project - All Rights Reserved * @license This file is part of the PLSSVM project which is released under the MIT license. * See the LICENSE.md file in the project root for full license information. diff --git a/examples/python/main.py b/examples/python/main.py index a200524ff..5efd985ec 100644 --- a/examples/python/main.py +++ b/examples/python/main.py @@ -1,9 +1,6 @@ import plssvm from sklearn.metrics import classification_report -# correctly initialize and finalize environments -environment_guard = plssvm.environment.ScopeGuard() - try: # create a new C-SVM parameter set, explicitly overriding the default kernel function params = plssvm.Parameter(kernel_type=plssvm.KernelFunctionType.POLYNOMIAL) diff --git a/examples/python/sklearn_like_svc.py b/examples/python/sklearn_like_svc.py index 57ab3e148..4b5e5f44f 100644 --- a/examples/python/sklearn_like_svc.py +++ b/examples/python/sklearn_like_svc.py @@ -1,9 +1,6 @@ from sklearn.datasets import make_classification import plssvm -# correctly initialize and finalize environments -environment_guard = plssvm.environment.ScopeGuard() - num_samples = 2**8 num_features = 2**6 diff --git a/include/plssvm/backend_types.hpp b/include/plssvm/backend_types.hpp index 449f5dcdd..90a19bd5c 100644 --- a/include/plssvm/backend_types.hpp +++ b/include/plssvm/backend_types.hpp @@ -36,6 +36,8 @@ enum class backend_type { automatic, /** [OpenMP](https://www.openmp.org/) to target CPUs only (currently no OpenMP target offloading support). */ openmp, + /** [HPX] (https://hpx.stellar-group.org/) to target CPUs only (currently no GPU support). */ + hpx, /** [C++ stdpar](https://en.cppreference.com/w/cpp/algorithm#Execution_policies) to target CPUs and GPUs from different vendors using C++ standard library parallel algorithms. */ stdpar, /** [CUDA](https://developer.nvidia.com/cuda-zone) to target NVIDIA GPUs only. */ @@ -46,8 +48,8 @@ enum class backend_type { opencl, /** [SYCL](https://www.khronos.org/sycl/) to target CPUs and GPUs from different vendors. Currently tested SYCL implementations are [DPC++](https://github.com/intel/llvm) and [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp) (formerly known as hipSYCL). */ sycl, - /** [HPX] (https://hpx.stellar-group.org/) to target CPUs only (currently no GPU support). */ - hpx + /** [Kokkos](https://github.com/kokkos/kokkos) to target CPUs and GPUs from different vendors. */ + kokkos }; /** @@ -93,6 +95,7 @@ namespace hip { class csvm; } namespace opencl { class csvm; } namespace adaptivecpp { class csvm; } namespace dpcpp { class csvm; } +namespace kokkos { class csvm; } // clang-format on @@ -182,6 +185,15 @@ struct csvm_to_backend_type { constexpr static sycl::implementation_type impl = sycl::implementation_type::dpcpp; }; +/** + * @brief Sets the `value` to `plssvm::backend_type::kokkos` for the Kokkos C-SVM. 
+ */ +template <> +struct csvm_to_backend_type { + /// The enum value representing the Kokkos backend. + constexpr static backend_type value = backend_type::kokkos; +}; + } // namespace detail /// @endcond diff --git a/include/plssvm/backends/Kokkos/csvm.hpp b/include/plssvm/backends/Kokkos/csvm.hpp new file mode 100644 index 000000000..2ff662933 --- /dev/null +++ b/include/plssvm/backends/Kokkos/csvm.hpp @@ -0,0 +1,217 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines a C-SVM using the Kokkos backend. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_CSVM_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_CSVM_HPP_ +#pragma once + +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::{dim_type, execution_range} +#include "plssvm/backends/gpu_csvm.hpp" // plssvm::detail::gpu_csvm +#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/detail/pinned_memory.hpp" // plssvm::kokkos::detail::pinned_memory +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/csvm.hpp" // plssvm::detail::csvm_backend_exists +#include "plssvm/detail/igor_utility.hpp" // plssvm::detail::get_value_from_named_parameter +#include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size +#include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES +#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::parameter +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "igor/igor.hpp" // igor::parser + +#include // std::size_t +#include // std::true_type +#include // std::forward +#include // std::vector + +namespace plssvm { + +namespace kokkos { + +/** + * @brief A C-SVM implementation using Kokkos as backend. + */ +class csvm : public ::plssvm::detail::gpu_csvm { + protected: + // protected for the test mock class + /// The template base type of the Kokkos C-SVM class. + using base_type = ::plssvm::detail::gpu_csvm; + + using base_type::data_distribution_; + using base_type::devices_; + + public: + using base_type::device_ptr_type; + using typename base_type::pinned_memory_type; + using typename base_type::queue_type; + + /** + * @brief Construct a new C-SVM using the Kokkos backend with the parameters given through @p params. + * @param[in] params struct encapsulating all possible parameters + * @throws plssvm::exception all exceptions thrown in the base class constructor + * @throws plssvm::kokkos::backend_exception if the requested target is not available + * @throws plssvm::kokkos::backend_exception if no device for the requested target was found + */ + explicit csvm(parameter params = {}); + /** + * @brief Construct a new C-SVM using the Kokkos backend on the @p target platform with the parameters given through @p params. 
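+     * @details A minimal usage sketch (the chosen target platform is only an example):
+     * @code
+     * const plssvm::kokkos::csvm svm{ plssvm::target_platform::gpu_nvidia, plssvm::parameter{} };
+     * @endcode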
+ * @param[in] target the target platform used for this C-SVM + * @param[in] params struct encapsulating all possible SVM parameters + * @throws plssvm::exception all exceptions thrown in the base class constructor + * @throws plssvm::kokkos::backend_exception if the requested target is not available + * @throws plssvm::kokkos::backend_exception if no device for the requested target was found + */ + explicit csvm(target_platform target, parameter params = {}); + + /** + * @brief Construct a new C-SVM using the Kokkos backend and the optionally provided @p named_args. + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructor + * @throws plssvm::kokkos::backend_exception if the requested target is not available + * @throws plssvm::kokkos::backend_exception if no device for the requested target was found + */ + template )> + explicit csvm(Args &&...named_args) : + csvm{ plssvm::target_platform::automatic, std::forward(named_args)... } { } + + /** + * @brief Construct a new C-SVM using the Kokkos backend on the @p target platform and the optionally provided @p named_args. + * @param[in] target the target platform used for this C-SVM + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructor + * @throws plssvm::kokkos::backend_exception if the requested target is not available + * @throws plssvm::kokkos::backend_exception if no device for the requested target was found + */ + template )> + explicit csvm(const target_platform target, Args &&...named_args) : + base_type{ std::forward(named_args)... } { + // check igor parameter + igor::parser parser{ std::forward(named_args)... }; + + // check whether a specific Kokkos execution space has been requested + if constexpr (parser.has(kokkos_execution_space)) { + // compile time check: the value must have the correct type + space_ = ::plssvm::detail::get_value_from_named_parameter(parser, kokkos_execution_space); + } + this->init(target); + } + + /** + * @copydoc plssvm::csvm::csvm(const plssvm::csvm &) + */ + csvm(const csvm &) = delete; + /** + * @copydoc plssvm::csvm::csvm(plssvm::csvm &&) noexcept + */ + csvm(csvm &&) noexcept = default; + /** + * @copydoc plssvm::csvm::operator=(const plssvm::csvm &) + */ + csvm &operator=(const csvm &) = delete; + /** + * @copydoc plssvm::csvm::operator=(plssvm::csvm &&) noexcept + */ + csvm &operator=(csvm &&) noexcept = default; + /** + * @brief Wait for all operations on all Kokkos devices to finish. + * @details Terminates the program, if any exception is thrown. + */ + ~csvm() override; + + /** + * @brief Return the currently used Kokkos `execution_space`. + * @return the execution space (`[[nodiscard]]`) + */ + [[nodiscard]] execution_space get_execution_space() const noexcept { return space_; } + + protected: + /** + * @brief Initialize all important states related to the Kokkos backend. 
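+     * @details Called by every constructor after the (optional) Kokkos execution space has been parsed from the named parameters, e.g. (usage sketch; the exact named-parameter spelling is an assumption):
+     * @code
+     * const plssvm::kokkos::csvm svm{ plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::cuda };
+     * @endcode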
+ * @param[in] target the target platform to use + * @throws plssvm::kokkos::backend_exception if the requested target is not available + * @throws plssvm::kokkos::backend_exception if no device for the requested target was found + */ + void init(target_platform target); + + /** + * @copydoc plssvm::csvm::get_device_memory + */ + [[nodiscard]] std::vector<::plssvm::detail::memory_size> get_device_memory() const final; + /** + * @copydoc plssvm::csvm::get_max_mem_alloc_size + */ + [[nodiscard]] std::vector<::plssvm::detail::memory_size> get_max_mem_alloc_size() const final; + /** + * @copydoc plssvm::detail::gpu_csvm::get_max_work_group_size + */ + [[nodiscard]] std::size_t get_max_work_group_size(std::size_t device_id) const final; + /** + * @copydoc plssvm::detail::gpu_csvm::get_max_grid_size + */ + [[nodiscard]] ::plssvm::detail::dim_type get_max_grid_size(std::size_t device_id) const override; + + //***************************************************// + // fit // + //***************************************************// + /** + * @copydoc plssvm::detail::gpu_csvm::run_assemble_kernel_matrix_explicit + */ + [[nodiscard]] device_ptr_type run_assemble_kernel_matrix_explicit(std::size_t device_id, const ::plssvm::detail::execution_range &exec, const parameter ¶ms, const device_ptr_type &data_d, const device_ptr_type &q_red_d, real_type QA_cost) const final; + /** + * @copydoc plssvm::detail::gpu_csvm::run_blas_level_3_kernel_explicit + */ + void run_blas_level_3_kernel_explicit(std::size_t device_id, const ::plssvm::detail::execution_range &exec, const ::plssvm::detail::execution_range &mirror_exec, real_type alpha, const device_ptr_type &A_d, const device_ptr_type &B_d, real_type beta, device_ptr_type &C_d) const final; + /** + * @copydoc plssvm::detail::gpu_csvm::run_assemble_kernel_matrix_implicit_blas_level_3 + */ + void run_assemble_kernel_matrix_implicit_blas_level_3(std::size_t device_id, const ::plssvm::detail::execution_range &exec, real_type alpha, const device_ptr_type &A_d, const parameter ¶ms, const device_ptr_type &q_red_d, real_type QA_cost, const device_ptr_type &B_d, device_ptr_type &C_d) const final; + /** + * @copydoc plssvm::detail::gpu_csvm::run_inplace_matrix_addition + */ + void run_inplace_matrix_addition(std::size_t device_id, const ::plssvm::detail::execution_range &exec, device_ptr_type &lhs_d, const device_ptr_type &rhs_d) const override; + /** + * @copydoc plssvm::detail::gpu_csvm::run_inplace_matrix_scale + */ + void run_inplace_matrix_scale(std::size_t device_id, const ::plssvm::detail::execution_range &exec, device_ptr_type &lhs_d, real_type scale) const override; + + //***************************************************// + // predict, score // + //***************************************************// + /** + * @copydoc plssvm::detail::gpu_csvm::run_w_kernel + */ + [[nodiscard]] device_ptr_type run_w_kernel(std::size_t device_id, const ::plssvm::detail::execution_range &exec, const device_ptr_type &alpha_d, const device_ptr_type &sv_d) const final; + /** + * @copydoc plssvm::detail::gpu_csvm::run_predict_kernel + */ + [[nodiscard]] device_ptr_type run_predict_kernel(std::size_t device_id, const ::plssvm::detail::execution_range &exec, const parameter ¶ms, const device_ptr_type &alpha_d, const device_ptr_type &rho_d, const device_ptr_type &sv_or_w_d, const device_ptr_type &predict_points_d) const final; + + /// The used Kokkos execution space. 
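+    /// Value-initialized to `execution_space::automatic` (the first enumerator), i.e., the execution space is determined automatically unless explicitly requested.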
+ execution_space space_{}; +}; + +} // namespace kokkos + +namespace detail { + +/** + * @brief Sets the `value` to `true` since C-SVMs using the Kokkos backend are available. + */ +template <> +struct csvm_backend_exists : std::true_type { }; + +} // namespace detail + +} // namespace plssvm + +#endif // PLSSVM_BACKENDS_KOKKOS_CSVM_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp b/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp new file mode 100644 index 000000000..559c9e75c --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp @@ -0,0 +1,238 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Conditionally defined macros for the different available Kokkos ExecutionSpaces. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_CONDITIONAL_EXECUTION_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_CONDITIONAL_EXECUTION_HPP_ +#pragma once + +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space + +#include "Kokkos_Core.hpp" // Kokkos macros + +#include "fmt/core.h" // fmt::format + +#include // std::invoke + +namespace plssvm::kokkos::detail { + +//***************************************************// +// Kokkos::Cuda // +//***************************************************// + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA` macro if `KOKKOS_ENABLE_CUDA` is defined, i.e., the Kokkos CUDA ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_CUDA` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA` macro if `KOKKOS_ENABLE_CUDA` is defined, i.e., the Kokkos CUDA ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_CUDA` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. + */ +#if defined(KOKKOS_ENABLE_CUDA) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA(func) std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::cuda) } + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::cuda) } +#endif + +//***************************************************// +// Kokkos::HIP // +//***************************************************// + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP` macro if `KOKKOS_ENABLE_HIP` is defined, i.e., the Kokkos HIP ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_HIP` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. 
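+ * For example (illustrative sketch; `query_hip_device_count` is a hypothetical helper):
+ * @code
+ * // inside a function: expands to `return std::invoke(...);` if HIP is available, otherwise throws a backend_exception
+ * PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP([]() { return query_hip_device_count(); });
+ * @endcode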
+ * + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP` macro if `KOKKOS_ENABLE_HIP` is defined, i.e., the Kokkos HIP ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_HIP` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. + */ +#if defined(KOKKOS_ENABLE_HIP) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP(func) std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::hip) } + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::hip) } +#endif + +//***************************************************// +// Kokkos::SYCL // +//***************************************************// + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL` macro if `KOKKOS_ENABLE_SYCL` is defined, i.e., the Kokkos SYCL ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_SYCL` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL` macro if `KOKKOS_ENABLE_SYCL` is defined, i.e., the Kokkos SYCL ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_SYCL` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. + */ +#if defined(KOKKOS_ENABLE_SYCL) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL(func) std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::sycl) } + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::sycl) } +#endif + +//***************************************************// +// Kokkos::Experimental::HPX // +//***************************************************// + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HPX + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HPX` macro if `KOKKOS_ENABLE_HPX` is defined, i.e., the Kokkos HPX ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_HPX` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX` macro if `KOKKOS_ENABLE_HPX` is defined, i.e., the Kokkos HPX ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_HPX` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. 
+ */ +#if defined(KOKKOS_ENABLE_HPX) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HPX(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX(func) std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HPX(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::hpx) } + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::hpx) } +#endif + +//***************************************************// +// Kokkos::OpenMP // +//***************************************************// + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMP + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMP` macro if `KOKKOS_ENABLE_OPENMP` is defined, i.e., the Kokkos OpenMP ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_OPENMP` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP` macro if `KOKKOS_ENABLE_OPENMP` is defined, i.e., the Kokkos OpenMP ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_OPENMP` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. + */ +#if defined(KOKKOS_ENABLE_OPENMP) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMP(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP(func) std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMP(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openmp) } + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openmp) } +#endif + +//***************************************************// +// Kokkos::Experimental::OpenMPTarget // +//***************************************************// + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMPTARGET + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMPTARGET` macro if `KOKKOS_ENABLE_OPENMPTARGET` is defined, i.e., the Kokkos OpenMP target offloading ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_OPENMPTARGET` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET` macro if `KOKKOS_ENABLE_OPENMPTARGET` is defined, i.e., the Kokkos OpenMP target offloading ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_OPENMPTARGET` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. 
+ */ +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMPTARGET(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET(func) std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMPTARGET(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openmp_target) } + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openmp_target) } +#endif + +//***************************************************// +// Kokkos::Experimental::OpenACC // +//***************************************************// + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENACC + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENACC` macro if `KOKKOS_ENABLE_OPENACC` is defined, i.e., the Kokkos OpenACC ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_OPENACC` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC` macro if `KOKKOS_ENABLE_OPENACC` is defined, i.e., the Kokkos OpenACC ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_OPENACC` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. + */ +#if defined(KOKKOS_ENABLE_OPENACC) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENACC(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC(func) std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENACC(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openacc) } + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openacc) } +#endif + +//***************************************************// +// Kokkos::Threads // +//***************************************************// + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_THREADS + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_THREADS` macro if `KOKKOS_ENABLE_THREADS` is defined, i.e., the Kokkos std::thread ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_THREADS` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS` macro if `KOKKOS_ENABLE_THREADS` is defined, i.e., the Kokkos std::thread ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_THREADS` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. 
+ */ +#if defined(KOKKOS_ENABLE_THREADS) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_THREADS(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS(func) std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_THREADS(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::threads) } + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::threads) } +#endif + +//***************************************************// +// Kokkos::Serial // +//***************************************************// + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SERIAL + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SERIAL` macro if `KOKKOS_ENABLE_SERIAL` is defined, i.e., the Kokkos serial ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_SERIAL` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * @note This ExecutionSpace *should* always be available! + * + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL` macro if `KOKKOS_ENABLE_SERIAL` is defined, i.e., the Kokkos serial ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_SERIAL` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. + * @note This ExecutionSpace *should* always be available! + */ +#if defined(KOKKOS_ENABLE_SERIAL) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SERIAL(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL(func) std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SERIAL(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::serial) } + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::serial) } +#endif + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_CONDITIONAL_EXECUTION_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp b/include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp new file mode 100644 index 000000000..80d3f8cd9 --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp @@ -0,0 +1,73 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Function to list all available execution spaces at compile time. + * @note Must be a separate file such that the Kokkos header must not be included in the "execution_space.hpp" file. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_CONSTEXPR_AVAILABLE_EXECUTION_SPACES_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_CONSTEXPR_AVAILABLE_EXECUTION_SPACES_HPP_ + +/** + * @def PLSSVM_KOKKOS_BACKEND_NUM_AVAILABLE_EXECUTION_SPACES + * @brief Set the macro `PLSSVM_KOKKOS_BACKEND_NUM_AVAILABLE_EXECUTION_SPACES` to 0 if it isn't already defined, i.e., no Kokkos execution space is available. 
+ * Will normally be propagated by CMake with the number of available Kokkos execution spaces. + */ +#if !defined(PLSSVM_KOKKOS_BACKEND_NUM_AVAILABLE_EXECUTION_SPACES) + #define PLSSVM_KOKKOS_BACKEND_NUM_AVAILABLE_EXECUTION_SPACES 0 +#endif + +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space + +#include // std::array + +namespace plssvm::kokkos::detail { + +/** + * @brief List all available Kokkos::ExecutionSpaces at compile time. + * @details At least one execution space must **always** be available! + * @return a `std::array` containing all available execution spaces (`[[nodiscard]]`) + */ +[[nodiscard]] inline constexpr auto constexpr_available_execution_spaces() noexcept { + // Note: The execution_space::automatic value may NEVER be added here! + // Note: the trailing comma is explicitly allowed by the standard + // Note: the order is intentionally chosen this way -> the order of the entries determines the priority when using a backend to run our code + return std::array{ +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_CUDA) + execution_space::cuda, +#endif +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_HIP) + execution_space::hip, +#endif +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_SYCL) + execution_space::sycl, +#endif +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_OPENMPTARGET) + execution_space::openmp_target, +#endif +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_OPENACC) + execution_space::openacc, +#endif +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_OPENMP) + execution_space::openmp, +#endif +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_THREADS) + execution_space::threads, +#endif +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_HPX) + execution_space::hpx, +#endif +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_SERIAL) + execution_space::serial, +#endif + }; +} + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_CONSTEXPR_AVAILABLE_EXECUTION_SPACES_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/device_ptr.hpp b/include/plssvm/backends/Kokkos/detail/device_ptr.hpp new file mode 100644 index 000000000..ad067d00b --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/device_ptr.hpp @@ -0,0 +1,134 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Small wrapper around a Kokkos::View. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_PTR_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_PTR_HPP_ +#pragma once + +#include "plssvm/backends/gpu_device_ptr.hpp" // plssvm::detail::gpu_device_ptr +#include "plssvm/backends/Kokkos/detail/device_view_wrapper.hpp" // plssvm::kokkos::detail::device_view_wrapper +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/shape.hpp" // plssvm::shape + +#include // std::size_t + +namespace plssvm::kokkos::detail { + +/** + * @brief Small wrapper class around a Kokkos view together with commonly used device functions. + * @tparam T the type of the kernel view to wrap + */ +template +class device_ptr : public ::plssvm::detail::gpu_device_ptr, device_ptr> { + /// The template base type of the Kokkos device_ptr class. 
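+    /// It provides the backend-agnostic interface (copy, fill, memset, ...) in terms of the Kokkos-specific overrides declared below.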
+ using base_type = ::plssvm::detail::gpu_device_ptr, device_ptr>; + + using base_type::data_; + using base_type::queue_; + using base_type::shape_; + + public: + // Be able to use overloaded base class functions. + using base_type::copy_to_device; + using base_type::copy_to_device_strided; + using base_type::copy_to_host; + using base_type::copy_to_other_device; + using base_type::fill; + using base_type::memset; + + using typename base_type::const_host_pointer_type; + using typename base_type::device_pointer_type; + using typename base_type::host_pointer_type; + using typename base_type::queue_type; + using typename base_type::size_type; + using typename base_type::value_type; + + /** + * @brief Default construct a Kokkos device_ptr with a size of 0. + * @details Always associated with device 0. + */ + device_ptr() = default; + /** + * @brief Allocates `size * sizeof(T)` bytes in the Kokkos execution space @p exec. + * @param[in] size the number of elements represented by the device_ptr + * @param[in] device the device wrapper + */ + explicit device_ptr(size_type size, const device_wrapper &device); + /** + * @brief Allocates `shape.x * shape.y * sizeof(T)` bytes in the Kokkos execution space @p exec. + * @param[in] shape the number of elements represented by the device_ptr + * @param[in] device the device wrapper + */ + explicit device_ptr(plssvm::shape shape, const device_wrapper &device); + /** + * @brief Allocates `(shape.x + padding.x) * (shape.y + padding.y) * sizeof(T)` bytes in the Kokkos execution space @p exec. + * @param[in] shape the number of elements represented by the device_ptr + * @param[in] padding the number of padding elements added to the extent values + * @param[in] device the device wrapper + */ + device_ptr(plssvm::shape shape, plssvm::shape padding, const device_wrapper &device); + + /** + * @copydoc plssvm::detail::gpu_device_ptr::gpu_device_ptr(const plssvm::detail::gpu_device_ptr &) + */ + device_ptr(const device_ptr &) = delete; + /** + * @copydoc plssvm::detail::gpu_device_ptr::gpu_device_ptr(plssvm::detail::gpu_device_ptr &&) + */ + device_ptr(device_ptr &&other) noexcept = default; + + /** + * @copydoc plssvm::detail::gpu_device_ptr::operator=(const plssvm::detail::gpu_device_ptr &) + */ + device_ptr &operator=(const device_ptr &) = delete; + /** + * @copydoc plssvm::detail::gpu_device_ptr::operator=(plssvm::detail::gpu_device_ptr &&) + */ + device_ptr &operator=(device_ptr &&other) noexcept = default; + + /** + * @copydoc plssvm::detail::gpu_device_ptr::~gpu_device_ptr() + * @details Kokkos automatically frees the memory of a Kokkos::View if the View goes out of scope. 
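+     *          Consequently, no explicit deallocation is required here, which is why the destructor can simply be defaulted.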
+ */ + ~device_ptr() override = default; + + /** + * @copydoc plssvm::detail::gpu_device_ptr::memset(int, size_type, size_type) + */ + void memset(int pattern, size_type pos, size_type num_bytes) override; + /** + * @copydoc plssvm::detail::gpu_device_ptr::fill(value_type, size_type, size_type) + */ + void fill(value_type value, size_type pos, size_type count) override; + /** + * @copydoc plssvm::detail::gpu_device_ptr::copy_to_device(const_host_pointer_type, size_type, size_type) + */ + void copy_to_device(const_host_pointer_type data_to_copy, size_type pos, size_type count) override; + /** + * @copydoc plssvm::detail::gpu_device_ptr::copy_to_device_strided(const_host_pointer_type, std::size_t, std::size_t, std::size_t) + */ + void copy_to_device_strided(const_host_pointer_type data_to_copy, std::size_t spitch, std::size_t width, std::size_t height) override; + /** + * @copydoc plssvm::detail::gpu_device_ptr::copy_to_host(host_pointer_type, size_type, size_type) const + */ + void copy_to_host(host_pointer_type buffer, size_type pos, size_type count) const override; + /** + * @copydoc plssvm::detail::gpu_device_ptr::copy_to_other_device(derived_gpu_device_ptr &, size_type, size_type) const + */ + void copy_to_other_device(device_ptr &target, size_type pos, size_type count) const override; +}; + +extern template class device_ptr; +extern template class device_ptr; + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_PTR_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp b/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp new file mode 100644 index 000000000..ea60bb1fd --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp @@ -0,0 +1,187 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief A wrapper around a Kokkos::View. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_VIEW_WRAPPER_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_VIEW_WRAPPER_HPP_ + +#include "plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp" // plssvm::kokkos::detail::constexpr_available_execution_spaces +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/backends/Kokkos/execution_space_type_traits.hpp" // plssvm::kokkos::execution_space_to_kokkos_type_t +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t + +#include "Kokkos_Core.hpp" // Kokkos::View, Kokkos::ExecutionSpace + +#include // std::array +#include // std::size_t +#include // std::invoke +#include // std::make_index_sequence, std::index_sequence, std::move +#include // std::variant, std::get, std::visit + +namespace plssvm::kokkos::detail { + +namespace impl { + +/** + * @brief Uninstantiated base type to create a `std::variant` containing all available Kokkos::View types. 
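+ * @details For illustration only: if, e.g., only the CUDA and Serial execution spaces are enabled, the resulting variant is roughly of the form
+ * @code
+ * std::variant<Kokkos::View<T *, Kokkos::Cuda>, Kokkos::View<T *, Kokkos::Serial>>
+ * @endcode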
+ */ +template +struct create_view_variant_type_helper; + +/** + * @brief Helper struct to create a `std::variant` containing all available Kokkos::View types by iterating over the `std::array` of + * `plssvm::kokkos::execution_space` values as returned by `plssvm::kokkos::detail::constexpr_available_execution_spaces()`. + * @tparam T the value type of the underlying Kokkos::View + * @tparam Is the indices to index the `std::array` + */ +template +struct create_view_variant_type_helper> { + /// The array containing all available execution spaces. + constexpr static auto array = detail::constexpr_available_execution_spaces(); + /// The resulting variant type. + using type = std::variant>...>; +}; + +/** + * @brief Create a `std::variant` containing all available Kokkos::View types by iterating over the `std::array` of + * `plssvm::kokkos::execution_space` values as returned by `plssvm::kokkos::detail::constexpr_available_execution_spaces()`. + * @tparam T the value type of the underlying Kokkos::View + */ +template +struct create_view_variant_type { + /// The number of types in the final variant. + constexpr static std::size_t N = detail::constexpr_available_execution_spaces().size(); + /// The final variant type. + using type = typename create_view_variant_type_helper>::type; +}; + +} // namespace impl + +/** + * @brief A wrapper class around a `std::variant` that contains all available Kokkos::View types. + * @tparam T the value type of the underlying Kokkos::View + */ +template +class device_view_wrapper { + public: + /// The `std::variant` type containing all Kokkos::View types. + using variant_type = typename impl::create_view_variant_type::type; + + /** + * @brief Default construct the `std::variant` wrapper. + */ + device_view_wrapper() = default; + + /** + * @brief Construct the wrapper using the provided Kokkos::View instance by forwarding its value to the underlying `std::variant`. + * @tparam ExecutionSpace the used Kokkos::ExecutionSpace type of the Kokkos::View + * @param[in] view the Kokkos::View instance + */ + template + explicit device_view_wrapper(Kokkos::View &&view) : + v_{ std::move(view) } { } + + /** + * @brief Given the provided `execution_space` enum value, tries to get the `std::variant` alternative for the corresponding Kokkos::ExecutionSpace type. + * @tparam space the `execution_space` enum value + * @return the Kokkos::View instance (`[[nodiscard]]`) + */ + template + [[nodiscard]] Kokkos::View> &get() { + return std::get>>(v_); + } + + /** + * @copydoc plssvm::kokkos::detail::device_view_wrapper::get + */ + template + [[nodiscard]] const Kokkos::View> &get() const { + return std::get>>(v_); + } + + /** + * @brief Return the `execution_space` enum value of the currently active `std::variant` Kokkos::View type. + * @return the `execution_space` enum value (`[[nodiscard]]`) + */ + [[nodiscard]] execution_space get_execution_space() const noexcept { + return detail::constexpr_available_execution_spaces()[v_.index()]; + } + + /** + * @brief Invoke the function @p func on the active `std::variant` member using `std::visit` internally. 
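+     * @details Usage sketch, assuming a `device_view_wrapper<real_type>` named `wrapper`:
+     * @code
+     * wrapper.execute([](auto &view) { Kokkos::deep_copy(view, real_type{ 0.0 }); });
+     * @endcode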
+ * @tparam Func the type of the function + * @param[in] func the function to invoke + */ + template + void execute(const Func &func) { + // clang-format off + std::visit([&func](auto &view) { + std::invoke(func, view); + }, v_); + // clang-format on + } + + /** + * @copydoc plssvm::kokkos::detail::device_view_wrapper::execute + */ + template + void execute(const Func &func) const { + // clang-format off + std::visit([&func](const auto &view) { + std::invoke(func, view); + }, v_); + // clang-format on + } + + /** + * @brief Compare two device view wrappers for equality by comparing the wrapped `std::variant`s. + * @param[in] lhs the first device view wrapper + * @param[in] rhs the second device view wrapper + * @return `true` if both underlying `std::variant`s are equal, otherwise `false` (`[[nodiscard]]`) + */ + [[nodiscard]] friend bool operator==(const device_view_wrapper &lhs, const device_view_wrapper &rhs) noexcept { + return lhs.v_ == rhs.v_; + } + + /** + * @brief Compare two device view wrappers for inequality by comparing the wrapped `std::variant`s. + * @param[in] lhs the first device view wrapper + * @param[in] rhs the second device view wrapper + * @return `true` if both underlying `std::variant`s are unequal, otherwise `false` (`[[nodiscard]]`) + */ + [[nodiscard]] friend bool operator!=(const device_view_wrapper &lhs, const device_view_wrapper &rhs) noexcept { + return !(lhs == rhs); + } + + private: + /// The wrapped `std::variant` type. + variant_type v_; +}; + +/** + * @brief Given a execution @p space and the number of elements @p size, creates a Kokkos::View in the respective memory space. + * @tparam T the value type of the underlying Kokkos::View + * @param[in] device the device for which this view should be allocated + * @param[in] size the size of the Kokkos::View (number of elements **not** byte!) + * @return a Kokkos::View wrapper where the active member of the internal `std::variant` corresponds to the Kokkos::View in the Kokkos::ExecutionSpace specified by @p space (`[[nodiscard]]`) + */ +template +[[nodiscard]] device_view_wrapper make_device_view_wrapper(const device_wrapper &device, const std::size_t size) { + return device.execute_and_return([&](const auto &value) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + + return device_view_wrapper{ Kokkos::View{ Kokkos::view_alloc(value, "device_ptr_view"), size } }; + }); +} + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_VIEW_WRAPPER_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/device_wrapper.hpp b/include/plssvm/backends/Kokkos/detail/device_wrapper.hpp new file mode 100644 index 000000000..da0aaf755 --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/device_wrapper.hpp @@ -0,0 +1,199 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief A wrapper around a Kokkos::ExecutionSpace representing a single device. 
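+ * @details Since every Kokkos::ExecutionSpace is a distinct C++ type, the device selected at runtime is stored in a `std::variant` over all execution spaces enabled at compile time.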
+ */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_WRAPPER_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_WRAPPER_HPP_ + +#include "plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp" // plssvm::kokkos::detail::constexpr_available_execution_spaces +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/backends/Kokkos/execution_space_type_traits.hpp" // plssvm::kokkos::execution_space_to_kokkos_type_t +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include // std::array +#include // std::size_t +#include // std::invoke +#include // std::make_index_sequence, std::index_sequence, std::forward +#include // std::variant, std::get, std::visit +#include // std::vector + +namespace plssvm::kokkos::detail { + +namespace impl { + +/** + * @brief Uninstantiated base type to create a `std::variant` containing all available Kokkos::ExecutionSpace types. + */ +template +struct create_device_variant_type_helper; + +/** + * @brief Helper struct to create a `std::variant` containing all available Kokkos::ExecutionSpace types by iterating over the `std::array` of + * `plssvm::kokkos::execution_space` values as returned by `plssvm::kokkos::detail::constexpr_available_execution_spaces()`. + * @tparam Is the indices to index the `std::array` + */ +template +struct create_device_variant_type_helper> { + /// The array containing all available execution spaces. + constexpr static auto array = detail::constexpr_available_execution_spaces(); + /// The resulting variant type. + using type = std::variant...>; +}; + +/** + * @brief Create a `std::variant` containing all available Kokkos::ExecutionSpace types by iterating over the `std::array` of + * `plssvm::kokkos::execution_space` values as returned by `plssvm::kokkos::detail::constexpr_available_execution_spaces()`. + */ +struct create_device_variant_type { + /// The number of types in the final variant. + constexpr static std::size_t N = detail::constexpr_available_execution_spaces().size(); + /// The final variant type. + using type = typename create_device_variant_type_helper>::type; +}; + +} // namespace impl + +/** + * @brief A wrapper class around a `std::variant` that contains all available Kokkos::ExecutionSpace types. + */ +class device_wrapper { + public: + /// The `std::variant` type containing all Kokkos::ExecutionSpace types. + using variant_type = typename impl::create_device_variant_type::type; + + /** + * @brief Default construct the `std::variant` wrapper. + */ + device_wrapper() = default; + + /** + * @brief Construct the wrapper using the provided Kokkos::ExecutionSpace instance by forwarding its value to the underlying `std::variant`. + * @tparam ExecutionSpace the used Kokkos::ExecutionSpace type + * @param[in] exec the Kokkos::ExecutionSpace instance + */ + template + explicit device_wrapper(ExecutionSpace &&exec) : + v_{ std::forward(exec) } { } + + /** + * @brief Given the provided `execution_space` enum value, tries to get the `std::variant` alternative for the corresponding Kokkos::ExecutionSpace type. 
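+     * @details Usage sketch (assuming `execution_space::cuda` maps to `Kokkos::Cuda`; `std::get` throws `std::bad_variant_access` if a different space is currently active):
+     * @code
+     * Kokkos::Cuda &exec = device.get<execution_space::cuda>();
+     * @endcode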
+ * @tparam space the `execution_space` enum value + * @return the Kokkos::ExecutionSpace instance (`[[nodiscard]]`) + */ + template + [[nodiscard]] execution_space_to_kokkos_type_t &get() { + return std::get>(v_); + } + + /** + * @copydoc plssvm::kokkos::detail::device_wrapper::get + */ + template + const execution_space_to_kokkos_type_t &get() const { + return std::get>(v_); + } + + /** + * @brief Return the `execution_space` enum value of the currently active `std::variant` Kokkos::ExecutionSpace type. + * @return the `execution_space` enum value (`[[nodiscard]]`) + */ + [[nodiscard]] execution_space get_execution_space() const noexcept { + return detail::constexpr_available_execution_spaces()[v_.index()]; + } + + /** + * @brief Invoke the function @p func on the active `std::variant` member using `std::visit` internally. + * @tparam Func the type of the function + * @param[in] func the function to invoke + */ + template + void execute(const Func &func) { + // clang-format off + std::visit([&func](auto &device) { + std::invoke(func, device); + }, v_); + // clang-format on + } + + /** + * @copydoc plssvm::kokkos::detail::device_wrapper::execute + */ + template + void execute(const Func &func) const { + // clang-format off + std::visit([&func](const auto &device) { + std::invoke(func, device); + }, v_); + // clang-format on + } + + /** + * @brief Invoke the function @p func on the active `std::variant` member using `std::visit` internally returning the result value of the function invocation. + * @tparam Func the type of the function + * @param[in] func the function to invoke + * @return the return value of function @p func (`[[nodiscard]]`) + */ + template + [[nodiscard]] auto execute_and_return(const Func &func) { + // clang-format off + return std::visit([&func](auto &device) { + return std::invoke(func, device); + }, v_); + // clang-format on + } + + /** + * @copydoc plssvm::kokkos::detail::device_wrapper::execute_and_return + */ + template + [[nodiscard]] auto execute_and_return(const Func &func) const { + // clang-format off + return std::visit([&func](const auto &device) { + return std::invoke(func, device); + }, v_); + // clang-format on + } + + /** + * @brief Compare two device wrappers for equality by comparing the wrapped `std::variant`s. + * @param[in] lhs the first device wrapper + * @param[in] rhs the second device wrapper + * @return `true` if both underlying `std::variant`s are equal, otherwise `false` (`[[nodiscard]]`) + */ + [[nodiscard]] friend bool operator==(const device_wrapper &lhs, const device_wrapper &rhs) noexcept { + return lhs.v_ == rhs.v_; + } + + /** + * @brief Compare two device wrappers for inequality by comparing the wrapped `std::variant`s. + * @param[in] lhs the first device wrapper + * @param[in] rhs the second device wrapper + * @return `true` if both underlying `std::variant`s are unequal, otherwise `false` (`[[nodiscard]]`) + */ + [[nodiscard]] friend bool operator!=(const device_wrapper &lhs, const device_wrapper &rhs) noexcept { + return !(lhs == rhs); + } + + private: + /// The wrapped `std::variant` type. + variant_type v_{}; +}; + +/** + * @brief Get a list of all available devices in the execution @p space that are supported by the @p target platform. 
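+ * @details Usage sketch (the chosen arguments are only an example):
+ * @code
+ * const auto devices = get_device_list(execution_space::cuda, target_platform::gpu_nvidia);
+ * @endcode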
+ * @param[in] space the Kokkos::ExecutionSpace to retrieve the devices from + * @param[in] target the target platform that must be supported + * @return all devices for the @p target in the Kokkos::ExecutionSpace @p space (`[[nodiscard]]`) + */ +[[nodiscard]] std::vector get_device_list(execution_space space, target_platform target); + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_WRAPPER_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/pinned_memory.hpp b/include/plssvm/backends/Kokkos/detail/pinned_memory.hpp new file mode 100644 index 000000000..cb328e6d3 --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/pinned_memory.hpp @@ -0,0 +1,93 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Small wrapper around RAII for registering memory as pinned memory. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_PINNED_MEMORY_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_PINNED_MEMORY_HPP_ +#pragma once + +#include "plssvm/backends/host_pinned_memory.hpp" // plssvm::detail::host_pinned_memory +#include "plssvm/matrix.hpp" // plssvm::matrix, plssvm::layout_type + +#include // std::size_t +#include // std::vector + +namespace plssvm::kokkos::detail { + +/** + * @brief A small RAII wrapper class to register/unregister pinned memory. + * @tparam T the type of the data array that should be pinned + */ +template +class [[nodiscard]] pinned_memory final : public ::plssvm::detail::host_pinned_memory { + /// The template base type of the CUDA pinned_memory class. + using base_type = ::plssvm::detail::host_pinned_memory; + + using base_type::is_pinned_; + using base_type::ptr_; + + public: + using typename base_type::value_type; + + /** + * @brief Register the memory managed by the matrix @p matr to use pinned memory. + * @tparam layout the layout type of the matrix + * @param[in] matr the memory to pin + */ + template + explicit pinned_memory(const matrix &matr) : + pinned_memory{ matr.data(), matr.size_padded() } { } + + /** + * @brief Register the memory managed by the vector @p vec to use pinned memory. + * @param[in] vec the memory to pin + */ + explicit pinned_memory(const std::vector &vec); + /** + * @brief Register the memory managed by the pointer @p ptr with @p size to use pinned memory. + * @param[in] ptr the memory to pin + * @param[in] size the number of elements in the memory region to pin (**not** bytes!) + */ + pinned_memory(const T *ptr, std::size_t size); + /** + * @brief Unregister the memory managed by this object. + */ + ~pinned_memory() override; + + /** + * @brief Must provide a memory that should be pinned. + */ + pinned_memory() = delete; + /** + * @brief Delete the copy-constructor. + */ + pinned_memory(const pinned_memory &) = delete; + /** + * @brief Delete the move-constructor. + */ + pinned_memory(pinned_memory &&) noexcept = delete; + /** + * @brief Delete the copy-assignment operator. + * @return `*this` + */ + pinned_memory &operator=(const pinned_memory &) = delete; + /** + * @brief Delete the move-assignment operator. 
+ * @return `*this` + */ + pinned_memory &operator=(pinned_memory &&) noexcept = delete; +}; + +extern template class pinned_memory; +extern template class pinned_memory; + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_PINNED_MEMORY_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp b/include/plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp new file mode 100644 index 000000000..5b26f5e98 --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp @@ -0,0 +1,131 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Implementation of a basic and minimalistic tuple class which is standard-layout conform. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_STANDARD_LAYOUT_TUPLE_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_STANDARD_LAYOUT_TUPLE_HPP_ +#pragma once + +#include "plssvm/constants.hpp" // plssvm::real_type + +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION + +#include // std::size_t +#include // std::is_standard_layout +#include // std::forward + +namespace plssvm::kokkos::detail { + +/* + * Empty base implementation. + */ +template +struct standard_layout_tuple; + +/** + * @brief Save the value of type @p T as scalar and the remaining values of type @p Rest recursively in another standard layout tuple. + * @tparam T the type of the value to save in this tuple + * @tparam Rest the remaining types saved in a recursive tuple + */ +template +struct standard_layout_tuple { + /// The stored value. + T value; + /// The remaining values stored in their own tuple. + standard_layout_tuple remaining; +}; + +/** + * @brief Special case for an empty tuple (recursion termination criterion). + */ +template <> +struct standard_layout_tuple<> { }; + +namespace impl { + +/** + * @brief Recursively traverse (at compile time) the tuple @p t and retrieve the value at position @p I. + * @tparam I the index of the tuple value to get + */ +template +struct get_impl { + /** + * @brief Recursively traverse (at compile time) the tuple @p t and retrieve the value at position @p I. + * @tparam Types the types in the tuple + * @param[in] t the tuple to traverse + * @return the requested value (`[[nodiscard]]`) + */ + template + KOKKOS_INLINE_FUNCTION constexpr static auto get(const standard_layout_tuple &t) { + return get_impl::get(t.remaining); + } +}; + +/** + * @brief Special case to retrieve the currently held value (recursion termination criterion). + */ +template <> +struct get_impl<0> { + /** + * @brief Get the held value from @p t. + * @tparam Types the types in the tuple + * @param[in] t the tuple to get the value from + * @return the requested value (`[[nodiscard]]`) + */ + template + KOKKOS_INLINE_FUNCTION constexpr static auto get(const standard_layout_tuple &t) { + return t.value; + } +}; + +} // namespace impl + +/** + * @brief Get the value at position @p I of the tuple @p t holding the @p Types. 
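+ * @details Usage sketch:
+ * @code
+ * constexpr auto t = make_standard_layout_tuple(42, real_type{ 1.5 });
+ * static_assert(get<0>(t) == 42);
+ * @endcode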
+ * @tparam I the position of the element in the tuple to get + * @tparam Types the types stored in the tuple + * @param[in] t the tuple + * @return the value of the tuple @p t at position @p I (`[[nodiscard]]`) + */ +template +KOKKOS_INLINE_FUNCTION constexpr auto get(const standard_layout_tuple &t) { + static_assert(I < sizeof...(Types), "Invalid standard_layout_tuple index!"); + return impl::get_impl::get(t); +} + +/** + * @brief Special case: return an empty tuple if no values have bee provided. + * @return an empty tuple (`[[nodiscard]]`) + */ +[[nodiscard]] inline constexpr standard_layout_tuple<> make_standard_layout_tuple() { + return standard_layout_tuple<>{}; +} + +/** + * @brief Create a new tuple storing the values @p arg and @p remaining. + * @tparam T the type of the first value + * @tparam Rest the types of the remaining values (if any) + * @param[in,out] arg the first value + * @param[in,out] remaining the remaining values (if any) + * @return the constructed tuple (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline constexpr standard_layout_tuple make_standard_layout_tuple(T &&arg, Rest &&...remaining) { + return standard_layout_tuple{ std::forward(arg), make_standard_layout_tuple(std::forward(remaining)...) }; +} + +// sanity checks: be sure that the important use cases are indeed standard layout types! +static_assert(std::is_standard_layout_v>, "standard_layout_tuple<> has no standard layout!"); +static_assert(std::is_standard_layout_v>, "standard_layout_tuple has no standard layout!"); +static_assert(std::is_standard_layout_v>, "standard_layout_tuple has no standard layout!"); + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_STANDARD_LAYOUT_TUPLE_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/utility.hpp b/include/plssvm/backends/Kokkos/detail/utility.hpp new file mode 100644 index 000000000..9bbc9b172 --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/utility.hpp @@ -0,0 +1,103 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Utility functions for the Kokkos backend. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_UTILITY_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_UTILITY_HPP_ +#pragma once + +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "Kokkos_Core.hpp" // Kokkos::ExecutionSpace::fence + +#include // std::map +#include // std::string +#include // std::disjunction, std::is_same +#include // std::variant +#include // std::vector + +namespace plssvm::kokkos::detail { + +namespace impl { + +/** + * @brief Uninstantiated base type for the check whether a type @p appears in a std::variant @p Variant. + * @tparam T the type to check for inclusion + * @tparam Variant the std::variant that should include the type @p T + */ +template +struct is_type_in_variant; + +/** + * @brief Implement the inclusion check using `std::disjunction`. 
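+ * @details For example, `is_type_in_variant_v<int, std::variant<int, float>>` evaluates to `true`, while `is_type_in_variant_v<double, std::variant<int, float>>` evaluates to `false`.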
+ * @tparam T the type to check for inclusion + * @tparam Variant the std::variant that should include the type @p T + */ +template +struct is_type_in_variant> : std::disjunction...> { }; + +/** + * @copydoc plssvm::kokkos::detail::impl::is_type_in_variant + */ +template +inline constexpr bool is_type_in_variant_v = is_type_in_variant::value; + +} // namespace impl + +/** + * @brief Convert a `plssvm::detail::dim_type` to a Kokkos native one-dimensional value. + * @param[in] dims the dimensional value to convert + * @return the native one-dimensional value (`[[nodiscard]]`) + */ +[[nodiscard]] int dim_type_to_native(const ::plssvm::detail::dim_type &dims); + +/** + * @brief Return a `std::map` containing a mapping from all available target platforms to the available Kokkos::ExecutionSpace that supports said target platform. + * @details If a target platform is supported by multiple Kokkos::ExecutionSpace, the order is determined by the order as returned by `list_available_execution_spaces`. + * @return the mapping of all available target_platform <-> Kokkos::ExecutionSpace combinations (`[[nodiscard]]`) + */ +[[nodiscard]] std::map> available_target_platform_to_execution_space_mapping(); + +/** + * @brief Get the name of the device represented by the `device_wrapper` @p dev. + * @param[in] dev the device wrapper + * @return the device name (`[[nodiscard]]`) + */ +[[nodiscard]] std::string get_device_name(const device_wrapper &dev); + +/** + * @brief Wait for all kernel and/or other operations on the device wrapper in the @p dev to finish. + * @param[in] dev the device wrapper + */ +void device_synchronize(const device_wrapper &dev); + +/** + * @brief Wait for all kernel and/or other operations on the device represented by the Kokkos::ExecutionSpace @p exec to finish. + * @tparam ExecutionSpace the type of the Kokkos::ExecutionSpace + * @param[in] exec the device represented by a Kokkos::ExecutionSpace + */ +template )> +void device_synchronize(const ExecutionSpace &exec) { + exec.fence(); +} + +/** + * @brief Get the used Kokkos library version. + * @return the library version (`[[nodiscard]]`) + */ +[[nodiscard]] std::string get_kokkos_version(); + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_UTILITY_HPP_ diff --git a/include/plssvm/backends/Kokkos/exceptions.hpp b/include/plssvm/backends/Kokkos/exceptions.hpp new file mode 100644 index 000000000..60a9fc8dd --- /dev/null +++ b/include/plssvm/backends/Kokkos/exceptions.hpp @@ -0,0 +1,38 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Implements custom exception classes specific to the Kokkos backend. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_EXCEPTIONS_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_EXCEPTIONS_HPP_ +#pragma once + +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/exceptions/source_location.hpp" // plssvm::source_location + +#include // std::string + +namespace plssvm::kokkos { + +/** + * @brief Exception type thrown if a problem with the Kokkos backend occurs. + */ +class backend_exception : public exception { + public: + /** + * @brief Construct a new exception forwarding the exception message and source location to plssvm::exception. 
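+ * @note Illustrative usage sketch (not part of the original sources); assumes only the `std::exception` interface inherited via `plssvm::exception`:
+ * @code{.cpp}
+ * #include <iostream>
+ *
+ * try {
+ *     throw plssvm::kokkos::backend_exception{ "no suitable Kokkos execution space available" };
+ * } catch (const plssvm::exception &e) {
+ *     std::cerr << e.what() << '\n';
+ * }
+ * @endcode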
+ * @param[in] msg the exception's `what()` message
+ * @param[in] loc the exception's call site information
+ */
+    explicit backend_exception(const std::string &msg, source_location loc = source_location::current());
+};
+
+} // namespace plssvm::kokkos
+
+#endif // PLSSVM_BACKENDS_KOKKOS_EXCEPTIONS_HPP_
diff --git a/include/plssvm/backends/Kokkos/execution_space.hpp b/include/plssvm/backends/Kokkos/execution_space.hpp
new file mode 100644
index 000000000..cc9114412
--- /dev/null
+++ b/include/plssvm/backends/Kokkos/execution_space.hpp
@@ -0,0 +1,82 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Execution space enumeration for the ExecutionSpaces in Kokkos.
+ */
+
+#ifndef PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_HPP_
+#define PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_HPP_
+#pragma once
+
+#include "fmt/base.h"     // fmt::formatter
+#include "fmt/ostream.h"  // fmt::ostream_formatter
+
+#include <iosfwd>  // std::ostream forward declaration
+#include <vector>  // std::vector
+
+namespace plssvm::kokkos {
+
+/**
+ * @brief Enum class for all execution spaces supported by [Kokkos](https://github.com/kokkos/kokkos).
+ */
+enum class execution_space {
+    /** Automatically determine the used Kokkos execution space. Note: this does not necessarily correspond to Kokkos::DefaultExecutionSpace! */
+    automatic,
+    /** Execution space representing execution on a CUDA device. */
+    cuda,
+    /** Execution space representing execution on a device supported by HIP. */
+    hip,
+    /** Execution space representing execution on a device supported by SYCL. */
+    sycl,
+    /** Execution space representing execution with the HPX runtime system. */
+    hpx,
+    /** Execution space representing execution with the OpenMP runtime system. */
+    openmp,
+    /** Execution space representing execution using the target offloading feature of the OpenMP runtime system. */
+    openmp_target,
+    /** Execution space representing execution with the OpenACC runtime system. */
+    openacc,
+    /** Execution space representing parallel execution with std::threads. */
+    threads,
+    /** Execution space representing serial execution on the CPU. Should always be available. */
+    serial
+};
+
+/**
+ * @brief Output the execution @p space to the given output-stream @p out.
+ * @param[in,out] out the output-stream to write the execution space to
+ * @param[in] space the Kokkos execution space
+ * @return the output-stream
+ */
+std::ostream &operator<<(std::ostream &out, execution_space space);
+
+/**
+ * @brief Use the input-stream @p in to initialize the execution @p space.
+ * @param[in,out] in input-stream to extract the execution space from
+ * @param[out] space the Kokkos execution space
+ * @return the input-stream
+ */
+std::istream &operator>>(std::istream &in, execution_space &space);
+
+/**
+ * @brief List all available Kokkos::ExecutionSpaces.
+ * @details Only Kokkos::ExecutionSpaces that were enabled during the CMake configuration are available.
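+ * @note Illustrative usage sketch (not part of the original sources); relies on the `fmt::formatter` specialization declared at the end of this header:
+ * @code{.cpp}
+ * #include "fmt/core.h"  // fmt::print
+ *
+ * for (const plssvm::kokkos::execution_space space : plssvm::kokkos::list_available_execution_spaces()) {
+ *     fmt::print("{}\n", space);
+ * }
+ * @endcode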
+ * @return the available Kokkos::ExecutionSpaces (`[[nodiscard]]`) + */ +[[nodiscard]] std::vector list_available_execution_spaces(); + +} // namespace plssvm::kokkos + +/// @cond + +template <> +struct fmt::formatter : fmt::ostream_formatter { }; + +/// @endcond + +#endif // PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_HPP_ diff --git a/include/plssvm/backends/Kokkos/execution_space_type_traits.hpp b/include/plssvm/backends/Kokkos/execution_space_type_traits.hpp new file mode 100644 index 000000000..aa5e31751 --- /dev/null +++ b/include/plssvm/backends/Kokkos/execution_space_type_traits.hpp @@ -0,0 +1,238 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Execution space type traits for the ExecutionSpaces in Kokkos. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_TYPE_TRAITS_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_TYPE_TRAITS_HPP_ +#pragma once + +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space + +#include "Kokkos_Core.hpp" // Kokkos macros, Kokkos ExecutionSpace types + +namespace plssvm::kokkos { + +//***************************************************// +// execution_space_to_kokkos_type // +//***************************************************// + +/** + * @brief Uninstantiated base type to convert an `execution_space` enum value to a Kokkos::ExecutionSpace type. + */ +template +struct execution_space_to_kokkos_type; + +#if defined(KOKKOS_ENABLE_CUDA) +/** + * @brief Convert an `execution_space::cuda` enum value to a `Kokkos::Cuda` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Cuda; +}; +#endif + +#if defined(KOKKOS_ENABLE_HIP) +/** + * @brief Convert an `execution_space::hip` enum value to a `Kokkos::HIP` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::HIP; +}; +#endif + +#if defined(KOKKOS_ENABLE_SYCL) +/** + * @brief Convert an `execution_space::sycl` enum value to a `Kokkos::SYCL` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::SYCL; +}; +#endif + +#if defined(KOKKOS_ENABLE_HPX) +/** + * @brief Convert an `execution_space::hpx` enum value to a `Kokkos::Experimental::HPX` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Experimental::HPX; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +/** + * @brief Convert an `execution_space::openmp` enum value to a `Kokkos::OpenMP` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::OpenMP; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENMPTARGET) +/** + * @brief Convert an `execution_space::openmp_target` enum value to a `Kokkos::Experimental::OpenMPTarget` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Experimental::OpenMPTarget; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENACC) +/** + * @brief Convert an `execution_space::openacc` enum value to a `Kokkos::Experimental::OpenACC` Kokkos::ExecutionSpace type. 
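+ * @note Illustrative sketch (not part of the original sources) of how this trait family is meant to be used (requires `<type_traits>`):
+ * @code{.cpp}
+ * #if defined(KOKKOS_ENABLE_OPENACC)
+ * static_assert(std::is_same_v<execution_space_to_kokkos_type_t<execution_space::openacc>, Kokkos::Experimental::OpenACC>);
+ * #endif
+ * @endcode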
+ */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Experimental::OpenACC; +}; +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +/** + * @brief Convert an `execution_space::threads` enum value to a `Kokkos::Threads` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Threads; +}; +#endif + +#if defined(KOKKOS_ENABLE_SERIAL) +/** + * @brief Convert an `execution_space::serial` enum value to a `Kokkos::Serial` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Serial; +}; +#endif + +/** + * @brief Convert the `execution_space` @p space to the corresponding Kokkos::ExecutionSpace type. + * @tparam space the enum value to convert + */ +template +using execution_space_to_kokkos_type_t = typename execution_space_to_kokkos_type::type; + +//***************************************************// +// kokkos_type_to_execution_space // +//***************************************************// + +/** + * @brief Uninstantiated base type to convert a Kokkos::ExecutionSpace type to a `execution_space` enum value. + */ +template +struct kokkos_type_to_execution_space; + +#if defined(KOKKOS_ENABLE_CUDA) +/** + * @brief Convert a `Kokkos::Cuda` Kokkos::ExecutionSpace type to an `execution_space::cuda` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::cuda; +}; +#endif + +#if defined(KOKKOS_ENABLE_HIP) +/** + * @brief Convert a `Kokkos::HIP` Kokkos::ExecutionSpace type to an `execution_space::hip` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::hip; +}; +#endif + +#if defined(KOKKOS_ENABLE_SYCL) +/** + * @brief Convert a `Kokkos::SYCL` Kokkos::ExecutionSpace type to an `execution_space::sycl` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::sycl; +}; +#endif + +#if defined(KOKKOS_ENABLE_HPX) +/** + * @brief Convert a `Kokkos::Experimental::HPX` Kokkos::ExecutionSpace type to an `execution_space::hpx` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::hpx; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +/** + * @brief Convert a `Kokkos::OpenMP` Kokkos::ExecutionSpace type to an `execution_space::openmp` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::openmp; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENMPTARGET) +/** + * @brief Convert a `Kokkos::Experimental::OpenMPTarget` Kokkos::ExecutionSpace type to an `execution_space::openmp_target` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::openmp_target; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENACC) +/** + * @brief Convert a `Kokkos::Experimental::OpenACC` Kokkos::ExecutionSpace type to an `execution_space::openacc` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::openacc; +}; +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +/** + * @brief Convert a `Kokkos::Threads` Kokkos::ExecutionSpace type to an `execution_space::threads` enum value. 
+ */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::threads; +}; +#endif + +#if defined(KOKKOS_ENABLE_SERIAL) +/** + * @brief Convert a `Kokkos::Serial` Kokkos::ExecutionSpace type to an `execution_space::serial` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::serial; +}; +#endif + +/** + * @brief Convert the Kokkos::ExecutionSpace type @p ExecutionSpace to the corresponding `execution_space` enum value. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace type to convert + */ +template +inline constexpr execution_space kokkos_type_to_execution_space_v = kokkos_type_to_execution_space::value; + +} // namespace plssvm::kokkos + +#endif // PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_TYPE_TRAITS_HPP_ diff --git a/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp new file mode 100644 index 000000000..bddadac01 --- /dev/null +++ b/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp @@ -0,0 +1,450 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Functions for explicitly performing a BLAS GEMM like matrix-matrix multiplication using the Kokkos backend. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_CG_EXPLICIT_BLAS_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_CG_EXPLICIT_BLAS_HPP_ +#pragma once + +#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} + +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::View, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents + +#include // std::size_t + +namespace plssvm::kokkos::detail { + +/** + * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a `m x k` symmetric matrix (memory optimized), @p B is a `k x n` matrix, @p C is a `m x n` matrix, and @p alpha and @p beta are scalars. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel + */ +template +class device_kernel_symm { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + + public: + /** + * @brief Initialize the Kokkos kernel function object. 
+ * @param[in] num_rows the number of rows in @p A and @p C + * @param[in] num_rhs the number of columns in @p B and @p C + * @param[in] device_specific_num_rows the number of rows in @p A and number of rows in @p B; thr rows in @p A are potentially distributed across multiple devices + * @param[in] row_offset the first row this device is responsible for + * @param[in] alpha the scalar alpha value + * @param[in] A the matrix @p A + * @param[in] B the matrix @p B + * @param[in] beta the scalar beta value + * @param[in,out] C the matrix @p C, also used as result matrix + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension + */ + device_kernel_symm(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type alpha, device_view_type A, device_view_type B, const real_type beta, device_view_type C, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : + num_rows_{ num_rows }, + num_rhs_{ num_rhs }, + device_specific_num_rows_{ device_specific_num_rows }, + row_offset_{ row_offset }, + alpha_{ alpha }, + A_{ A }, + B_{ B }, + beta_{ beta }, + C_{ C }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x } { } + + /** + * @brief Function call operator overload performing the actual calculation. + * @param[in] team the Kokkos team representing the current point in the execution space + */ + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto FEATURE_BLOCK_SIZE_sz = static_cast(FEATURE_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // calculate the indices used in the current thread + const auto i = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; // # rhs -> num_rhs + const auto i_linear = blockIdx_x * blockDim_x * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + const auto j = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; // # rows -> num_mirror_rows + const auto j_linear = blockIdx_y * blockDim_y * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + + // create the shared memory arrays used for caching data point features + constexpr std::size_t shmem_size = FEATURE_BLOCK_SIZE * 
THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE; + real_type *data_cache_ptr = static_cast(team.team_shmem().get_shmem(2 * shmem_size * sizeof(real_type))); + Kokkos::mdspan> A_cache{ data_cache_ptr, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> B_cache{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // create a thread private array used for internal caching + real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{}; + + // iterate over all features using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < (num_rows_ - row_offset_); dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_i = i_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + const auto global_j = j_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + // determine on which side of the diagonal we are located + if (dim + threadIdx_y < global_j) { + A_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = A_[(dim + threadIdx_y) * (num_rows_ - row_offset_ + PADDING_SIZE_sz) + global_j - (dim + threadIdx_y) * (dim + threadIdx_y + std::size_t{ 1 }) / std::size_t{ 2 }]; + } else { + A_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = A_[global_j * (num_rows_ - row_offset_ + PADDING_SIZE_sz) + dim + threadIdx_y - global_j * (global_j + std::size_t{ 1 }) / std::size_t{ 2 }]; + } + // determine on which side of the diagonal we are located + if (dim + threadIdx_y + THREAD_BLOCK_SIZE < global_j) { + A_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = A_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ - row_offset_ + PADDING_SIZE_sz) + global_j - (dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (dim + threadIdx_y + THREAD_BLOCK_SIZE_sz + std::size_t{ 1 }) / std::size_t{ 2 }]; + } else { + A_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = A_[global_j * (num_rows_ - row_offset_ + PADDING_SIZE_sz) + dim + threadIdx_y + THREAD_BLOCK_SIZE_sz - global_j * (global_j + std::size_t{ 1 }) / std::size_t{ 2 }]; + } + + B_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = B_[(dim + row_offset_ + threadIdx_y) * (num_rhs_ + PADDING_SIZE_sz) + global_i]; + B_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = B_[(dim + row_offset_ + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rhs_ + PADDING_SIZE_sz) + global_i]; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // perform the dot product calculation + for (unsigned block_dim = 0; block_dim < FEATURE_BLOCK_SIZE; ++block_dim) { + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + temp[internal_i][internal_j] += A_cache(block_dim, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_j) * B_cache(block_dim, threadIdx_x * INTERNAL_BLOCK_SIZE + internal_i); + } + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // apply the (partial) BLAS operation and update C + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const auto global_i = i + static_cast(internal_i); + const auto device_global_j = j + static_cast(internal_j); 
+ const auto global_j = row_offset_ + j + static_cast(internal_j); + + // be sure to not perform out of bounds accesses + if (global_i < num_rhs_ && device_global_j < device_specific_num_rows_) { + C_[global_j * (num_rhs_ + PADDING_SIZE_sz) + global_i] = alpha_ * temp[internal_i][internal_j] + beta_ * C_[global_j * (num_rhs_ + PADDING_SIZE_sz) + global_i]; + } + } + } + } + + private: + /// @cond Doxygen_suppress + const std::size_t num_rows_; + const std::size_t num_rhs_; + const std::size_t device_specific_num_rows_; + const std::size_t row_offset_; + const real_type alpha_; + device_view_type A_; + device_view_type B_; + const real_type beta_; + device_view_type C_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + /// @endcond +}; + +/** + * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a `m x k` symmetric matrix (memory optimized), @p B is a `k x n` matrix, @p C is a `m x n` matrix, and @p alpha and @p beta are scalars. + * @details In a multi-GPU setting, this function is responsible for mirroring down the columns this device is responsible for! + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel + */ +template +class device_kernel_symm_mirror { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + + public: + /** + * @brief Initialize the Kokkos kernel function object. + * @param[in] num_rows the number of rows in @p A and @p C + * @param[in] num_rhs the number of columns in @p B and @p C + * @param[in] num_mirror_rows the number of rows to mirror down + * @param[in] device_specific_num_rows the number of rows in @p A and number of rows in @p B; thr rows in @p A are potentially distributed across multiple devices + * @param[in] row_offset the first row this device is responsible for + * @param[in] alpha the scalar alpha value + * @param[in] A the matrix @p A + * @param[in] B the matrix @p B + * @param[in] beta the scalar beta value + * @param[in,out] C the matrix @p C, also used as result matrix + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension + */ + device_kernel_symm_mirror(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t num_mirror_rows, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type alpha, device_view_type A, device_view_type B, const real_type beta, device_view_type C, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : + num_rows_{ num_rows }, + num_rhs_{ num_rhs }, + num_mirror_rows_{ num_mirror_rows }, + device_specific_num_rows_{ device_specific_num_rows }, + row_offset_{ row_offset }, + alpha_{ alpha }, + A_{ A }, + B_{ B }, + beta_{ beta }, + C_{ C }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x } { } + + /** + * @brief Function call operator overload performing the actual calculation. 
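+ * @note For orientation (not part of the original sources): the flat `team.team_rank()` is decomposed into a CUDA-style 2D thread index, i.e.
+ * @code{.cpp}
+ * const auto threadIdx_x = team.team_rank() / THREAD_BLOCK_SIZE;  // current thread in block x-dimension
+ * const auto threadIdx_y = team.team_rank() % THREAD_BLOCK_SIZE;  // current thread in block y-dimension
+ * @endcode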
+ * @param[in] team the Kokkos team representing the current point in the execution space + */ + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto FEATURE_BLOCK_SIZE_sz = static_cast(FEATURE_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // calculate the indices used in the current thread + const auto i = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; // # rhs -> num_rhs + const auto i_linear = blockIdx_x * blockDim_x * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + const auto j = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; // # rows -> num_mirror_rows + const auto j_linear = blockIdx_y * blockDim_y * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + + // create the shared memory arrays used for caching data point features + constexpr std::size_t shmem_size = FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE; + real_type *data_cache_ptr = static_cast(team.team_shmem().get_shmem(2 * shmem_size * sizeof(real_type))); + Kokkos::mdspan> A_cache{ data_cache_ptr, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> B_cache{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // create a thread private array used for internal caching + real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{}; + + // iterate over the remaining features using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < device_specific_num_rows_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_i = i_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + const auto global_j = j_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + A_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = A_[(dim + threadIdx_y) * (num_rows_ - row_offset_ + PADDING_SIZE_sz) - (dim + threadIdx_y - std::size_t{ 1 }) * (dim + threadIdx_y) / std::size_t{ 2 } + device_specific_num_rows_ - (dim + threadIdx_y) + global_j]; + A_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = A_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ - row_offset_ + PADDING_SIZE_sz) - (dim + threadIdx_y + THREAD_BLOCK_SIZE_sz - std::size_t{ 1 }) * (dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) / 
std::size_t{ 2 } + device_specific_num_rows_ - (dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) + global_j]; + B_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = B_[(row_offset_ + dim + threadIdx_y) * (num_rhs_ + PADDING_SIZE_sz) + global_i]; + B_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = B_[(row_offset_ + dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rhs_ + PADDING_SIZE_sz) + global_i]; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // perform the feature reduction calculation + for (unsigned block_dim = 0; block_dim < FEATURE_BLOCK_SIZE; ++block_dim) { + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + temp[internal_i][internal_j] += A_cache(block_dim, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_j) * B_cache(block_dim, threadIdx_x * INTERNAL_BLOCK_SIZE + internal_i); + } + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // apply the (remaining) BLAS operation and update C + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const auto global_i = i + static_cast(internal_i); + const auto partial_global_j = j + static_cast(internal_j); + const auto global_j = row_offset_ + device_specific_num_rows_ + j + static_cast(internal_j); + + // be sure to not perform out of bounds accesses + if (global_i < num_rhs_ && partial_global_j < num_mirror_rows_) { + C_[global_j * (num_rhs_ + PADDING_SIZE_sz) + global_i] = alpha_ * temp[internal_i][internal_j] + beta_ * C_[global_j * (num_rhs_ + PADDING_SIZE_sz) + global_i]; + } + } + } + } + + private: + /// @cond Doxygen_suppress + const std::size_t num_rows_; + const std::size_t num_rhs_; + const std::size_t num_mirror_rows_; + const std::size_t device_specific_num_rows_; + const std::size_t row_offset_; + const real_type alpha_; + device_view_type A_; + device_view_type B_; + const real_type beta_; + device_view_type C_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + /// @endcond +}; + +/** + * @brief Perform a simple inplace matrix addition: lhs += rhs. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel + */ +template +class device_kernel_inplace_matrix_add { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + + public: + /** + * @brief Initialize the Kokkos kernel function object. 
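+ * @note Illustrative sketch (not part of the original sources): ignoring the blocked thread mapping, the kernel is equivalent to
+ * @code{.cpp}
+ * for (std::size_t row = 0; row < num_rows; ++row) {      // num_rows: placeholder for the rows covered by the execution grid
+ *     for (std::size_t col = 0; col < num_cols; ++col) {
+ *         lhs[row * (num_cols + PADDING_SIZE) + col] += rhs[row * (num_cols + PADDING_SIZE) + col];
+ *     }
+ * }
+ * @endcode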
+ * @param[in] num_cols the number of columns in both matrices + * @param[in,out] lhs the first matrix (updated inplace) + * @param[in] rhs the second matrix + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension + */ + device_kernel_inplace_matrix_add(const std::size_t num_cols, device_view_type lhs, device_view_type rhs, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : + num_cols_{ num_cols }, + lhs_{ lhs }, + rhs_{ rhs }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x } { } + + /** + * @brief Function call operator overload performing the actual calculation. + * @param[in] team the Kokkos team representing the current point in the execution space + */ + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // Calculate the indices used in the current thread + const auto i = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; // num_rows + const auto j = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; // num_rhs + + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const auto global_i = i + static_cast(internal_i); + const auto global_j = j + static_cast(internal_j); + + lhs_[global_i * (num_cols_ + PADDING_SIZE_sz) + global_j] += rhs_[global_i * (num_cols_ + PADDING_SIZE_sz) + global_j]; + } + } + } + + private: + /// @cond Doxygen_suppress + const std::size_t num_cols_; + device_view_type lhs_; + device_view_type rhs_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + /// @endcond +}; + +/** + * @brief Perform a simple inplace matrix scale: lhs *= scalar. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel + */ +template +class device_kernel_inplace_matrix_scale { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + + public: + /** + * @brief Initialize the Kokkos kernel function object. 
+ * @param[in] num_cols the number of columns in the matrix + * @param[in,out] lhs the first matrix (updated inplace) + * @param[in] scale the value to scale + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension + */ + device_kernel_inplace_matrix_scale(const std::size_t num_cols, device_view_type lhs, const real_type scale, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : + num_cols_{ num_cols }, + lhs_{ lhs }, + scale_{ scale }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x } { } + + /** + * @brief Function call operator overload performing the actual calculation. + * @param[in] team the Kokkos team representing the current point in the execution space + */ + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // Calculate the indices used in the current thread + const auto i = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; // num_rows + const auto j = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; // num_rhs + + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const auto global_i = i + static_cast(internal_i); + const auto global_j = j + static_cast(internal_j); + + lhs_[global_i * (num_cols_ + PADDING_SIZE_sz) + global_j] *= scale_; + } + } + } + + private: + /// @cond Doxygen_suppress + const std::size_t num_cols_; + device_view_type lhs_; + const real_type scale_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + /// @endcond +}; + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_CG_EXPLICIT_BLAS_HPP_ diff --git a/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp new file mode 100644 index 000000000..8e42e8b41 --- /dev/null +++ b/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp @@ -0,0 +1,181 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * 
@copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Functions for explicitly assembling the kernel matrix using the Kokkos backend. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_ +#pragma once + +#include "plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp" // plssvm::kokkos::detail::standard_layout_tuple +#include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} +#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type + +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::View, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents + +#include // std::size_t + +namespace plssvm::kokkos::detail { + +/** + * @brief Create the explicit kernel matrix using the @p kernel_function. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel + * @tparam kernel_function the type of the used kernel function + * @tparam Args the types of the parameters necessary for the specific kernel function; stored in a `standard_layout_tuple` + */ +template +class device_kernel_assembly { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + + public: + /** + * @brief Initialize the Kokkos kernel function object. + * @param[out] kernel_matrix_d the calculated kernel matrix + * @param[in] data_d the data points to calculate the kernel matrix from + * @param[in] num_rows the number of data points + * @param[in] device_num_rows the number of rows the current device is responsible for + * @param[in] row_offset the first row in @p data_d the current device is responsible for + * @param[in] num_features the number of features per data point + * @param[in] q the vector used in the dimensional reduction + * @param[in] QA_cost the scalar used in the dimensional reduction + * @param[in] cost the cost factor the diagonal is scaled with + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension + * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function + */ + device_kernel_assembly(device_view_type kernel_matrix_d, device_view_type data_d, const std::size_t num_rows, const std::size_t device_num_rows, const std::size_t row_offset, const std::size_t num_features, device_view_type q, const real_type QA_cost, const real_type cost, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x, Args... 
kernel_function_parameter) : + kernel_matrix_d_{ kernel_matrix_d }, + data_d_{ data_d }, + num_rows_{ num_rows }, + device_num_rows_{ device_num_rows }, + row_offset_{ row_offset }, + num_features_{ num_features }, + q_{ q }, + QA_cost_{ QA_cost }, + cost_{ cost }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x }, + kernel_function_parameter_{ detail::make_standard_layout_tuple(std::forward(kernel_function_parameter)...) } { + } + + /** + * @brief Function call operator overload performing the actual calculation. + * @param[in] team the Kokkos team representing the current point in the execution space + */ + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto FEATURE_BLOCK_SIZE_sz = static_cast(FEATURE_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // calculate the indices used in the current thread + const auto i = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; // # rhs -> num_rhs + const auto i_linear = blockIdx_x * blockDim_x * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + const auto j = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; // # rows -> num_mirror_rows + const auto j_linear = blockIdx_y * blockDim_y * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + + // create the shared memory arrays used for caching data point features + constexpr std::size_t shmem_size = FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE; + real_type *data_cache_ptr = static_cast(team.team_shmem().get_shmem(2 * shmem_size)); + Kokkos::mdspan> data_cache_i{ data_cache_ptr, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> data_cache_j{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // only calculate the upper triangular matrix -> can't use threadIdx since all threads in a warp must progress further + if (blockIdx_x >= blockIdx_y) { + // create a thread private array used for internal caching + real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{}; + + // iterate over all features using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < num_features_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_i = row_offset_ + i_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + const auto global_j = row_offset_ + j_linear + 
static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + data_cache_i(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_i]; + data_cache_i(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_i]; + data_cache_j(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_j]; + data_cache_j(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_j]; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // perform the feature reduction calculation + for (unsigned block_dim = 0; block_dim < FEATURE_BLOCK_SIZE; ++block_dim) { + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + temp[internal_i][internal_j] += detail::feature_reduce(data_cache_i(block_dim, threadIdx_x * INTERNAL_BLOCK_SIZE + internal_i), + data_cache_j(block_dim, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_j)); + } + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // apply the remaining part of the kernel function and store the value in the output kernel matrix + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + // calculate the indices to access the kernel matrix (the part stored on the current device) + const auto device_global_i = i + static_cast(internal_i); + const auto global_i = row_offset_ + i + static_cast(internal_i); + const auto device_global_j = j + static_cast(internal_j); + const auto global_j = row_offset_ + j + static_cast(internal_j); + + // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix) + if (device_global_i < (num_rows_ - row_offset_) && device_global_j < device_num_rows_ && global_i >= global_j) { + real_type temp_ij = temp[internal_i][internal_j]; + temp_ij = detail::apply_kernel_function(temp_ij, kernel_function_parameter_) + QA_cost_ - q_[global_i] - q_[global_j]; + // apply the cost on the diagonal + if (global_i == global_j) { + temp_ij += cost_; + } + // update the kernel matrix + kernel_matrix_d_[device_global_j * (num_rows_ - row_offset_ + PADDING_SIZE_sz) - device_global_j * (device_global_j + std::size_t{ 1 }) / std::size_t{ 2 } + device_global_i] = temp_ij; + } + } + } + } + } + + private: + /// @cond Doxygen_suppress + device_view_type kernel_matrix_d_; + device_view_type data_d_; + const std::size_t num_rows_; + const std::size_t device_num_rows_; + const std::size_t row_offset_; + const std::size_t num_features_; + device_view_type q_; + const real_type QA_cost_; + const real_type cost_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + const detail::standard_layout_tuple kernel_function_parameter_; + /// @endcond +}; + +} // namespace plssvm::kokkos::detail + +#endif // 
PLSSVM_BACKENDS_KOKKOS_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_ diff --git a/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp new file mode 100644 index 000000000..b22f69885 --- /dev/null +++ b/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp @@ -0,0 +1,286 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Functions for implicitly assembling the kernel matrix using the Kokkos backend. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_ +#pragma once + +#include "plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp" // plssvm::kokkos::detail::standard_layout_tuple +#include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} +#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type + +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::View, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents, Kokkos::atomic_add + +#include // std::size_t + +namespace plssvm::kokkos::detail { + +/** + * @brief Perform an implicit BLAS SYMM-like operation: `C = alpha * A * B + C` where `A` is the implicitly calculated kernel matrix using the @p kernel_function (never actually stored, reducing the amount of needed global memory), @p B and @p C are matrices, and @p alpha is a scalar. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel + * @tparam kernel_function the type of the used kernel function + * @tparam Args the types of the parameters necessary for the specific kernel function + */ +template +class device_kernel_assembly_symm { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + + public: + /** + * @brief Initialize the Kokkos kernel function object. 
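+ * @note Illustrative summary (not part of the original sources): per entry, the implicitly assembled kernel matrix value is
+ * @code{.cpp}
+ * A(i, j) = kernel_function(x_i, x_j) + QA_cost - q[i] - q[j] + (i == j ? cost : real_type{ 0.0 });  // never materialized in global memory
+ * @endcode
+ * which this functor immediately folds into `C += alpha * A * B`, using `Kokkos::atomic_add` to update @p C.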
+ * @param[in] alpha the scalar alpha value + * @param[in] q the vector used in the dimensional reduction + * @param[in] data_d the data points to calculate the implicit kernel matrix from + * @param[in] num_rows the total number of data points (= total number of rows) + * @param[in] device_num_rows the number of rows the current device is responsible for + * @param[in] row_offset the first row in @p data_d the current device is responsible for + * @param[in] num_features the number of features per data point + * @param[in] QA_cost the scalar used in the dimensional reduction + * @param[in] cost the cost factor the diagonal is scaled with + * @param[in] B the matrix @p B + * @param[in,out] C the matrix @p C + * @param[in] num_classes the number of classes in the data set + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension + * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function + */ + device_kernel_assembly_symm(const real_type alpha, device_view_type q, device_view_type data_d, const std::size_t num_rows, const std::size_t device_num_rows, const std::size_t row_offset, const std::size_t num_features, const real_type QA_cost, const real_type cost, device_view_type B, device_view_type C, const std::size_t num_classes, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x, Args... kernel_function_parameter) : + alpha_{ alpha }, + q_{ q }, + data_d_{ data_d }, + num_rows_{ num_rows }, + device_num_rows_{ device_num_rows }, + row_offset_{ row_offset }, + num_features_{ num_features }, + QA_cost_{ QA_cost }, + cost_{ cost }, + B_{ B }, + C_{ C }, + num_classes_{ num_classes }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x }, + kernel_function_parameter_{ detail::make_standard_layout_tuple(std::forward(kernel_function_parameter)...) } { } + + /** + * @brief Function call operator overload performing the actual calculation. 
+ * @param[in] team the Kokkos team representing the current point in the execution space + */ + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto FEATURE_BLOCK_SIZE_sz = static_cast(FEATURE_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // calculate the indices used in the current thread + const auto i = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; // # rhs -> num_rhs + const auto i_linear = blockIdx_x * blockDim_x * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + const auto j = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; // # rows -> num_mirror_rows + const auto j_linear = blockIdx_y * blockDim_y * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + + // only calculate the upper triangular matrix -> can't use threadIdx since all threads in a warp must progress further + if (blockIdx_x >= blockIdx_y) { + // create a thread private array used for internal caching + real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{}; + + // create the shared memory arrays used for caching data point features + constexpr std::size_t shmem_size = FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE; + real_type *data_cache_ptr = static_cast(team.team_shmem().get_shmem(2 * shmem_size)); + + { + // create the shared memory arrays used for caching data point features + Kokkos::mdspan> data_cache_i{ data_cache_ptr, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> data_cache_j{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // iterate over all features using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < num_features_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_i = row_offset_ + i_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + const auto global_j = row_offset_ + j_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + data_cache_i(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_i]; + data_cache_i(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ + std::size_t{ 1 } + 
PADDING_SIZE_sz) + global_i]; + data_cache_j(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_j]; + data_cache_j(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_j]; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // perform the feature reduction calculation + for (unsigned block_dim = 0; block_dim < FEATURE_BLOCK_SIZE; ++block_dim) { + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + temp[internal_i][internal_j] += detail::feature_reduce(data_cache_i(block_dim, threadIdx_x * INTERNAL_BLOCK_SIZE + internal_i), + data_cache_j(block_dim, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_j)); + } + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + } + + // apply the remaining part of the kernel function and store the value in the output kernel matrix + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const auto global_i = row_offset_ + i + static_cast(internal_i); + const auto device_global_i = i + static_cast(internal_i); + const auto global_j = row_offset_ + j + static_cast(internal_j); + const auto device_global_j = j + static_cast(internal_j); + + // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix) + if ((device_global_i < (num_rows_ - row_offset_) && device_global_j < device_num_rows_ && global_i >= global_j)) { + temp[internal_i][internal_j] = detail::apply_kernel_function(temp[internal_i][internal_j], kernel_function_parameter_) + QA_cost_ - q_[global_i] - q_[global_j]; + // apply the cost on the diagonal + if (global_i == global_j) { + temp[internal_i][internal_j] += cost_; + } + } else { + // be sure to set the value to zero otherwise + temp[internal_i][internal_j] = real_type{ 0.0 }; + } + } + } + + // calculate C += alpha * temp * B for the UPPER triangular matrix + { + // same shared memory size but with different dimensions + Kokkos::mdspan> B_cache{ data_cache_ptr, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE, FEATURE_BLOCK_SIZE }; + Kokkos::mdspan> C_out_cache{ data_cache_ptr + shmem_size, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE, FEATURE_BLOCK_SIZE }; + + // iterate over all classes using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < num_classes_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_i = row_offset_ + i_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + B_cache(internal * THREAD_BLOCK_SIZE + threadIdx_x, threadIdx_y) = alpha_ * B_[global_i * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y]; + B_cache(internal * THREAD_BLOCK_SIZE + threadIdx_x, threadIdx_y + THREAD_BLOCK_SIZE) = alpha_ * B_[global_i * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y + THREAD_BLOCK_SIZE_sz]; + C_out_cache(internal * THREAD_BLOCK_SIZE + threadIdx_x, threadIdx_y) = real_type{ 0.0 }; + C_out_cache(internal * 
THREAD_BLOCK_SIZE + threadIdx_x, threadIdx_y + THREAD_BLOCK_SIZE) = real_type{ 0.0 }; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // calculate intermediate results and store them in shared memory + for (unsigned class_idx = 0; class_idx < FEATURE_BLOCK_SIZE; ++class_idx) { + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + C_out_cache(threadIdx_y * INTERNAL_BLOCK_SIZE + internal_j, (class_idx + threadIdx_x) % FEATURE_BLOCK_SIZE) += + temp[internal_i][internal_j] * B_cache(threadIdx_x * INTERNAL_BLOCK_SIZE + internal_i, (class_idx + threadIdx_x) % FEATURE_BLOCK_SIZE); + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // add intermediate cached results to C + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_j = row_offset_ + j + static_cast(internal); + Kokkos::atomic_add(&C_[global_j * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_x], C_out_cache(threadIdx_y * INTERNAL_BLOCK_SIZE + internal, threadIdx_x)); + Kokkos::atomic_add(&C_[global_j * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_x + THREAD_BLOCK_SIZE_sz], C_out_cache(threadIdx_y * INTERNAL_BLOCK_SIZE + internal, threadIdx_x + THREAD_BLOCK_SIZE)); + } + team.team_barrier(); // wai until all threads updated C with their values + } + } + + // set potential diagonal entries in temp to 0.0 such that we don't apply the main diagonal twice to C + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const auto global_i = row_offset_ + i + static_cast(internal_i); + const auto global_j = row_offset_ + j + static_cast(internal_j); + + if (global_i == global_j) { + temp[internal_i][internal_j] = real_type{ 0.0 }; + } + } + } + + // calculate C += alpha * temp * B for the LOWER triangular matrix + { + // same shared memory size but with different dimensions + Kokkos::mdspan> B_cache{ data_cache_ptr, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> C_out_cache{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // iterate over all classes using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < num_classes_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_j = row_offset_ + j_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + B_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = alpha_ * B_[global_j * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y]; + B_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = alpha_ * B_[global_j * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y + THREAD_BLOCK_SIZE_sz]; + C_out_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = real_type{ 0.0 }; + C_out_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = real_type{ 0.0 }; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // calculate intermediate results and store them in shared memory + for (unsigned class_idx = 0; 
class_idx < FEATURE_BLOCK_SIZE; ++class_idx) {
+ for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) {
+ for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) {
+ C_out_cache((class_idx + threadIdx_y) % FEATURE_BLOCK_SIZE, internal_i * THREAD_BLOCK_SIZE + threadIdx_x) +=
+ temp[internal_i][internal_j] * B_cache((class_idx + threadIdx_y) % FEATURE_BLOCK_SIZE, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_j);
+ }
+ }
+ team.team_barrier(); // wait until all threads performed their part of the calculations
+ }
+
+ // add intermediate cached results to C
+ for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) {
+ const auto global_i = row_offset_ + i + static_cast<std::size_t>(internal);
+ Kokkos::atomic_add(&C_[global_i * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y], C_out_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x));
+ Kokkos::atomic_add(&C_[global_i * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y + THREAD_BLOCK_SIZE_sz], C_out_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x));
+ }
+ team.team_barrier(); // wait until all threads updated C with their values
+ }
+ }
+ }
+ }
+
+ private:
+ /// @cond Doxygen_suppress
+ const real_type alpha_;
+ device_view_type<const real_type> q_;
+ device_view_type<const real_type> data_d_;
+ const std::size_t num_rows_;
+ const std::size_t device_num_rows_;
+ const std::size_t row_offset_;
+ const std::size_t num_features_;
+ const real_type QA_cost_;
+ const real_type cost_;
+ device_view_type<const real_type> B_;
+ device_view_type<real_type> C_;
+ const std::size_t num_classes_;
+ const std::size_t grid_x_offset_;
+ const std::size_t grid_y_offset_;
+ const std::size_t grid_size_x_;
+ const detail::standard_layout_tuple<Args...> kernel_function_parameter_;
+ /// @endcond
+};
+
+} // namespace plssvm::kokkos::detail
+
+#endif // PLSSVM_BACKENDS_KOKKOS_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_
diff --git a/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp b/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp
new file mode 100644
index 000000000..35cbe8ed1
--- /dev/null
+++ b/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp
@@ -0,0 +1,127 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Implement the different kernel functions on the GPU using Kokkos.
+ */
+
+#ifndef PLSSVM_BACKENDS_KOKKOS_KERNEL_KERNEL_FUNCTIONS_HPP_
+#define PLSSVM_BACKENDS_KOKKOS_KERNEL_KERNEL_FUNCTIONS_HPP_
+
+#include "plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp" // plssvm::kokkos::detail::standard_layout_tuple
+#include "plssvm/constants.hpp" // plssvm::real_type
+#include "plssvm/detail/utility.hpp" // plssvm::detail::always_false_v
+#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type
+
+#include "Kokkos_MathematicalFunctions.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::pow, Kokkos::exp, Kokkos::tanh, Kokkos::fabs
+
+#include <cfloat> // FLT_MIN, DBL_MIN
+#include <type_traits> // std::is_same_v
+
+namespace plssvm::kokkos::detail {
+
+//***************************************************//
+// feature reductions //
+//***************************************************//
+
+/**
+ * @brief Compute the default feature reduction, i.e., a simple dot-product.
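+ * @details Accumulated over all features this yields the dot product \f$\vec{x}^T \vec{y} = \sum_{d} x_{d} \cdot y_{d}\f$; it is used for every kernel function without a specialized feature reduction (i.e., the linear, polynomial, and sigmoid kernels).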
+ * @tparam kernel_function the kernel function type
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <kernel_function_type kernel_function>
+[[nodiscard]] KOKKOS_INLINE_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) {
+ return val1 * val2;
+}
+
+/**
+ * @brief Compute the feature reduction for the radial basis function kernel function, i.e., the squared euclidean distance.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] KOKKOS_INLINE_FUNCTION real_type feature_reduce<kernel_function_type::rbf>(const real_type val1, const real_type val2) {
+ const real_type d = val1 - val2;
+ return d * d;
+}
+
+/**
+ * @brief Compute the feature reduction for the laplacian kernel function, i.e., the Manhattan distance.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] KOKKOS_INLINE_FUNCTION real_type feature_reduce<kernel_function_type::laplacian>(const real_type val1, const real_type val2) {
+ return Kokkos::fabs(val1 - val2);
+}
+
+/**
+ * @brief Return the minimum possible floating point value for type @p T.
+ * @details Function necessary such that the `if constexpr` depends on a template parameter and, therefore, no false-positive implicit conversion warnings are reported.
+ * @tparam T the type to retrieve the minimum value
+ * @return the minimum floating point value for type @p T (`[[nodiscard]]`)
+ */
+template <typename T>
+[[nodiscard]] constexpr KOKKOS_INLINE_FUNCTION T real_type_min() {
+ if constexpr (std::is_same_v<T, float>) {
+ return FLT_MIN;
+ } else {
+ return DBL_MIN;
+ }
+}
+
+/**
+ * @brief Compute the feature reduction for the chi-squared kernel function.
+ * @note Be sure that the denominator isn't 0.0 which may be the case for padding values.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] KOKKOS_INLINE_FUNCTION real_type feature_reduce<kernel_function_type::chi_squared>(const real_type val1, const real_type val2) {
+ const real_type d = val1 - val2;
+ return (real_type{ 1.0 } / (val1 + val2 + real_type_min<real_type>())) * d * d;
+}
+
+//***************************************************//
+// kernel functions //
+//***************************************************//
+
+/**
+ * @brief Compute the @p kernel_function using @p value and the @p params.
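+ * @details For example, given the already reduced value \f$v\f$, the radial basis function, laplacian, and chi-squared kernels evaluate to \f$\exp(-\gamma \cdot v)\f$, the polynomial kernel to \f$(\gamma \cdot v + coef0)^{degree}\f$, and the sigmoid kernel to \f$\tanh(\gamma \cdot v + coef0)\f$.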
+ * @tparam kernel_function the kernel function type + * @tparam Args the types of the potential kernel function parameters + * @param[in] value the value to apply the kernel function to + * @param[in] params the potential kernel function parameters + * @return the result value (`[[nodiscard]]`) + */ +template +[[nodiscard]] KOKKOS_INLINE_FUNCTION real_type apply_kernel_function(const real_type value, [[maybe_unused]] const detail::standard_layout_tuple params) { + if constexpr (kernel_function == kernel_function_type::linear) { + return value; + } else if constexpr (kernel_function == kernel_function_type::polynomial) { + return Kokkos::pow(detail::get<1>(params) * value + detail::get<2>(params), detail::get<0>(params)); + } else if constexpr (kernel_function == kernel_function_type::rbf) { + return Kokkos::exp(-detail::get<0>(params) * value); + } else if constexpr (kernel_function == kernel_function_type::sigmoid) { + return Kokkos::tanh(detail::get<0>(params) * value + detail::get<1>(params)); + } else if constexpr (kernel_function == kernel_function_type::laplacian) { + return Kokkos::exp(-detail::get<0>(params) * value); + } else if constexpr (kernel_function == kernel_function_type::chi_squared) { + return Kokkos::exp(-detail::get<0>(params) * value); + } else { + static_assert(::plssvm::detail::always_false_v, "Unsupported kernel function!"); + } +} + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_KERNEL_KERNEL_FUNCTIONS_HPP_ diff --git a/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp b/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp new file mode 100644 index 000000000..767bfc958 --- /dev/null +++ b/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp @@ -0,0 +1,452 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines the functions used for prediction for the C-SVM using the Kokkos backend. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_PREDICT_KERNEL_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_PREDICT_KERNEL_HPP_ +#pragma once + +#include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} +#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type + +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::View, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents, Kokkos::atomic_add + +#include // std::size_t + +namespace plssvm::kokkos::detail { + +/** + * @brief Calculate the `q` vector used to speedup the prediction using the linear kernel function. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel + */ +template +class device_kernel_w_linear { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + + public: + /** + * @brief Initialize the Kokkos kernel function object. 
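+ * @details The kernel computes the weight matrix \f$w_{f,c} = \sum_{s} \alpha_{c,s} \cdot sv_{s,f}\f$ over the support vectors assigned to the current device, so that the subsequent linear prediction reduces to a single matrix-matrix multiplication.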
+ * @param[in,out] w_d the vector to speedup the linear prediction + * @param[in] alpha_d the previously learned weights + * @param[in] sv_d the support vectors + * @param[in] num_classes the number of classes + * @param[in] num_sv the number of support vectors + * @param[in] device_specific_num_sv the number of support vectors the current device is responsible for + * @param[in] sv_offset the first support vector (row in @p alpha_d) the current device is responsible for + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension + */ + device_kernel_w_linear(device_view_type w_d, device_view_type alpha_d, device_view_type sv_d, const std::size_t num_classes, const std::size_t num_sv, const std::size_t device_specific_num_sv, const std::size_t sv_offset, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : + w_d_{ w_d }, + alpha_d_{ alpha_d }, + sv_d_{ sv_d }, + num_classes_{ num_classes }, + num_sv_{ num_sv }, + device_specific_num_sv_{ device_specific_num_sv }, + sv_offset_{ sv_offset }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x } { } + + /** + * @brief Function call operator overload performing the actual calculation. + * @param[in] team the Kokkos team representing the current point in the execution space + */ + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // calculate the indices used in the current thread + const auto feature_idx = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; + const auto feature_idx_linear = blockIdx_x * blockDim_x * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + const auto class_idx = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; + const auto class_idx_linear = blockIdx_y * blockDim_y * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + + // create the shared memory arrays used for caching data point features + constexpr std::size_t shmem_size = THREAD_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE; + real_type *data_cache_ptr = static_cast(team.team_shmem().get_shmem(2 * shmem_size)); + Kokkos::mdspan> data_cache_feature{ data_cache_ptr, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * 
THREAD_BLOCK_SIZE }; + Kokkos::mdspan> data_cache_alpha{ data_cache_ptr + shmem_size, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // create a thread private array used for internal caching + real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{}; + + // iterate over all support vectors using blocking to be able to cache them for faster memory accesses + for (std::size_t sv = 0; sv < device_specific_num_sv_; sv += THREAD_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_feature_idx = feature_idx_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + const auto global_class_idx = class_idx_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + data_cache_feature(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = sv_d_[global_feature_idx * (device_specific_num_sv_ + PADDING_SIZE_sz) + sv + threadIdx_y]; // SoA + data_cache_alpha(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = alpha_d_[global_class_idx * (num_sv_ + PADDING_SIZE_sz) + sv + sv_offset_ + threadIdx_y]; // AoS + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // perform the dot product calculation + for (unsigned block_dim = 0; block_dim < THREAD_BLOCK_SIZE; ++block_dim) { + for (unsigned internal_feature = 0; internal_feature < INTERNAL_BLOCK_SIZE; ++internal_feature) { + for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) { + temp[internal_feature][internal_class] += data_cache_alpha(block_dim, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_class) * data_cache_feature(block_dim, threadIdx_x * INTERNAL_BLOCK_SIZE + internal_feature); + } + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // update global array with local one + for (unsigned internal_feature = 0; internal_feature < INTERNAL_BLOCK_SIZE; ++internal_feature) { + for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) { + const auto global_feature_idx = feature_idx + static_cast(internal_feature); + const auto global_class_idx = class_idx + static_cast(internal_class); + + w_d_[global_feature_idx * (num_classes_ + PADDING_SIZE_sz) + global_class_idx] = temp[internal_feature][internal_class]; + } + } + } + + private: + /// @cond Doxygen_suppress + device_view_type w_d_; + device_view_type alpha_d_; + device_view_type sv_d_; + const std::size_t num_classes_; + const std::size_t num_sv_; + const std::size_t device_specific_num_sv_; + const std::size_t sv_offset_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + /// @endcond +}; + +/** + * @brief Predict the @p predict_points_d using the linear kernel speeding up the calculation using the @p w_d vector. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel + */ +template +class device_kernel_predict_linear { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + + public: + /** + * @brief Initialize the Kokkos kernel function object. 
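+ * @details The kernel computes \f$prediction_{p,c} = \sum_{f} pp_{p,f} \cdot w_{f,c} - rho_{c}\f$ for every predict point \f$p\f$ and class \f$c\f$.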
+ * @param[out] prediction_d the predicted values + * @param[in] w_d the vector to speedup the calculations + * @param[in] rho_d the previously learned bias + * @param[in] predict_points_d the data points to predict + * @param[in] num_classes the number of classes + * @param[in] num_predict_points the number of data points to predict + * @param[in] num_features the number of features per data point + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension + */ + device_kernel_predict_linear(device_view_type prediction_d, device_view_type w_d, device_view_type rho_d, device_view_type predict_points_d, const std::size_t num_classes, const std::size_t num_predict_points, const std::size_t num_features, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : + prediction_d_{ prediction_d }, + w_d_{ w_d }, + rho_d_{ rho_d }, + predict_points_d_{ predict_points_d }, + num_classes_{ num_classes }, + num_predict_points_{ num_predict_points }, + num_features_{ num_features }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x } { } + + /** + * @brief Function call operator overload performing the actual calculation. + * @param[in] team the Kokkos team representing the current point in the execution space + */ + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto FEATURE_BLOCK_SIZE_sz = static_cast(FEATURE_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // calculate the indices used in the current thread + const auto pp_idx = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; + const auto pp_idx_linear = blockIdx_x * blockDim_x * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + const auto class_idx = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; + const auto class_idx_linear = blockIdx_y * blockDim_y * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + + // create the shared memory arrays used for caching data point features + constexpr std::size_t shmem_size = FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE; + real_type *data_cache_ptr = static_cast(team.team_shmem().get_shmem(2 * shmem_size)); + Kokkos::mdspan> data_cache_pp{ data_cache_ptr, FEATURE_BLOCK_SIZE, 
INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> data_cache_w{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // create a thread private array used for internal caching + real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{}; + + // iterate over all features using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < num_features_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_pp_idx = pp_idx_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + const auto global_class_idx = class_idx_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + data_cache_pp(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = predict_points_d_[(dim + threadIdx_y) * (num_predict_points_ + PADDING_SIZE_sz) + global_pp_idx]; + data_cache_pp(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = predict_points_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_predict_points_ + PADDING_SIZE_sz) + global_pp_idx]; + data_cache_w(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = w_d_[(dim + threadIdx_y) * (num_classes_ + PADDING_SIZE_sz) + global_class_idx]; + data_cache_w(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = w_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_classes_ + PADDING_SIZE_sz) + global_class_idx]; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // perform the dot product calculation + for (unsigned block_dim = 0; block_dim < FEATURE_BLOCK_SIZE; ++block_dim) { + for (unsigned internal_pd = 0; internal_pd < INTERNAL_BLOCK_SIZE; ++internal_pd) { + for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) { + temp[internal_pd][internal_class] += data_cache_w(block_dim, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_class) * data_cache_pp(block_dim, threadIdx_x * INTERNAL_BLOCK_SIZE + internal_pd); + } + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // update global array with local one + for (unsigned internal_pd = 0; internal_pd < INTERNAL_BLOCK_SIZE; ++internal_pd) { + for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) { + const auto global_pp_idx = pp_idx + static_cast(internal_pd); + const auto global_class_idx = class_idx + static_cast(internal_class); + + prediction_d_[global_pp_idx * (num_classes_ + PADDING_SIZE_sz) + global_class_idx] = temp[internal_pd][internal_class] - rho_d_[global_class_idx]; + } + } + } + + private: + /// @cond Doxygen_suppress + device_view_type prediction_d_; + device_view_type w_d_; + device_view_type rho_d_; + device_view_type predict_points_d_; + const std::size_t num_classes_; + const std::size_t num_predict_points_; + const std::size_t num_features_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + /// @endcond +}; + +/** + * @brief Predict the @p predict_points_d using the @p kernel_function. 
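+ * @details Conceptually, the kernel evaluates \f$prediction_{p,c} = \sum_{s} \alpha_{c,s} \cdot k(pp_{p}, sv_{s}) - rho_{c}\f$: it first performs the feature reduction between all predict points and support vectors, then applies the kernel function \f$k\f$, and finally accumulates the class-wise weighted sums, subtracting the bias \f$rho_{c}\f$ exactly once per class.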
+ * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel
+ * @tparam kernel_function the type of the used kernel function
+ * @tparam Args the types of the parameters necessary for the specific kernel function
+ */
+template <typename ExecutionSpace, kernel_function_type kernel_function, typename... Args>
+class device_kernel_predict {
+ /**
+ * @brief The type of the used Kokkos::View.
+ */
+ template <typename T>
+ using device_view_type = Kokkos::View<T *>;
+
+ public:
+ /**
+ * @brief Initialize the Kokkos kernel function object.
+ * @param[out] prediction_d the predicted values
+ * @param[in] alpha_d the previously learned weights
+ * @param[in] rho_d the previously learned biases
+ * @param[in] sv_d the support vectors
+ * @param[in] predict_points_d the data points to predict
+ * @param[in] num_classes the number of classes
+ * @param[in] num_sv the number of support vectors
+ * @param[in] num_predict_points the number of data points to predict
+ * @param[in] num_features the number of features per data point
+ * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used
+ * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used
+ * @param[in] grid_size_x the size of the execution grid in x-dimension
+ * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function
+ */
+ device_kernel_predict(device_view_type<real_type> prediction_d, device_view_type<const real_type> alpha_d, device_view_type<const real_type> rho_d, device_view_type<const real_type> sv_d, device_view_type<const real_type> predict_points_d, const std::size_t num_classes, const std::size_t num_sv, const std::size_t num_predict_points, const std::size_t num_features, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x, Args... kernel_function_parameter) :
+ prediction_d_{ prediction_d },
+ alpha_d_{ alpha_d },
+ rho_d_{ rho_d },
+ sv_d_{ sv_d },
+ predict_points_d_{ predict_points_d },
+ num_classes_{ num_classes },
+ num_sv_{ num_sv },
+ num_predict_points_{ num_predict_points },
+ num_features_{ num_features },
+ grid_x_offset_{ grid_x_offset },
+ grid_y_offset_{ grid_y_offset },
+ grid_size_x_{ grid_size_x },
+ kernel_function_parameter_{ detail::make_standard_layout_tuple(std::forward<Args>(kernel_function_parameter)...) } { }
+
+ /**
+ * @brief Function call operator overload performing the actual calculation.
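+ * @details The index computations assume a team of `THREAD_BLOCK_SIZE * THREAD_BLOCK_SIZE` threads; the team scratch memory must be large enough to hold two `FEATURE_BLOCK_SIZE x (INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE)` caches of `real_type` values.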
+ * @param[in] team the Kokkos team representing the current point in the execution space + */ + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto FEATURE_BLOCK_SIZE_sz = static_cast(FEATURE_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // calculate the indices used in the current thread + const auto pp_idx = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; + const auto pp_idx_linear = blockIdx_x * blockDim_x * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + const auto sv_idx_linear = blockIdx_y * blockDim_y * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + + constexpr std::size_t shmem_size = FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE; + real_type *data_cache_ptr = static_cast(team.team_shmem().get_shmem(2 * shmem_size)); + + // create a thread private array used for internal caching + real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{}; + + { + // create the shared memory arrays used for caching data point features + Kokkos::mdspan> data_cache_pp{ data_cache_ptr, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> data_cache_sv{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // iterate over all features using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < num_features_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_pp_idx = pp_idx_linear + static_cast(internal) * THREAD_BLOCK_SIZE; + const auto global_sv_idx = sv_idx_linear + static_cast(internal) * THREAD_BLOCK_SIZE; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + data_cache_pp(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = predict_points_d_[(dim + threadIdx_y) * (num_predict_points_ + PADDING_SIZE_sz) + global_pp_idx]; + data_cache_pp(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = predict_points_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_predict_points_ + PADDING_SIZE_sz) + global_pp_idx]; + data_cache_sv(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = sv_d_[(dim + threadIdx_y) * (num_sv_ + PADDING_SIZE_sz) + global_sv_idx]; + data_cache_sv(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = sv_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_sv_ + 
PADDING_SIZE_sz) + global_sv_idx]; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // perform the feature reduction calculation + for (unsigned block_dim = 0; block_dim < FEATURE_BLOCK_SIZE; ++block_dim) { + for (unsigned internal_pd = 0; internal_pd < INTERNAL_BLOCK_SIZE; ++internal_pd) { + for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) { + temp[internal_pd][internal_sv] += detail::feature_reduce(data_cache_sv(block_dim, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_sv), + data_cache_pp(block_dim, threadIdx_x * INTERNAL_BLOCK_SIZE + internal_pd)); + } + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + } + + // update temp using the respective kernel function + for (unsigned internal_pd = 0; internal_pd < INTERNAL_BLOCK_SIZE; ++internal_pd) { + for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) { + temp[internal_pd][internal_sv] = detail::apply_kernel_function(temp[internal_pd][internal_sv], kernel_function_parameter_); + } + } + + { + // create the shared memory arrays used for caching data point features + Kokkos::mdspan> alpha_cache{ data_cache_ptr, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> out_cache{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // iterate over all features using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < num_classes_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const std::size_t global_sv_idx = sv_idx_linear + internal * THREAD_BLOCK_SIZE; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + alpha_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = alpha_d_[(dim + threadIdx_y) * (num_sv_ + PADDING_SIZE_sz) + global_sv_idx]; + alpha_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = alpha_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_sv_ + PADDING_SIZE_sz) + global_sv_idx]; + + // the bias (rho) must only be applied once for all support vectors + if (blockIdx_y == std::size_t{ 0 }) { + out_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = -rho_d_[dim + threadIdx_y]; + out_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = -rho_d_[dim + threadIdx_y + THREAD_BLOCK_SIZE_sz]; + } else { + out_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = real_type{ 0.0 }; + out_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = real_type{ 0.0 }; + } + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // calculate intermediate results and store them in shared memory + for (unsigned class_idx = 0; class_idx < FEATURE_BLOCK_SIZE; ++class_idx) { + for (unsigned internal_pd = 0; internal_pd < INTERNAL_BLOCK_SIZE; ++internal_pd) { + for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) { + out_cache((class_idx + threadIdx_y) % FEATURE_BLOCK_SIZE, internal_pd * THREAD_BLOCK_SIZE + threadIdx_x) += + temp[internal_pd][internal_sv] * alpha_cache((class_idx + threadIdx_y) % FEATURE_BLOCK_SIZE, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_sv); + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // 
add intermediate cached results to prediction_d + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_pp_idx = pp_idx + static_cast(internal); + + Kokkos::atomic_add(&prediction_d_[global_pp_idx * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y], out_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x)); + Kokkos::atomic_add(&prediction_d_[global_pp_idx * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y + THREAD_BLOCK_SIZE_sz], out_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x)); + } + team.team_barrier(); // wait until all threads updated their part of the prediction + } + } + } + + private: + /// @cond Doxygen_suppress + device_view_type prediction_d_; + device_view_type alpha_d_; + device_view_type rho_d_; + device_view_type sv_d_; + device_view_type predict_points_d_; + const std::size_t num_classes_; + const std::size_t num_sv_; + const std::size_t num_predict_points_; + const std::size_t num_features_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + const detail::standard_layout_tuple kernel_function_parameter_; + /// @endcond +}; + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_PREDICT_KERNEL_HPP_ diff --git a/include/plssvm/backends/execution_range.hpp b/include/plssvm/backends/execution_range.hpp index 3f4bae359..5be842f9a 100644 --- a/include/plssvm/backends/execution_range.hpp +++ b/include/plssvm/backends/execution_range.hpp @@ -12,6 +12,8 @@ #ifndef PLSSVM_BACKENDS_EXECUTION_RANGE_HPP_ #define PLSSVM_BACKENDS_EXECUTION_RANGE_HPP_ +#include "plssvm/backend_types.hpp" // plssvm::backend_type + #include "fmt/base.h" // fmt::formatter #include "fmt/ostream.h" // fmt::ostream_formatter diff --git a/include/plssvm/backends/gpu_device_ptr.hpp b/include/plssvm/backends/gpu_device_ptr.hpp index e1b47a5c8..78729691f 100644 --- a/include/plssvm/backends/gpu_device_ptr.hpp +++ b/include/plssvm/backends/gpu_device_ptr.hpp @@ -415,14 +415,14 @@ void gpu_device_ptr::swap( template void gpu_device_ptr::memset(const int pattern, const size_type pos) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); this->memset(pattern, pos, this->size_padded() * sizeof(value_type)); } template void gpu_device_ptr::fill(const value_type value, const size_type pos) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); this->fill(value, pos, this->size_padded()); } @@ -430,7 +430,7 @@ void gpu_device_ptr::fill( template template void gpu_device_ptr::copy_to_device(const matrix &data_to_copy) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); if (data_to_copy.size_padded() < this->size_padded()) { throw gpu_device_ptr_exception{ fmt::format("Too few data to perform copy (needed: {}, provided: {})!", this->size_padded(), data_to_copy.size_padded()) }; @@ -440,14 +440,14 @@ void gpu_device_ptr::copy_ template void gpu_device_ptr::copy_to_device(const std::vector &data_to_copy) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! 
Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); this->copy_to_device(data_to_copy, 0, this->size_padded()); } template void gpu_device_ptr::copy_to_device(const std::vector &data_to_copy, const size_type pos, const size_type count) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); const size_type rcount = std::min(count, this->size_padded() - pos); if (data_to_copy.size() < rcount) { @@ -458,7 +458,7 @@ void gpu_device_ptr::copy_ template void gpu_device_ptr::copy_to_device(const_host_pointer_type data_to_copy) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); PLSSVM_ASSERT(data_to_copy != nullptr, "Invalid host pointer for the data to copy!"); this->copy_to_device(data_to_copy, 0, this->size_padded()); @@ -467,7 +467,7 @@ void gpu_device_ptr::copy_ template template void gpu_device_ptr::copy_to_device_strided(const matrix &data_to_copy, const std::size_t start_row, const std::size_t num_rows) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); if (start_row + num_rows > data_to_copy.num_rows()) { throw gpu_device_ptr_exception{ fmt::format("Tried to copy lines {}-{} (zero-based index) to the device, but the matrix has only {} lines!", start_row, start_row + num_rows - 1, data_to_copy.num_rows()) }; @@ -493,7 +493,7 @@ void gpu_device_ptr::copy_ template void gpu_device_ptr::copy_to_device_strided(const std::vector &data_to_copy, std::size_t spitch, std::size_t width, std::size_t height) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); if (width > spitch) { throw gpu_device_ptr_exception{ fmt::format("Invalid width and spitch combination specified (width: {} <= spitch: {})!", width, spitch) }; @@ -508,7 +508,7 @@ void gpu_device_ptr::copy_ template template void gpu_device_ptr::copy_to_host(matrix &buffer) const { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); if (buffer.size_padded() < this->size_padded()) { throw gpu_device_ptr_exception{ fmt::format("Buffer too small to perform copy (needed: {}, provided: {})!", this->size_padded(), buffer.size_padded()) }; @@ -518,14 +518,14 @@ void gpu_device_ptr::copy_ template void gpu_device_ptr::copy_to_host(std::vector &buffer) const { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); this->copy_to_host(buffer, 0, this->size_padded()); } template void gpu_device_ptr::copy_to_host(std::vector &buffer, const size_type pos, const size_type count) const { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! 
Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); const size_type rcount = std::min(count, this->size_padded() - pos); if (buffer.size() < rcount) { @@ -536,7 +536,7 @@ void gpu_device_ptr::copy_ template void gpu_device_ptr::copy_to_host(host_pointer_type buffer) const { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); PLSSVM_ASSERT(buffer != nullptr, "Invalid host pointer for the data to copy!"); this->copy_to_host(buffer, 0, this->size_padded()); @@ -544,8 +544,8 @@ void gpu_device_ptr::copy_ template void gpu_device_ptr::copy_to_other_device(derived_gpu_device_ptr &target) const { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); - PLSSVM_ASSERT(target.get() != nullptr, "Invalid target pointer! Maybe target has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(target.get() != device_pointer_type{}, "Invalid target pointer! Maybe target has been default constructed?"); this->copy_to_other_device(target, 0, this->size_padded()); } diff --git a/include/plssvm/core.hpp b/include/plssvm/core.hpp index 4e1fd1be1..96e56d8e1 100644 --- a/include/plssvm/core.hpp +++ b/include/plssvm/core.hpp @@ -132,4 +132,10 @@ using namespace plssvm::PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION; /// Namespace containing the C-SVM using the SYCL backend with the preferred SYCL implementation. **Should not** directly be used by users. namespace plssvm::sycl::detail { } +/// Namespace containing the C-SVM using the Kokkos backend. +namespace plssvm::kokkos { } + +/// Namespace containing Kokkos backend specific implementation details. **Should not** directly be used by users. +namespace plssvm::kokkos::detail { } + #endif // PLSSVM_CORE_HPP_ diff --git a/include/plssvm/csvm_factory.hpp b/include/plssvm/csvm_factory.hpp index a1272a5e0..fb7760221 100644 --- a/include/plssvm/csvm_factory.hpp +++ b/include/plssvm/csvm_factory.hpp @@ -48,6 +48,9 @@ #include "plssvm/backends/SYCL/AdaptiveCpp/csvm.hpp" // plssvm::adaptivecpp::csvm, plssvm::csvm_backend_exists_v #endif #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + #include "plssvm/backends/Kokkos/csvm.hpp" // plssvm::kokkos::csvm, plssvm::csvm_backend_exists_v +#endif #include "fmt/format.h" // fmt::format #include "igor/igor.hpp" // igor::parser, igor::has_unnamed_arguments @@ -143,6 +146,8 @@ template return make_csvm_default_impl(std::forward(args)...); case backend_type::sycl: return make_csvm_sycl_impl(std::forward(args)...); + case backend_type::kokkos: + return make_csvm_default_impl(std::forward(args)...); } throw unsupported_backend_exception{ "Unrecognized backend provided!" 
}; } diff --git a/include/plssvm/detail/cmd/parser_predict.hpp b/include/plssvm/detail/cmd/parser_predict.hpp index 2b96416ae..4ba2e1a65 100644 --- a/include/plssvm/detail/cmd/parser_predict.hpp +++ b/include/plssvm/detail/cmd/parser_predict.hpp @@ -14,6 +14,7 @@ #pragma once #include "plssvm/backend_types.hpp" // plssvm::backend_type +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type #include "plssvm/target_platforms.hpp" // plssvm::target_platform @@ -37,7 +38,7 @@ struct parser_predict { */ parser_predict(int argc, char **argv); - /// The used backend: automatic (depending on the specified target_platforms), OpenMP, HPX, stdpar, CUDA, HIP, OpenCL, or SYCL. + /// The used backend: automatic (depending on the specified target_platforms), OpenMP, HPX, stdpar, CUDA, HIP, OpenCL, SYCL, or Kokkos. backend_type backend{ backend_type::automatic }; /// The target platform: automatic (depending on the used backend), CPUs or GPUs from NVIDIA, AMD, or Intel. target_platform target{ target_platform::automatic }; @@ -45,6 +46,9 @@ struct parser_predict { /// The SYCL implementation to use with `--backend sycl`. sycl::implementation_type sycl_implementation_type{ sycl::implementation_type::automatic }; + /// The Kokkos execution space to use with --backend=kokkos. + kokkos::execution_space kokkos_execution_space{ kokkos::execution_space::automatic }; + /// `true` if `std::string` should be used as label type instead of the default type `ìnt`. bool strings_as_labels{ false }; diff --git a/include/plssvm/detail/cmd/parser_train.hpp b/include/plssvm/detail/cmd/parser_train.hpp index a723fa82e..73897249a 100644 --- a/include/plssvm/detail/cmd/parser_train.hpp +++ b/include/plssvm/detail/cmd/parser_train.hpp @@ -14,6 +14,7 @@ #pragma once #include "plssvm/backend_types.hpp" // plssvm::backend_type +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/classification_types.hpp" // plssvm::classification_type @@ -53,7 +54,7 @@ struct parser_train { /// The multi-class classification strategy used. classification_type classification{ classification_type::oaa }; - /// The used backend: automatic (depending on the specified target_platforms), OpenMP, HPX, stdpar, CUDA, HIP, OpenCL, or SYCL. + /// The used backend: automatic (depending on the specified target_platforms), OpenMP, HPX, stdpar, CUDA, HIP, OpenCL, SYCL, or Kokkos. backend_type backend{ backend_type::automatic }; /// The target platform: automatic (depending on the used backend), CPUs or GPUs from NVIDIA, AMD, or Intel. target_platform target{ target_platform::automatic }; @@ -65,6 +66,9 @@ struct parser_train { /// The SYCL implementation to use with --backend=sycl. sycl::implementation_type sycl_implementation_type{ sycl::implementation_type::automatic }; + /// The Kokkos execution space to use with --backend=kokkos. + kokkos::execution_space kokkos_execution_space{ kokkos::execution_space::automatic }; + /// `true` if `std::string` should be used as label type instead of the default type `ìnt`. 
bool strings_as_labels{ false }; diff --git a/include/plssvm/environment.hpp b/include/plssvm/environment.hpp index 69a6dab24..cddb3f31c 100644 --- a/include/plssvm/environment.hpp +++ b/include/plssvm/environment.hpp @@ -20,21 +20,27 @@ #include "plssvm/detail/utility.hpp" // plssvm::detail::{contains, unreachable} #include "plssvm/exceptions/exceptions.hpp" // plssvm::environment_exception -#include "fmt/base.h" // fmt::formatter -#include "fmt/ostream.h" // fmt::ostream_formatter -#include "fmt/ranges.h" // fmt::join - -#include // std::ios::failbit -#include // std::istream -#include // std::ostream -#include // std::string -#include // std::vector - #if defined(PLSSVM_HAS_HPX_BACKEND) #include // ::hpx::post #include // ::hpx::{start, stop, finalize} #include // ::hpx::{is_running, is_stopped} #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + #include "Kokkos_Core.hpp" // Kokkos::is_initialized, Kokkos::is_finalized, Kokkos::initialize, Kokkos::finalize +#endif + +#include "fmt/base.h" // fmt::formatter +#include "fmt/format.h" // fmt::format +#include "fmt/ostream.h" // fmt::ostream_formatter +#include "fmt/ranges.h" // fmt::join + +#include // std::remove_if +#include // std::ios::failbit +#include // std::istream +#include // std::ostream +#include // std::string +#include // std::move +#include // std::vector namespace plssvm::environment { @@ -161,6 +167,14 @@ template return detail::determine_status_from_initialized_finalized_functions<::hpx::is_running, ::hpx::is_stopped>(); #else return status::unnecessary; +#endif + } + case backend_type::kokkos: + { +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + return detail::determine_status_from_initialized_finalized_functions(); +#else + return status::unnecessary; #endif } } @@ -176,7 +190,7 @@ template constexpr bool is_initialization_necessary([[maybe_unused]] const backend_type backend) { // Note: must be implemented for the backends that need environmental setup // currently false for all available backends - return false; + return backend == backend_type::hpx || backend == backend_type::kokkos; } //****************************************************************************// @@ -198,6 +212,11 @@ inline void initialize_backend([[maybe_unused]] const backend_type backend) { ::hpx::start(nullptr, 0, nullptr); } #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + if (backend == backend_type::kokkos) { + Kokkos::initialize(); + } +#endif } /** @@ -215,6 +234,11 @@ inline void initialize_backend([[maybe_unused]] const backend_type backend, [[ma ::hpx::start(nullptr, argc, argv); } #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + if (backend == backend_type::kokkos) { + Kokkos::initialize(argc, argv); + } +#endif } /** @@ -231,6 +255,11 @@ inline void finalize_backend([[maybe_unused]] const backend_type backend) { ::hpx::stop(); } #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + if (backend == backend_type::kokkos) { + Kokkos::finalize(); + } +#endif } /** @@ -430,7 +459,8 @@ inline std::vector finalize() { class [[nodiscard]] scope_guard { public: /** - * @copydoc initialize() + * @brief Initialize all **available** backends. + * @details Only initializes backends that are currently uninitialized. */ scope_guard() { backends_ = initialize(); @@ -445,7 +475,10 @@ class [[nodiscard]] scope_guard { } /** - * @copydoc initialize(int &, char **) + * @brief Initialize all **available** backends. + * @details Only initializes backends that are currently uninitialized. 
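+ * A typical (purely illustrative) usage in a `main` function; the backends initialized by the guard are finalized again when it goes out of scope:
+ * @code
+ * int main(int argc, char **argv) {
+ *     const plssvm::environment::scope_guard guard{ argc, argv };
+ *     // ... use PLSSVM ...
+ * }
+ * @endcode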
+ * @param[in,out] argc the number of provided command line arguments + * @param[in,out] argv the provided command line arguments */ scope_guard(int &argc, char **argv) { backends_ = initialize(argc, argv); diff --git a/include/plssvm/parameter.hpp b/include/plssvm/parameter.hpp index 4e51b90d7..516c66386 100644 --- a/include/plssvm/parameter.hpp +++ b/include/plssvm/parameter.hpp @@ -56,6 +56,8 @@ IGOR_MAKE_NAMED_ARGUMENT(classification); IGOR_MAKE_NAMED_ARGUMENT(sycl_implementation_type); /// Create a named argument for the SYCL backend specific kernel invocation type. IGOR_MAKE_NAMED_ARGUMENT(sycl_kernel_invocation_type); +/// Create a named argument for the Kokkos backend specific execution space. +IGOR_MAKE_NAMED_ARGUMENT(kokkos_execution_space); /// @endcond @@ -73,6 +75,12 @@ constexpr bool has_only_parameter_named_args_v = !igor::has_other_than( template constexpr bool has_only_sycl_parameter_named_args_v = !igor::has_other_than(plssvm::kernel_type, plssvm::gamma, plssvm::degree, plssvm::coef0, plssvm::cost, plssvm::sycl_implementation_type, plssvm::sycl_kernel_invocation_type); +/** + * @brief Trait to check whether @p Args only contains named-parameter that can be used to initialize a `plssvm::parameter` struct including Kokkos specific named-parameters. + */ +template +constexpr bool has_only_kokkos_parameter_named_args_v = !igor::has_other_than(plssvm::kernel_type, plssvm::gamma, plssvm::degree, plssvm::coef0, plssvm::cost, plssvm::kokkos_execution_space); + } // namespace detail /** @@ -185,7 +193,7 @@ struct parameter { // compile time check: each named parameter must only be passed once static_assert(!parser.has_duplicates(), "Can only use each named parameter once!"); // compile time check: only some named parameters are allowed - static_assert(!parser.has_other_than(plssvm::kernel_type, plssvm::gamma, plssvm::degree, plssvm::coef0, plssvm::cost, plssvm::sycl_implementation_type, plssvm::sycl_kernel_invocation_type), + static_assert(!parser.has_other_than(plssvm::kernel_type, plssvm::gamma, plssvm::degree, plssvm::coef0, plssvm::cost, plssvm::sycl_implementation_type, plssvm::sycl_kernel_invocation_type, plssvm::kokkos_execution_space), "An illegal named parameter has been passed!"); // shorthand function for emitting a warning if a provided parameter is not used by the current kernel function diff --git a/src/main_predict.cpp b/src/main_predict.cpp index ff28028c8..3d47ad53f 100644 --- a/src/main_predict.cpp +++ b/src/main_predict.cpp @@ -15,6 +15,7 @@ #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE, // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HWS_ENTRY // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/utility.hpp" // PLSSVM_IS_DEFINED #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED) @@ -32,6 +33,7 @@ #include // std::ofstream #include // std::mem_fn #include // std::cerr, std::endl +#include // std::unique_ptr, std::make_unique #include // std::pair #include // std::visit #include // std::vector @@ -74,20 +76,31 @@ int main(int argc, char *argv[]) { // check whether SYCL is used as backend (it is either requested directly or as automatic backend) const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && 
plssvm::determine_default_backend() == plssvm::backend_type::sycl) }; - // check whether HPX is used as backend (it is either requested directly or as automatic backend) const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) }; + // check whether Kokkos is used as backend (it is either requested directly or as automatic backend) + const bool use_kokkos_as_backend{ cmd_parser.backend == plssvm::backend_type::kokkos || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::kokkos) }; // initialize environments if necessary std::vector backends_to_initialize{}; if (use_hpx_as_backend) { backends_to_initialize.push_back(plssvm::backend_type::hpx); } + if (use_kokkos_as_backend) { + backends_to_initialize.push_back(plssvm::backend_type::kokkos); + } environment_guard = std::make_unique(backends_to_initialize); // create default csvm - const std::unique_ptr svm = use_sycl_as_backend ? plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type) - : plssvm::make_csvm(cmd_parser.backend, cmd_parser.target); + const std::unique_ptr svm = [&]() { + if (use_sycl_as_backend) { + return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type); + } else if (use_kokkos_as_backend) { + return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, plssvm::kokkos_execution_space = cmd_parser.kokkos_execution_space); + } else { + return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target); + } + }(); // create model const plssvm::model model{ cmd_parser.model_filename }; diff --git a/src/main_train.cpp b/src/main_train.cpp index 32ac09d71..2e2a39905 100644 --- a/src/main_train.cpp +++ b/src/main_train.cpp @@ -14,6 +14,7 @@ #include "plssvm/detail/logging.hpp" // plssvm::detail::log #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE, // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HWS_ENTRY, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/utility.hpp" // PLSSVM_IS_DEFINED #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED) @@ -27,7 +28,7 @@ #include // std::exception #include // std::mem_fn #include // std::cerr, std::endl -#include // std::unique_ptr +#include // std::unique_ptr, std::make_unique #include // std::remove_reference_t #include // std::pair #include // std::visit @@ -71,20 +72,31 @@ int main(int argc, char *argv[]) { // check whether SYCL is used as backend (it is either requested directly or as automatic backend) const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::sycl) }; - // check whether HPX is used as backend (it is either requested directly or as automatic backend) const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) }; + // check whether Kokkos is used as backend (it is either requested directly or as automatic backend) + const bool 
use_kokkos_as_backend{ cmd_parser.backend == plssvm::backend_type::kokkos || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::kokkos) }; // initialize environments if necessary std::vector backends_to_initialize{}; if (use_hpx_as_backend) { backends_to_initialize.push_back(plssvm::backend_type::hpx); } + if (use_kokkos_as_backend) { + backends_to_initialize.push_back(plssvm::backend_type::kokkos); + } environment_guard = std::make_unique(backends_to_initialize); // create SVM - const std::unique_ptr svm = use_sycl_as_backend ? plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type, plssvm::sycl_kernel_invocation_type = cmd_parser.sycl_kernel_invocation_type) - : plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params); + const std::unique_ptr svm = [&]() { + if (use_sycl_as_backend) { + return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type, plssvm::sycl_kernel_invocation_type = cmd_parser.sycl_kernel_invocation_type); + } else if (use_kokkos_as_backend) { + return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::kokkos_execution_space = cmd_parser.kokkos_execution_space); + } else { + return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params); + } + }(); // only specify plssvm::max_iter if it isn't its default value const plssvm::model model = diff --git a/src/plssvm/backend_types.cpp b/src/plssvm/backend_types.cpp index 34789a764..a1021e7dd 100644 --- a/src/plssvm/backend_types.cpp +++ b/src/plssvm/backend_types.cpp @@ -51,6 +51,9 @@ std::vector list_available_backends() { #if defined(PLSSVM_HAS_SYCL_BACKEND) available_backends.push_back(backend_type::sycl); #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + available_backends.push_back(backend_type::kokkos); +#endif // automatic is ALWAYS available but AT LEAST ONE other backend must be available in addition PLSSVM_ASSERT(available_backends.size() > 1, "Besides \"automatic\" at least one other backend must be available!"); @@ -62,10 +65,10 @@ backend_type determine_default_backend(const std::vector &availabl // the decision order based on empiric findings using decision_order_type = std::pair>; const std::array decision_order = { - decision_order_type{ target_platform::gpu_nvidia, { backend_type::cuda, backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::stdpar } }, - decision_order_type{ target_platform::gpu_amd, { backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::stdpar } }, - decision_order_type{ target_platform::gpu_intel, { backend_type::sycl, backend_type::opencl, backend_type::stdpar } }, - decision_order_type{ target_platform::cpu, { backend_type::sycl, backend_type::opencl, backend_type::openmp, backend_type::hpx, backend_type::stdpar } } + decision_order_type{ target_platform::gpu_nvidia, { backend_type::cuda, backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::kokkos, backend_type::stdpar } }, + decision_order_type{ target_platform::gpu_amd, { backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::kokkos, backend_type::stdpar } }, + decision_order_type{ target_platform::gpu_intel, { backend_type::sycl, backend_type::opencl, backend_type::kokkos, backend_type::stdpar } }, + 
decision_order_type{ target_platform::cpu, { backend_type::sycl, backend_type::kokkos, backend_type::opencl, backend_type::openmp, backend_type::hpx, backend_type::stdpar } } }; // return the default backend based on the previously defined decision order @@ -101,6 +104,8 @@ std::ostream &operator<<(std::ostream &out, const backend_type backend) { return out << "opencl"; case backend_type::sycl: return out << "sycl"; + case backend_type::kokkos: + return out << "kokkos"; } return out << "unknown"; } @@ -126,6 +131,8 @@ std::istream &operator>>(std::istream &in, backend_type &backend) { backend = backend_type::opencl; } else if (str == "sycl") { backend = backend_type::sycl; + } else if (str == "kokkos") { + backend = backend_type::kokkos; } else { in.setstate(std::ios::failbit); } diff --git a/src/plssvm/backends/Kokkos/CMakeLists.txt b/src/plssvm/backends/Kokkos/CMakeLists.txt new file mode 100644 index 000000000..bf37122f2 --- /dev/null +++ b/src/plssvm/backends/Kokkos/CMakeLists.txt @@ -0,0 +1,182 @@ +## Authors: Alexander Van Craen, Marcel Breyer +## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved +## License: This file is part of the PLSSVM project which is released under the MIT license. +## See the LICENSE.md file in the project root for full license information. +######################################################################################################################## + +list(APPEND CMAKE_MESSAGE_INDENT "Kokkos: ") + +# check if Kokkos can be enabled +message(CHECK_START "Checking for Kokkos backend") + +find_package(Kokkos) + +if (NOT Kokkos_FOUND) + message(CHECK_FAIL "not found") + if (PLSSVM_ENABLE_KOKKOS_BACKEND MATCHES "ON") + message(SEND_ERROR "Cannot find requested backend: Kokkos!") + endif () + return() +endif () +message(CHECK_PASS "found") + +# explicitly set sources +set(PLSSVM_KOKKOS_SOURCES + ${CMAKE_CURRENT_LIST_DIR}/detail/device_ptr.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/device_wrapper.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/pinned_memory.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp + ${CMAKE_CURRENT_LIST_DIR}/csvm.cpp + ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp +) + +# set target properties +set_local_and_parent(PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME plssvm-Kokkos) +add_library(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} SHARED ${PLSSVM_KOKKOS_SOURCES}) +target_link_libraries(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC Kokkos::kokkos) + +if (Kokkos_ENABLE_SYCL) + # set SYCL (icpx) specific compilation flags + if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "IntelLLVM") + message(FATAL_ERROR "For Kokkos::SYCL to work, the compiler must be IntelLLVM, but is ${CMAKE_CXX_COMPILER}!") + endif () + + # set icpx specific compiler flags based on the provided PLSSVM_TARGET_PLATFORMS + set(PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "") + # cpu targets + if (DEFINED PLSSVM_CPU_TARGET_ARCHS) + # assemble -fsycl-targets + list(APPEND PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "spir64_x86_64") + endif () + # nvidia targets + if (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) + # assemble -fsycl-targets + list(APPEND PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "nvptx64-nvidia-cuda") + endif () + # amd targets + if (DEFINED PLSSVM_AMD_TARGET_ARCHS) + # assemble -fsycl-targets + list(APPEND PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "amdgcn-amd-amdhsa") + # add target specific flags for AOT -> must always be specified von amd targets + if (NOT PLSSVM_NUM_AMD_TARGET_ARCHS EQUAL 1) + message(SEND_ERROR "IntelLLVM currently only supports a single AMD architecture specification but ${PLSSVM_NUM_AMD_TARGET_ARCHS} 
were provided!") + endif () + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${PLSSVM_AMD_TARGET_ARCHS}) + target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${PLSSVM_AMD_TARGET_ARCHS}) + endif () + # intel targets + if (DEFINED PLSSVM_INTEL_TARGET_ARCHS) + # assemble -fsycl-targets + list(APPEND PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "spir64_gen") + endif () + # set -fsycl-targets + list(JOIN PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "," PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS_STRING) + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -sycl-std=2020 -fsycl -fsycl-targets=${PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS_STRING}) + target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -fsycl -fsycl-targets=${PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS_STRING}) + + # add option for IntelLLVM Ahead-of-Time (AOT) compilation + option(PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT "Enables Ahead-of-Time compilation for the Kokkos::SYCL execution space using IntelLLVM." ON) + if (PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) + message(STATUS "Enabled Ahead-of-Time (AOT) compilation for the Kokkos::SYCL execution space using IntelLLVM.") + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) + target_compile_definitions(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) + ## set AOT compiler flags + # cpu targets + if (DEFINED PLSSVM_CPU_TARGET_ARCHS) + # add target specific flags for AOT + if (PLSSVM_NUM_CPU_TARGET_ARCHS EQUAL 1) + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_x86_64 "-march=${PLSSVM_CPU_TARGET_ARCHS}") + target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_x86_64 "-march=${PLSSVM_CPU_TARGET_ARCHS}") + endif () + endif () + # nvidia targets + if (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) + # add target specific flags for AOT + if (NOT PLSSVM_NUM_NVIDIA_TARGET_ARCHS EQUAL 1) + message(SEND_ERROR "IntelLLVM currently only supports a single NVIDIA architecture specification for AOT but ${PLSSVM_NUM_NVIDIA_TARGET_ARCHS} were provided!") + endif () + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${PLSSVM_NVIDIA_TARGET_ARCHS}) + target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${PLSSVM_NVIDIA_TARGET_ARCHS}) + endif () + # intel targets + if (DEFINED PLSSVM_INTEL_TARGET_ARCHS) + # add target specific flags for AOT + list(JOIN PLSSVM_INTEL_TARGET_ARCHS "," PLSSVM_INTEL_TARGET_ARCHS_STRING) + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_gen "-device ${PLSSVM_INTEL_TARGET_ARCHS_STRING}") + target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_gen "-device ${PLSSVM_INTEL_TARGET_ARCHS_STRING}") + endif () + endif () +endif () + +# link base library against Kokkos library +target_link_libraries(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC ${PLSSVM_BASE_LIBRARY_NAME}) + +# set compile definition that the Kokkos backend is available +target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_HAS_KOKKOS_BACKEND) +target_compile_definitions(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC PLSSVM_HAS_KOKKOS_BACKEND) + +# link against 
interface library +target_link_libraries(${PLSSVM_ALL_LIBRARY_NAME} INTERFACE ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) + +# mark backend library as install target +append_local_and_parent(PLSSVM_TARGETS_TO_INSTALL ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) + +# assemble Kokkos available execution space string +# also set compile definitions -> can't use KOKKOS_ENABLE_* directly inside the "constexpr_available_execution_space.hpp" +# header since we can't include "Kokkos_Core.hpp" there (transitively used in the base library that doesn't know anything about Kokkos +set(PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "") +if (Kokkos_ENABLE_CUDA) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_CUDA) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "Cuda") +endif () +if (Kokkos_ENABLE_HIP) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_HIP) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "HIP") +endif () +if (Kokkos_ENABLE_SYCL) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_SYCL) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "SYCL") +endif () +if (Kokkos_ENABLE_HPX) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_HPX) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "HPX") +endif () +if (Kokkos_ENABLE_OPENMP) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_OPENMP) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "OpenMP") +endif () +if (Kokkos_ENABLE_OPENMPTARGET) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_OPENMPTARGET) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "OpenMPTarget") +endif () +if (Kokkos_ENABLE_OPENACC) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_OPENACC) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "OpenACC") +endif () +if (Kokkos_ENABLE_THREADS) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_THREADS) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "Threads") +endif () +if (Kokkos_ENABLE_SERIAL) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_SERIAL) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "Serial") +endif () +set(PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "${PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES}" PARENT_SCOPE) + +# also set the number of available Kokkos execution spaces to explicitly set the type of the used std::array +# -> necessary if NO Kokkos execution space is available and, therefore, the size of the std::array would be 0 (can't automatically be deduced) +list(LENGTH PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES PLSSVM_KOKKOS_BACKEND_NUM_AVAILABLE_EXECUTION_SPACES) +target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_NUM_AVAILABLE_EXECUTION_SPACES=${PLSSVM_KOKKOS_BACKEND_NUM_AVAILABLE_EXECUTION_SPACES}) + +# generate summary string +set(PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_COMPILER " - Kokkos (${PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES}):") +include(${PROJECT_SOURCE_DIR}/cmake/assemble_summary_string.cmake) +assemble_summary_string(PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS) +# do not print any special target architecture information +string(REPLACE " 
(${PLSSVM_CPU_TARGET_ARCHS})" "" PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS "${PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS}") +string(REPLACE " (${PLSSVM_NVIDIA_TARGET_ARCHS})" "" PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS "${PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS}") +string(REPLACE " (${PLSSVM_AMD_TARGET_ARCHS})" "" PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS "${PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS}") +string(REPLACE " (${PLSSVM_INTEL_TARGET_ARCHS})" "" PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS "${PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS}") +set(PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING "${PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_COMPILER}${PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS}" PARENT_SCOPE) + +list(POP_BACK CMAKE_MESSAGE_INDENT) \ No newline at end of file diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp new file mode 100644 index 000000000..603a5216c --- /dev/null +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -0,0 +1,732 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/backends/Kokkos/csvm.hpp" + +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::{execution_range, dim_type} +#include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_*, PLSSVM_KOKKOS_BACKEND_INVOKE_IF_ +#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::{device_wrapper, get_device_list} +#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::{available_target_platform_to_execution_space_mapping, get_kokkos_version, dim_type_to_native, get_device_name, device_synchronize} +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, list_available_execution_spaces} +#include "plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp" // plssvm::kokkos::detail::{device_kernel_symm, device_kernel_symm_mirror, device_kernel_inplace_matrix_add, device_kernel_inplace_matrix_scale} +#include "plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp" // plssvm::kokkos::detail::device_kernel_assembly +#include "plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp" // plssvm::kokkos::detail::device_kernel_assembly_symm +#include "plssvm/backends/Kokkos/kernel/predict_kernel.hpp" // plssvm::kokkos::detail::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict} +#include "plssvm/constants.hpp" // plssvm::THREAD_BLOCK_SIZE, plssvm::INTERNAL_BLOCK_SIZE, plssvm::FEATURE_BLOCK_SIZE +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/detail/data_distribution.hpp" // plssvm::detail::triangular_data_distribution +#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size +#include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/detail/utility.hpp" // plssvm::detail::{get_system_memory, unreachable} +#include 
"plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level + +#include "Kokkos_Core.hpp" // Kokkos::TeamPolicy, Kokkos::ParallelForTag, Kokkos::parallel_for, Kokkos::PerTeam + // Kokkos::Experimental::HPX::impl_max_hardware_threads, Kokkos::OpenMP::impl_max_hardware_threads, Kokkos::Threads::impl_max_hardware_threads + +#include "fmt/core.h" // fmt::format +#include "fmt/format.h" // fmt::format + +#include // std::sqrt +#include // std::size_t +#include // std::terminate +#include // std::cout, std::endl +#include // std::numeric_limits::max +#include // std::map +#include // std::string +#include // std::move +#include // std::vector + +// a dummy class used as functor to the team_size_max function +template +struct dummy { + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy::member_type &) const { } +}; + +namespace plssvm::kokkos { + +csvm::csvm(parameter params) : + csvm{ plssvm::target_platform::automatic, params } { } + +csvm::csvm(target_platform target, parameter params) : + base_type{ params } { + this->init(target); +} + +void csvm::init(const target_platform target) { + // check whether the requested target platform has been enabled + switch (target) { + case target_platform::automatic: + break; + case target_platform::cpu: +#if !defined(PLSSVM_HAS_CPU_TARGET) + throw backend_exception{ fmt::format("Requested target platform '{}' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!", target) }; +#endif + break; + case target_platform::gpu_nvidia: +#if !defined(PLSSVM_HAS_NVIDIA_TARGET) + throw backend_exception{ fmt::format("Requested target platform '{}' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!", target) }; +#endif + break; + case target_platform::gpu_amd: +#if !defined(PLSSVM_HAS_AMD_TARGET) + throw backend_exception{ fmt::format("Requested target platform '{}' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!", target) }; +#endif + break; + case target_platform::gpu_intel: +#if !defined(PLSSVM_HAS_INTEL_TARGET) + throw backend_exception{ fmt::format("Requested target platform '{}' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!", target) }; +#endif + break; + } + + // check whether the requested execution space is available + if (!::plssvm::detail::contains(list_available_execution_spaces(), space_)) { + throw backend_exception{ fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space_, fmt::join(list_available_execution_spaces(), ", ")) }; + } + + // get all available target_platform <-> Kokkos::ExecutionSpace combinations + const std::map> available_combinations = detail::available_target_platform_to_execution_space_mapping(); + + // check whether the provided execution space is the automatic one + if (space_ == execution_space::automatic) { + // automatically determine the execution space and potentially automatically determine the target platform + if (target == target_platform::automatic) { + bool found_combination{ false }; + // go through all combinations and choose the first execution space in order: gpu_nvidia -> gpu_amd -> gpu_intel -> cpu + for (const target_platform target_order : list_available_target_platforms()) { + if (::plssvm::detail::contains(available_combinations, target_order)) { + // the target 
platform is supported -> choose the first execution space to use in the Kokkos backend + space_ = available_combinations.at(target_order).front(); + target_ = target_order; + found_combination = true; + break; + } + } + // check whether a valid combination could be found + if (!found_combination) { + throw backend_exception{ fmt::format("Couldn't find a valid Kokkos::ExecutionSpace ({}) and target_platform ({}) combination!", fmt::join(list_available_execution_spaces(), ", "), fmt::join(list_available_target_platforms(), ", ")) }; + } + } else { + // check whether the provided target platform is compatible with the currently available Kokkos::ExecutionSpaces + if (::plssvm::detail::contains(available_combinations, target)) { + // the target platform is supported -> choose the first execution space to use in the Kokkos backend + space_ = available_combinations.at(target).front(); + target_ = target; + } else { + // the provided target platform is unsupported -> throw an exception + throw backend_exception{ fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform {}!", fmt::join(list_available_execution_spaces(), ", "), target) }; + } + } + + // output what we use as automatic Kokkos execution space + plssvm::detail::log(verbosity_level::full, + "\nUsing {} as automatic Kokkos::ExecutionSpace.", + space_); + } else { + // execution space explicitly provided and potentially automatically determine the target platform + if (target == target_platform::automatic) { + bool found_combination{ false }; + // go through all combinations (gpu_nvidia -> gpu_amd -> gpu_intel -> cpu) and check whether the requested execution space supports that target platform + for (const target_platform target_order : list_available_target_platforms()) { + if (::plssvm::detail::contains(available_combinations, target_order) && ::plssvm::detail::contains(available_combinations.at(target_order), space_)) { + // the provided execution space supports the target platform + target_ = target_order; + found_combination = true; + break; + } + } + // check whether a valid combination could be found + if (!found_combination) { + throw backend_exception{ fmt::format("Couldn't find a valid target_platform for the Kokkos::ExecutionSpace {}!", space_) }; + } + } else { + if (::plssvm::detail::contains(available_combinations, target) && ::plssvm::detail::contains(available_combinations.at(target), space_)) { + // update target + target_ = target; + } else { + // the provided execution space and target platform combination is unsupported + throw backend_exception{ fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform {}!", space_, target) }; + } + } + } + + // At this point, space_ may NEVER be execution_space::automatic! + PLSSVM_ASSERT(space_ != execution_space::automatic, "At this point, the Kokkos execution space must be determined and must NOT be automatic!"); + PLSSVM_ASSERT(target_ != target_platform::automatic, "At this point, the target platform must be determined and must NOT be automatic!"); + + // Kokkos::Experimental::OpenMPTarget and Kokkos::Experimental::OpenACC currently not supported! 
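The execution-space resolution implemented here is what a library user interacts with through the new plssvm::kokkos_execution_space named parameter and the extended plssvm::environment handling. The following stand-alone sketch shows that caller-side view; it is illustrative only and not part of the diff: it assumes the public umbrella header is named "plssvm/core.hpp" and that the named parameter accepts a plssvm::kokkos::execution_space enumerator (the type forwarded from cmd_parser in main_train.cpp/main_predict.cpp).

#include "plssvm/core.hpp"                             // assumed umbrella header: make_csvm, backend_type, target_platform, environment::scope_guard
#include "plssvm/backends/Kokkos/execution_space.hpp"  // plssvm::kokkos::execution_space

int main() {
    // RAII environment handling: initializes every backend for which
    // is_initialization_necessary() returns true (now HPX and Kokkos) and
    // finalizes them on destruction, mirroring environment_guard in main_train.cpp
    const plssvm::environment::scope_guard guard{};

    // explicitly request the Kokkos backend with the CUDA execution space;
    // omitting the named parameter (or passing execution_space::automatic)
    // lets csvm::init() above pick a space matching the target platform
    const auto svm = plssvm::make_csvm(plssvm::backend_type::kokkos,
                                       plssvm::target_platform::gpu_nvidia,
                                       plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::cuda);

    // an unavailable execution space or an incompatible space/target combination
    // throws plssvm::kokkos::backend_exception (see the checks in init() above)
    return 0;
}

Design-wise, the two-step resolution (target platform first, then the first execution space registered for it) keeps the automatic path deterministic while still allowing the explicit named parameter to override it.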
+ if (space_ == execution_space::openmp_target || space_ == execution_space::openacc) { + throw backend_exception{ fmt::format("The Kokkos execution space {} is currently not supported!", space_) }; + } + + plssvm::detail::log(verbosity_level::full, + "\nUsing Kokkos ({}) as backend with the Kokkos::ExecutionSpace {}.\n", + plssvm::detail::tracking::tracking_entry{ "dependencies", "kokkos_version", detail::get_kokkos_version() }, + plssvm::detail::tracking::tracking_entry{ "dependencies", "kokkos_default_execution_space", space_ }); + + // output automatic target platform information + if (target == target_platform::automatic) { + plssvm::detail::log(verbosity_level::full, + "Using {} as automatic target platform.\n", + target_); + } + + // get all available devices wrt the requested target platform + devices_ = detail::get_device_list(space_, target_); + + // throw exception if no devices in the current execution space could be found + if (devices_.empty()) { + throw backend_exception{ fmt::format("No devices found for the Kokkos execution space {} with the target platform {}!", space_, target_) }; + } + + // print found Kokkos devices + plssvm::detail::log(verbosity_level::full, + "Found {} Kokkos device(s) for the target platform {}:\n", + plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() }, + plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ }); + + std::vector device_names{}; + device_names.reserve(devices_.size()); + for (typename std::vector::size_type device = 0; device < devices_.size(); ++device) { + const std::string device_name = detail::get_device_name(devices_[device]); + plssvm::detail::log(verbosity_level::full, + " [{}, {}]\n", + device, + device_name); + device_names.emplace_back(device_name); + } + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_names })); + plssvm::detail::log(verbosity_level::full | verbosity_level::timing, + "\n"); +} + +csvm::~csvm() { + try { + // be sure that all operations on the CUDA devices have finished before destruction + for (const queue_type &device : devices_) { + detail::device_synchronize(device); + } + } catch (const plssvm::exception &e) { + std::cout << e.what_with_loc() << std::endl; + std::terminate(); + } +} + +std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const { + PLSSVM_ASSERT(space_ != execution_space::automatic, "The automatic execution_space may not be provided to this function!"); + + std::vector<::plssvm::detail::memory_size> device_memory(this->num_available_devices()); + switch (space_) { + case execution_space::automatic: + throw backend_exception{ "Unsupported execution_space::automatic provided!" 
}; + case execution_space::cuda: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { + for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { + device_memory[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().cuda_device_prop().totalGlobalMem) }; + } + }); + break; + case execution_space::hip: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { + for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { + device_memory[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().hip_device_prop().totalGlobalMem) }; + } + }); + break; + case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { + for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { + device_memory[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::info::device::global_mem_size>()) }; + } + }); + break; + case execution_space::hpx: + case execution_space::openmp: + case execution_space::threads: + case execution_space::serial: + // NOTE: for these execution spaces, this->num_available_devices will always return 1 + PLSSVM_ASSERT(this->num_available_devices() == 1, "The host side Kokkos execution spaces should always only be represented using a single device!"); + device_memory[0] = ::plssvm::detail::get_system_memory(); + break; + // TODO: implement for Kokkos::Experimental::OpenMPTarget and Kokkos::Experimental::OpenACC + case execution_space::openmp_target: + case execution_space::openacc: + throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; + } + return device_memory; +} + +std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const { + PLSSVM_ASSERT(space_ != execution_space::automatic, "The automatic execution_space may not be provided to this function!"); + + std::vector<::plssvm::detail::memory_size> max_mem_alloc_size(this->num_available_devices()); + switch (space_) { + case execution_space::automatic: + throw backend_exception{ "Unsupported execution_space::automatic provided!" 
}; + case execution_space::cuda: + case execution_space::hip: + max_mem_alloc_size = this->get_device_memory(); + break; + case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { + for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { + max_mem_alloc_size[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::info::device::max_mem_alloc_size>()) }; + } + }); + break; + case execution_space::hpx: + case execution_space::openmp: + case execution_space::threads: + case execution_space::serial: + max_mem_alloc_size = this->get_device_memory(); + break; + // TODO: implement for Kokkos::Experimental::OpenMPTarget and Kokkos::Experimental::OpenACC + case execution_space::openmp_target: + case execution_space::openacc: + throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; + } + return max_mem_alloc_size; +} + +std::size_t csvm::get_max_work_group_size(const std::size_t device_id) const { + PLSSVM_ASSERT(device_id < this->num_available_devices(), "Invalid device {} requested!", device_id); + PLSSVM_ASSERT(space_ != execution_space::automatic, "The automatic execution_space may not be provided to this function!"); + + // NOTE: the maximum theoretical work-group size, may be additionally limited by the amount of used scratch memory + return devices_[device_id].execute_and_return([](const auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + // NOTE: CUDA + HIP + SYCL: returns the maximum possible number of threads, due to no further limitations in the dummy functor (like, e.g., scratch memory) + // NOTE: HPX + Serial: hardcoded to 1 + // NOTE: OpenMP: should be 1-2; most likely 1 + // NOTE: Threads: should be equal to number of hardware threads IF hwloc is enabled; otherwise 1 + // NOTE: OpenMPTarget: hardcoded to 256 + // NOTE: OpenACC: hardcoded to 512 + + // NOTE: the functor types doesn't matter -> the dummy class + return Kokkos::TeamPolicy{}.team_size_max(dummy{}, Kokkos::ParallelForTag{}); + }); +} + +::plssvm::detail::dim_type csvm::get_max_grid_size([[maybe_unused]] const std::size_t device_id) const { + PLSSVM_ASSERT(device_id < this->num_available_devices(), "Invalid device {} requested!", device_id); + PLSSVM_ASSERT(space_ != execution_space::automatic, "The automatic execution_space may not be provided to this function!"); + + // NOTE: Kokkos only supports one-dimensional execution ranges! + // NOTE: we only use two-dimensional kernels! + switch (space_) { + case execution_space::automatic: + throw backend_exception{ "Unsupported execution_space::automatic provided!" 
}; + case execution_space::cuda: + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA(([&]() -> ::plssvm::detail::dim_type { + const cudaDeviceProp &prop = devices_[device_id].get().cuda_device_prop(); + const auto max_grid_size = static_cast(std::sqrt(prop.maxGridSize[0])); + return { max_grid_size, max_grid_size, 1ull }; + })); + case execution_space::hip: + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP(([&]() -> ::plssvm::detail::dim_type { + const hipDeviceProp_t &prop = devices_[device_id].get().hip_device_prop(); + const auto max_grid_size = static_cast(std::sqrt(prop.maxGridSize[0])); + return { max_grid_size, max_grid_size, 1ull }; + })); + case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL(([&]() -> ::plssvm::detail::dim_type { + // TODO: replace with standardized function if there will be one in the future +#if defined(SYCL_EXT_ONEAPI_MAX_WORK_GROUP_QUERY) + const ::sycl::id<3> native_range = devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::ext::oneapi::experimental::info::device::max_work_groups<3>>(); +#else + // fallback to maximum theoretical value, may break at runtime! + ::sycl::id<3> native_range{}; + const std::size_t max_int32 = std::numeric_limits::max(); + const std::size_t max_uint16 = std::numeric_limits::max(); + if (target_ == target_platform::cpu) { + native_range = ::sycl::id<3>{ max_int32, max_int32, max_int32 }; + } else { + native_range = ::sycl::id<3>{ max_int32, max_uint16, max_uint16 }; + } +#endif + // note: account for SYCL's different iteration range! + return { native_range[2], native_range[1], native_range[0] }; + })); + case execution_space::hpx: + case execution_space::openmp: + case execution_space::threads: + case execution_space::serial: + return { std::numeric_limits::max(), std::numeric_limits::max(), 1ull }; + case execution_space::openmp_target: + case execution_space::openacc: + // TODO: implement for Kokkos::Experimental::OpenMPTarget and Kokkos::Experimental::OpenACC + throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; + } + // all possible cases should be handled by the previous switch + // -> silence missing return statement compiler warnings due to throw statement + ::plssvm::detail::unreachable(); +} + +//***************************************************// +// fit // +//***************************************************// + +auto csvm::run_assemble_kernel_matrix_explicit(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const parameter ¶ms, const device_ptr_type &data_d, const device_ptr_type &q_red_d, real_type QA_cost) const -> device_ptr_type { + const unsigned long long num_rows_reduced = data_d.shape().x - 1; + const unsigned long long num_features = data_d.shape().y; + + // calculate the number of data points this device is responsible for + const unsigned long long device_specific_num_rows = data_distribution_->place_specific_num_rows(device_id); + + // get the offset of the data points this device is responsible for + const unsigned long long row_offset = data_distribution_->place_row_offset(device_id); + + // calculate the number of matrix entries + const ::plssvm::detail::triangular_data_distribution &dist = dynamic_cast<::plssvm::detail::triangular_data_distribution &>(*data_distribution_); + const std::size_t num_entries_padded = dist.calculate_explicit_kernel_matrix_num_entries_padded(device_id); + + device_ptr_type kernel_matrix_d{ num_entries_padded, devices_[device_id] }; // only explicitly store the upper 
triangular matrix + const real_type cost_factor = real_type{ 1.0 } / params.cost; + const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); + + // save the team size + const int team_size = detail::dim_type_to_native(exec.block); + + return devices_[device_id].execute_and_return([&](auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + constexpr execution_space space = kokkos_type_to_execution_space_v; + + for (const auto &[partial_grid, offsets] : exec.grids) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; + + switch (params.kernel_type) { + case kernel_function_type::linear: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_linear", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get().get(), data_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get().get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x }); + } + break; + case kernel_function_type::polynomial: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_polynomial", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get().get(), data_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get().get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, params.degree, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::rbf: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_rbf", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get().get(), data_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get().get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::sigmoid: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_sigmoid", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get().get(), data_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get().get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::laplacian: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_laplacian", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get().get(), data_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get().get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::chi_squared: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_chi_squared", team_policy.set_scratch_size(0, 
Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get().get(), data_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get().get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + } + } + detail::device_synchronize(device); + + return std::move(kernel_matrix_d); + }); +} + +void csvm::run_blas_level_3_kernel_explicit(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const ::plssvm::detail::execution_range &mirror_exec, const real_type alpha, const device_ptr_type &A_d, const device_ptr_type &B_d, const real_type beta, device_ptr_type &C_d) const { + const unsigned long long num_rhs = B_d.shape().x; + const unsigned long long num_rows = B_d.shape().y; + + devices_[device_id].execute([&](auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + constexpr execution_space space = kokkos_type_to_execution_space_v; + + // calculate the number of data points this device is responsible for + const unsigned long long device_specific_num_rows = data_distribution_->place_specific_num_rows(device_id); + // get the offset of the data points this device is responsible for + const unsigned long long row_offset = data_distribution_->place_row_offset(device_id); + // the necessary amount of scratch memory for the kernels + const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); + + // save the team size + const int team_size = detail::dim_type_to_native(exec.block); + + for (const auto &[partial_grid, offsets] : exec.grids) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; + + Kokkos::parallel_for("blas_level_3_kernel_explicit", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_symm{ num_rows, num_rhs, device_specific_num_rows, row_offset, alpha, A_d.get().get(), B_d.get().get(), beta, C_d.get().get(), offsets.x, offsets.y, partial_grid.x }); + } + + // save the team size + const int mirror_team_size = detail::dim_type_to_native(mirror_exec.block); + + for (const auto &[partial_grid, offsets] : mirror_exec.grids) { + const unsigned long long num_mirror_rows = num_rows - row_offset - device_specific_num_rows; + + if (num_mirror_rows > 0) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy team_policy{ device, native_partial_grid, mirror_team_size }; + + Kokkos::parallel_for("blas_level_3_kernel_explicit_mirror", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_symm_mirror{ num_rows, num_rhs, num_mirror_rows, device_specific_num_rows, row_offset, alpha, A_d.get().get(), B_d.get().get(), beta, C_d.get().get(), offsets.x, offsets.y, partial_grid.x }); + } + } + detail::device_synchronize(device); + }); +} + +void csvm::run_inplace_matrix_addition(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, device_ptr_type &lhs_d, const device_ptr_type &rhs_d) const { + const unsigned long long num_rhs = lhs_d.shape().x; + + devices_[device_id].execute([&](auto &device) { + using 
kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + constexpr execution_space space = kokkos_type_to_execution_space_v; + + // save the team size + const int team_size = detail::dim_type_to_native(exec.block); + + for (const auto &[partial_grid, offsets] : exec.grids) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + + // create a Kokkos TeamPolicy + const Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; + + Kokkos::parallel_for("inplace_matrix_addition", team_policy, detail::device_kernel_inplace_matrix_add{ num_rhs, lhs_d.get().get(), rhs_d.get().get(), offsets.x, offsets.y, partial_grid.x }); + } + detail::device_synchronize(device); + }); +} + +void csvm::run_inplace_matrix_scale(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, device_ptr_type &lhs_d, const real_type scale) const { + const unsigned long long num_rhs = lhs_d.shape().x; + + devices_[device_id].execute([&](auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + constexpr execution_space space = kokkos_type_to_execution_space_v; + + // save the team size + const int team_size = detail::dim_type_to_native(exec.block); + + for (const auto &[partial_grid, offsets] : exec.grids) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + + // create a Kokkos TeamPolicy + const Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; + + Kokkos::parallel_for("inplace_matrix_scale", team_policy, detail::device_kernel_inplace_matrix_scale{ num_rhs, lhs_d.get().get(), scale, offsets.x, offsets.y, partial_grid.x }); + } + detail::device_synchronize(device); + }); +} + +void csvm::run_assemble_kernel_matrix_implicit_blas_level_3(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const real_type alpha, const device_ptr_type &A_d, const parameter ¶ms, const device_ptr_type &q_red, const real_type QA_cost, const device_ptr_type &B_d, device_ptr_type &C_d) const { + const unsigned long long num_rows_reduced = A_d.shape().x - 1; + const unsigned long long num_features = A_d.shape().y; + const unsigned long long num_classes = B_d.shape().x; + + devices_[device_id].execute([&](auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + constexpr execution_space space = kokkos_type_to_execution_space_v; + + // calculate the number of data points this device is responsible for + const unsigned long long device_specific_num_rows = data_distribution_->place_specific_num_rows(device_id); + // get the offset of the data points this device is responsible for + const unsigned long long row_offset = data_distribution_->place_row_offset(device_id); + + const real_type cost_factor = real_type{ 1.0 } / params.cost; + const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); + + // save the team size + const int team_size = detail::dim_type_to_native(exec.block); + + for (const auto &[partial_grid, offsets] : exec.grids) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; + + switch 
(params.kernel_type) { + case kernel_function_type::linear: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_linear", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get().get(), A_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get().get(), C_d.get().get(), num_classes, offsets.x, offsets.y, partial_grid.x }); + } + break; + case kernel_function_type::polynomial: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_polynomial", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get().get(), A_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get().get(), C_d.get().get(), num_classes, offsets.x, offsets.y, partial_grid.x, params.degree, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::rbf: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_rbf", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get().get(), A_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get().get(), C_d.get().get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::sigmoid: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_sigmoid", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get().get(), A_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get().get(), C_d.get().get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::laplacian: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_laplacian", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get().get(), A_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get().get(), C_d.get().get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::chi_squared: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_chi_squared", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get().get(), A_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get().get(), C_d.get().get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + } + } + detail::device_synchronize(device); + }); +} + +//***************************************************// +// predict, score // +//***************************************************// + +auto csvm::run_w_kernel(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const device_ptr_type &alpha_d, const 
device_ptr_type &sv_d) const -> device_ptr_type { + const unsigned long long num_classes = alpha_d.shape().x; + const unsigned long long num_sv = alpha_d.shape().y; + const unsigned long long device_specific_num_sv = sv_d.shape().x; + const unsigned long long num_features = sv_d.shape().y; + + // get the offset of the data points this device is responsible for + const unsigned long long sv_offset = data_distribution_->place_row_offset(device_id); + + device_ptr_type w_d{ shape{ num_classes, num_features }, shape{ PADDING_SIZE, PADDING_SIZE }, devices_[device_id] }; + + const std::size_t scratch_memory_size = static_cast(2u * THREAD_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); + + // save the team size + const int team_size = detail::dim_type_to_native(exec.block); + + return devices_[device_id].execute_and_return([&](auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + constexpr execution_space space = kokkos_type_to_execution_space_v; + + for (const auto &[partial_grid, offsets] : exec.grids) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; + + Kokkos::parallel_for("w_kernel", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_w_linear{ w_d.get().get(), alpha_d.get().get(), sv_d.get().get(), num_classes, num_sv, device_specific_num_sv, sv_offset, offsets.x, offsets.y, partial_grid.x }); + } + detail::device_synchronize(device); + + return std::move(w_d); + }); +} + +auto csvm::run_predict_kernel(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const parameter ¶ms, const device_ptr_type &alpha_d, const device_ptr_type &rho_d, const device_ptr_type &sv_or_w_d, const device_ptr_type &predict_points_d) const -> device_ptr_type { + const unsigned long long num_classes = alpha_d.shape().x; + const unsigned long long num_predict_points = predict_points_d.shape().x; // = device_specific_num_rows + const unsigned long long num_features = predict_points_d.shape().y; + const unsigned long long num_sv = sv_or_w_d.shape().x; + + device_ptr_type out_d{ shape{ num_predict_points, num_classes }, shape{ PADDING_SIZE, PADDING_SIZE }, devices_[device_id] }; + + const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); + + // save the team size + const int team_size = detail::dim_type_to_native(exec.block); + + return devices_[device_id].execute_and_return([&](auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + constexpr execution_space space = kokkos_type_to_execution_space_v; + + for (const auto &[partial_grid, offsets] : exec.grids) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; + + switch (params.kernel_type) { + case kernel_function_type::linear: + { + using functor_type = detail::device_kernel_predict_linear; + Kokkos::parallel_for("predict_kernel_linear", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get().get(), sv_or_w_d.get().get(), rho_d.get().get(), 
predict_points_d.get().get(), num_classes, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x }); + } + break; + case kernel_function_type::polynomial: + { + using functor_type = detail::device_kernel_predict; + Kokkos::parallel_for("predict_kernel_polynomial", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get().get(), alpha_d.get().get(), rho_d.get().get(), sv_or_w_d.get().get(), predict_points_d.get().get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, params.degree, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::rbf: + { + using functor_type = detail::device_kernel_predict; + Kokkos::parallel_for("predict_kernel_rbf", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get().get(), alpha_d.get().get(), rho_d.get().get(), sv_or_w_d.get().get(), predict_points_d.get().get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::sigmoid: + { + using functor_type = detail::device_kernel_predict; + Kokkos::parallel_for("predict_kernel_sigmoid", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get().get(), alpha_d.get().get(), rho_d.get().get(), sv_or_w_d.get().get(), predict_points_d.get().get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::laplacian: + { + using functor_type = detail::device_kernel_predict; + Kokkos::parallel_for("predict_kernel_laplacian", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get().get(), alpha_d.get().get(), rho_d.get().get(), sv_or_w_d.get().get(), predict_points_d.get().get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::chi_squared: + { + using functor_type = detail::device_kernel_predict; + Kokkos::parallel_for("predict_kernel_chi_squared", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get().get(), alpha_d.get().get(), rho_d.get().get(), sv_or_w_d.get().get(), predict_points_d.get().get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + } + } + detail::device_synchronize(device); + + return std::move(out_d); + }); +} + +} // namespace plssvm::kokkos diff --git a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp new file mode 100644 index 000000000..0dfe9adc0 --- /dev/null +++ b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp @@ -0,0 +1,186 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
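All of the kernel launches above share one pattern: a named Kokkos::parallel_for over a Kokkos::TeamPolicy whose level-0 per-team scratch size is set via set_scratch_size(0, Kokkos::PerTeam(...)), with the functor pulling its shared buffer out of team.team_scratch(0). A minimal, self-contained sketch of that pattern (all names here are illustrative and not part of the patch):

#include <Kokkos_Core.hpp>
#include <cstddef>

// toy functor: every team publishes its league rank through a per-team scratch buffer
struct team_scratch_example {
    using member_type = Kokkos::TeamPolicy<>::member_type;
    using scratch_view = Kokkos::View<double *, Kokkos::DefaultExecutionSpace::scratch_memory_space, Kokkos::MemoryUnmanaged>;

    Kokkos::View<double *> out_;

    KOKKOS_FUNCTION void operator()(const member_type &team) const {
        // one double of level-0 scratch memory per team
        scratch_view cache{ team.team_scratch(0), 1 };
        if (team.team_rank() == 0) {
            cache(0) = static_cast<double>(team.league_rank());
        }
        team.team_barrier();
        if (team.team_rank() == 0) {
            out_(team.league_rank()) = cache(0);
        }
    }
};

int main(int argc, char **argv) {
    Kokkos::initialize(argc, argv);
    {
        const int league_size = 4;
        Kokkos::View<double *> out{ "out", static_cast<std::size_t>(league_size) };
        // request the scratch bytes needed for one double per team
        const std::size_t scratch_bytes = team_scratch_example::scratch_view::shmem_size(1);
        Kokkos::TeamPolicy<> policy{ league_size, Kokkos::AUTO };
        Kokkos::parallel_for("team_scratch_example",
                             policy.set_scratch_size(0, Kokkos::PerTeam(scratch_bytes)),
                             team_scratch_example{ out });
        Kokkos::fence();
    }
    Kokkos::finalize();
    return 0;
}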
+ */ + +#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" + +#include "plssvm/backends/Kokkos/detail/device_view_wrapper.hpp" // plssvm::kokkos::detail::{device_view_wrapper, make_device_view_wrapper} +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::detail::device_synchronize +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/shape.hpp" // plssvm::shape + +#include "Kokkos_Core.hpp" // Kokkos::View, Kokkos::HostSpace, Kokkos::MemoryUnmanaged, Kokkos::subview, Kokkos::parallel_for, Kokkos::deep_copy + +#include "fmt/core.h" // fmt::format + +#include // std::min +#include // std::size_t +#include // std::memcpy +#include // std::make_pair +#include // std::vector + +namespace plssvm::kokkos::detail { + +/** + * @brief Typedef for a simple Kokkos::View always targeting the Kokkos::HostSpace. + * @tparam T the type of the view's data + */ +template +using host_view_type = Kokkos::View; + +template +device_ptr::device_ptr(const size_type size, const device_wrapper &device) : + device_ptr{ plssvm::shape{ size, 1 }, plssvm::shape{ 0, 0 }, device } { } + +template +device_ptr::device_ptr(const plssvm::shape shape, const device_wrapper &device) : + device_ptr{ shape, plssvm::shape{ 0, 0 }, device } { } + +template +device_ptr::device_ptr(const plssvm::shape shape, const plssvm::shape padding, const device_wrapper &device) : + base_type{ shape, padding, device } { + data_ = make_device_view_wrapper(device, this->size_padded()); + this->memset(0); +} + +template +void device_ptr::memset(const int pattern, const size_type pos, const size_type num_bytes) { + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + + if (pos >= this->size_padded()) { + throw backend_exception{ fmt::format("Illegal access in memset!: {} >= {}", pos, this->size_padded()) }; + } + const size_type rnum_bytes = std::min(num_bytes, (this->size_padded() - pos) * sizeof(value_type)); + + data_.execute([&](const auto &data) { + queue_.execute([&](const auto &exec) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + + // create view of the device data cast to unsigned char + const Kokkos::View view{ reinterpret_cast(data.data() + pos), rnum_bytes }; + // fill the view with the pattern -> acts like a memset + Kokkos::deep_copy(exec, view, static_cast(pattern)); + }); + }); + + detail::device_synchronize(queue_); +} + +template +void device_ptr::fill(const value_type value, const size_type pos, const size_type count) { + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! 
Maybe *this has been default constructed?"); + + if (pos >= this->size_padded()) { + throw backend_exception{ fmt::format("Illegal access in fill!: {} >= {}", pos, this->size_padded()) }; + } + const size_type rcount = std::min(count, this->size_padded() - pos); + + data_.execute([&](const auto &data) { + // create subview of the device data + auto data_subview = Kokkos::subview(data, std::make_pair(pos, pos + rcount)); + queue_.execute([&](const auto &exec) { + // fill subview with constant data + Kokkos::deep_copy(exec, data_subview, value); + }); + }); + + detail::device_synchronize(queue_); +} + +template +void device_ptr::copy_to_device(const_host_pointer_type data_to_copy, const size_type pos, const size_type count) { + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_to_copy != nullptr, "Invalid host pointer for the data to copy!"); + + const size_type rcount = std::min(count, this->size_padded() - pos); + + data_.execute([&](const auto &data) { + // create view of the host data + const host_view_type host_view{ data_to_copy, rcount }; + // create subview of the device data + auto data_subview = Kokkos::subview(data, std::make_pair(pos, pos + rcount)); + queue_.execute([&](const auto &exec) { + // fill subview with constant data + Kokkos::deep_copy(exec, data_subview, host_view); + }); + }); + + detail::device_synchronize(queue_); +} + +template +void device_ptr::copy_to_device_strided(const_host_pointer_type data_to_copy, const std::size_t spitch, const std::size_t width, const std::size_t height) { + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_to_copy != nullptr, "Invalid host pointer for the data to copy!"); + + if (width > spitch) { + throw backend_exception{ fmt::format("Invalid width and spitch combination specified (width: {} <= spitch: {})!", width, spitch) }; + } + + // TODO: strided copy to device in Kokkos currently not possible + if (spitch == width) { + // can use normal copy since we have no line strides + this->copy_to_device(data_to_copy, 0, width * height); + } else { + std::vector temp(this->shape_padded().x * height, value_type{ 0.0 }); + value_type *pos = temp.data(); + for (std::size_t row = 0; row < height; ++row) { + std::memcpy(pos, data_to_copy + row * spitch, width * sizeof(value_type)); + pos += this->shape_padded().x; + } + this->copy_to_device(temp); + } + + detail::device_synchronize(queue_); +} + +template +void device_ptr::copy_to_host(host_pointer_type buffer, const size_type pos, const size_type count) const { + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! 
Maybe *this has been default constructed?"); + PLSSVM_ASSERT(buffer != nullptr, "Invalid host pointer for the data to copy!"); + + const size_type rcount = std::min(count, this->size_padded() - pos); + + data_.execute([&](const auto &data) { + // create view of the host data + const host_view_type host_view{ buffer, rcount }; + // create subview of the device data + auto data_subview = Kokkos::subview(data, std::make_pair(pos, pos + rcount)); + queue_.execute([&](const auto &exec) { + // fill subview with constant data + Kokkos::deep_copy(exec, host_view, data_subview); + }); + }); + + detail::device_synchronize(queue_); +} + +template +void device_ptr::copy_to_other_device(device_ptr &target, const size_type pos, const size_type count) const { + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(target.get() != device_pointer_type{}, "Invalid target pointer! Maybe target has been default constructed?"); + + const size_type rcount = std::min(count, this->size_padded() - pos); + if (target.size_padded() < rcount) { + throw backend_exception{ fmt::format("Buffer too small to perform copy (needed: {}, provided: {})!", rcount, target.size_padded()) }; + } + + // TODO: use Kokkos function? + std::vector temp(rcount); + this->copy_to_host(temp, pos, rcount); + target.copy_to_device(temp); + + detail::device_synchronize(queue_); +} + +template class device_ptr; +template class device_ptr; + +} // namespace plssvm::kokkos::detail diff --git a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp new file mode 100644 index 000000000..35dd6c2e9 --- /dev/null +++ b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp @@ -0,0 +1,148 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
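memset, fill, and the host-device copies above are all expressed as Kokkos::deep_copy calls on (sub)views tied to the device's execution-space instance, followed by a fence. A stripped-down, standalone sketch of the same idea (illustrative only):

#include <Kokkos_Core.hpp>
#include <cstddef>
#include <utility>
#include <vector>

int main(int argc, char **argv) {
    Kokkos::initialize(argc, argv);
    {
        Kokkos::DefaultExecutionSpace exec{};
        Kokkos::View<double *> device_data{ "device_data", 16 };

        // fill a sub-range with a constant -> the memset/fill pattern
        auto sub = Kokkos::subview(device_data, std::make_pair(std::size_t{ 4 }, std::size_t{ 12 }));
        Kokkos::deep_copy(exec, sub, 1.5);

        // copy host data into the same sub-range through an unmanaged host view -> the copy_to_device pattern
        const std::vector<double> host(8, 2.5);
        const Kokkos::View<const double *, Kokkos::HostSpace, Kokkos::MemoryUnmanaged> host_view{ host.data(), host.size() };
        Kokkos::deep_copy(exec, sub, host_view);

        // make the asynchronous copies visible -> the device_synchronize pattern
        exec.fence();
    }
    Kokkos::finalize();
    return 0;
}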
+ */ + +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" + +#include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/detail/logging_without_performance_tracking.hpp" // plssvm::detail::log_untracked +#include "plssvm/detail/string_utility.hpp" // plssvm::detail::as_lower_case +#include "plssvm/detail/utility.hpp" // plssvm::detail::contains +#include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level + +#include "Kokkos_Core.hpp" // Kokkos::num_devices, Kokkos::ExecutionSpace + +#include // std::vector + +#if defined(KOKKOS_ENABLE_CUDA) + #define PLSSVM_CUDA_ERROR_CHECK(err) \ + if ((err) != cudaSuccess) { \ + throw plssvm::kokkos::backend_exception{ fmt::format("Kokkos::Cuda assert '{}': {}", cudaGetErrorName(err), cudaGetErrorString(err)) }; \ + } +#endif + +#if defined(KOKKOS_ENABLE_HIP) + #define PLSSVM_HIP_ERROR_CHECK(err) \ + if ((err) != hipSuccess) { \ + throw plssvm::kokkos::backend_exception{ fmt::format("HIP assert '{}': {}", hipGetErrorName(err), hipGetErrorString(err)) }; \ + } +#endif + +namespace plssvm::kokkos::detail { + +std::vector get_device_list(const execution_space space, [[maybe_unused]] const target_platform target) { + PLSSVM_ASSERT(space != execution_space::automatic, "The automatic execution_space may not be provided to this function!"); + + std::vector devices{}; + switch (space) { + case execution_space::automatic: + throw backend_exception{ "Unsupported execution_space::automatic provided!" 
}; + case execution_space::cuda: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { + for (int device = 0; device < Kokkos::num_devices(); ++device) { + // create CUDA stream using the CUDA specific functions + PLSSVM_CUDA_ERROR_CHECK(cudaSetDevice(device)); + cudaStream_t stream{}; + PLSSVM_CUDA_ERROR_CHECK(cudaStreamCreate(&stream)); + // create Kokkos execution space for the specific device + // Note: it is important to pass the cudaStream_t lifetime to be managed by Kokkos + devices.emplace_back(Kokkos::Cuda(stream, Kokkos::Impl::ManageStream::yes)); + } + }); + break; + case execution_space::hip: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { + for (int device = 0; device < Kokkos::num_devices(); ++device) { + // HIP CUDA stream using the HIP specific functions + PLSSVM_HIP_ERROR_CHECK(hipSetDevice(device)); + hipStream_t stream{}; + PLSSVM_HIP_ERROR_CHECK(hipStreamCreate(&stream)); + // create Kokkos execution space for the specific device + // Note: it is important to pass the hipStream_t lifetime to be managed by Kokkos + devices.emplace_back(Kokkos::HIP(stream, Kokkos::Impl::ManageStream::yes)); + } + }); + break; + case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL(([&]() { + // all user provided sycl::queues must be in-order queues + ::sycl::property_list props{ ::sycl::property::queue::in_order{} }; + + for (const auto &platform : ::sycl::platform::get_platforms()) { + for (const auto &device : platform.get_devices()) { + // Note: Kokkos is IntelLLVM/DPC++/icpx only + if (device.is_cpu() && target == target_platform::cpu) { + devices.emplace_back(Kokkos::SYCL{ ::sycl::queue{ device, props } }); + } else if (device.is_gpu()) { + // the current device is a GPU + // get vendor string and convert it to all lower case + const std::string vendor_string = ::plssvm::detail::as_lower_case(device.get_info<::sycl::info::device::vendor>()); + // get platform name of current GPU device and convert it to all lower case + const std::string platform_string = ::plssvm::detail::as_lower_case(platform.get_info<::sycl::info::platform::name>()); + + // check vendor string and insert to correct target platform + if (::plssvm::detail::contains(vendor_string, "nvidia") && target == target_platform::gpu_nvidia) { + devices.emplace_back(Kokkos::SYCL{ ::sycl::queue{ device, props } }); + } else if ((::plssvm::detail::contains(vendor_string, "amd") || ::plssvm::detail::contains(vendor_string, "advanced micro devices")) && target == target_platform::gpu_amd) { + devices.emplace_back(Kokkos::SYCL{ ::sycl::queue{ device, props } }); + } else if (::plssvm::detail::contains(vendor_string, "intel") && target == target_platform::gpu_intel) { + devices.emplace_back(Kokkos::SYCL{ ::sycl::queue{ device, props } }); + } + } + } + } + })); + break; + case execution_space::hpx: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX([&]() { + devices.emplace_back(Kokkos::Experimental::HPX{}); + }); + break; + case execution_space::openmp: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP([&]() { + // Note: if OpenMP should be used as device must be set in order for it to work! 
+ if (omp_get_nested() == 0) { + ::plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::warning, + "WARNING: In order for Kokkos::OpenMP to work properly, we have to set \"omp_set_nested(1)\"!\n"); + // enable OMP_NESTED support + // Note: function is officially deprecated but still necessary for Kokkos::OpenMP to work properly + omp_set_nested(1); + } + devices.emplace_back(Kokkos::OpenMP{}); + }); + break; + case execution_space::openmp_target: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET([&]() { + // TODO: implement multi-GPU support? + devices.emplace_back(Kokkos::Experimental::OpenMPTarget{}); + }); + break; + case execution_space::openacc: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC([&]() { + // TODO: implement multi-GPU support? + devices.emplace_back(Kokkos::Experimental::OpenACC{}); + }); + break; + case execution_space::threads: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS([&]() { + devices.emplace_back(Kokkos::Threads{}); + }); + break; + case execution_space::serial: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL([&]() { + devices.emplace_back(Kokkos::Serial{}); + }); + break; + } + return devices; +} + +} // namespace plssvm::kokkos::detail diff --git a/src/plssvm/backends/Kokkos/detail/pinned_memory.cpp b/src/plssvm/backends/Kokkos/detail/pinned_memory.cpp new file mode 100644 index 000000000..919cbdaa1 --- /dev/null +++ b/src/plssvm/backends/Kokkos/detail/pinned_memory.cpp @@ -0,0 +1,46 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/backends/Kokkos/detail/pinned_memory.hpp" + +#include "plssvm/backends/host_pinned_memory.hpp" // plssvm::detail::host_pinned_memory +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception + +#include // std::size_t +#include // std::terminate +#include // std::cerr, std::endl +#include // std::vector + +namespace plssvm::kokkos::detail { + +template +pinned_memory::pinned_memory(const std::vector &vec) : + pinned_memory{ vec.data(), vec.size() } { } + +template +pinned_memory::pinned_memory(const T *ptr, const std::size_t size) : + ::plssvm::detail::host_pinned_memory{ ptr } { + this->pin_memory(size * sizeof(T)); +} + +template +pinned_memory::~pinned_memory() { + try { + if (is_pinned_ && ptr_ != nullptr) { + this->unpin_memory(); + } + } catch (const plssvm::exception &e) { + std::cerr << e.what_with_loc() << std::endl; + std::terminate(); + } +} + +template class pinned_memory; +template class pinned_memory; + +} // namespace plssvm::kokkos::detail diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp new file mode 100644 index 000000000..5dc3f8cda --- /dev/null +++ b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -0,0 +1,168 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
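get_device_list above builds one execution-space instance per physical device, e.g. by giving every GPU its own CUDA stream and wrapping it in a Kokkos::Cuda instance. A simplified sketch of that idea, assuming a CUDA-enabled Kokkos build and using the public non-owning Kokkos::Cuda(cudaStream_t) constructor (the patch instead hands stream ownership to Kokkos via Kokkos::Impl::ManageStream::yes and checks every CUDA call):

#include <Kokkos_Core.hpp>
#include <cuda_runtime.h>
#include <vector>

int main(int argc, char **argv) {
    Kokkos::initialize(argc, argv);
    {
        std::vector<Kokkos::Cuda> devices{};
        for (int device = 0; device < Kokkos::num_devices(); ++device) {
            // error checking (see the PLSSVM_CUDA_ERROR_CHECK macro above) omitted for brevity
            cudaSetDevice(device);
            cudaStream_t stream{};
            cudaStreamCreate(&stream);
            // non-owning Kokkos::Cuda instance bound to this device's stream
            devices.emplace_back(stream);
        }
        for (Kokkos::Cuda &exec : devices) {
            // per-device synchronization, as used by device_synchronize
            exec.fence();
        }
        // note: with the non-owning constructor the created streams must be destroyed manually
    }
    Kokkos::finalize();
    return 0;
}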
+ */ + +#include "plssvm/backends/Kokkos/detail/utility.hpp" + +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type +#include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_* +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/detail/string_utility.hpp" // plssvm::detail::as_lower_case +#include "plssvm/detail/utility.hpp" // plssvm::detail::contains +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "Kokkos_Core.hpp" // Kokkos::ExecutionSpace, Kokkos::Impl::ManageStream +#include "Kokkos_Macros.hpp" // Kokkos macros + +#include "fmt/core.h" // fmt::format + +#include // std::map +#include // std::string +#include // std::unordered_set +#include // std::vector + +namespace plssvm::kokkos::detail { + +int dim_type_to_native(const ::plssvm::detail::dim_type &dims) { + return static_cast(dims.x * dims.y * dims.z); +} + +std::map> available_target_platform_to_execution_space_mapping() { + std::map> available_map{}; + + // iterate over all available execution spaces + for (const execution_space space : list_available_execution_spaces()) { + switch (space) { + case execution_space::automatic: + // nothing to do here + break; + case execution_space::cuda: + // NVIDIA GPUs only + available_map[target_platform::gpu_nvidia].push_back(execution_space::cuda); + break; + case execution_space::hip: + // NVIDIA or AMD GPUs possible (both simultaneously are unsupported) + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { +#if defined(__HIP_PLATFORM_AMD__) + available_map[target_platform::gpu_amd].push_back(execution_space::hip); +#elif defined(__HIP_PLATFORM_NVIDIA__) + available_map[target_platform::gpu_nvidia].push_back(execution_space::hip); +#endif + }); + break; + case execution_space::sycl: + // list all potential target platforms currently available in SYCL + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { + std::unordered_set targets{}; + for (const auto &platform : ::sycl::platform::get_platforms()) { + for (const auto &device : platform.get_devices()) { + // Note: Kokkos is Intel LLVM/DPC++/icpx only + if (device.is_cpu()) { + targets.insert(target_platform::cpu); + } else if (device.is_gpu()) { + // the current device is a GPU + // get vendor string and convert it to all lower case + const std::string vendor_string = ::plssvm::detail::as_lower_case(device.get_info<::sycl::info::device::vendor>()); + // get platform name of current GPU device and convert it to all lower case + const std::string platform_string = ::plssvm::detail::as_lower_case(platform.get_info<::sycl::info::platform::name>()); + + // check vendor string and insert to correct target platform + if (::plssvm::detail::contains(vendor_string, "nvidia")) { + targets.insert(target_platform::gpu_nvidia); + } else if (::plssvm::detail::contains(vendor_string, "amd") || ::plssvm::detail::contains(vendor_string, "advanced micro devices")) { + targets.insert(target_platform::gpu_amd); + } else if (::plssvm::detail::contains(vendor_string, "intel")) { + targets.insert(target_platform::gpu_intel); + } + } + } + } + // now we know which target platforms are available in SYCL -> add them to our mapping + for (const target_platform target : targets) { + available_map[target].push_back(execution_space::sycl); + } + }); + break; + case 
execution_space::openacc: + // TODO: restrict to available devices + // all GPUs and CPU possible + available_map[target_platform::gpu_nvidia].push_back(execution_space::sycl); + available_map[target_platform::gpu_amd].push_back(execution_space::sycl); + available_map[target_platform::gpu_intel].push_back(execution_space::sycl); + available_map[target_platform::cpu].push_back(execution_space::sycl); + break; + case execution_space::openmp_target: + // TODO: restrict to available devices + // all GPUs + available_map[target_platform::gpu_nvidia].push_back(execution_space::openmp_target); + available_map[target_platform::gpu_amd].push_back(execution_space::openmp_target); + available_map[target_platform::gpu_intel].push_back(execution_space::openmp_target); + break; + case execution_space::hpx: + case execution_space::openmp: + case execution_space::threads: + case execution_space::serial: + // all these execution spaces are CPU only + available_map[target_platform::cpu].push_back(space); + break; + } + } + + // the map must at least have one entry + PLSSVM_ASSERT(!available_map.empty(), "At least one target platform must be available!"); + // the automatic target platform must not be present + PLSSVM_ASSERT(!::plssvm::detail::contains(available_map, target_platform::automatic), "The automatic target platform may not be present!"); + + return available_map; +} + +std::string get_device_name([[maybe_unused]] const device_wrapper &dev) { + switch (dev.get_execution_space()) { + case execution_space::automatic: + throw backend_exception{ "Unsupported execution_space::automatic provided!" }; + case execution_space::cuda: + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA([&]() { + return std::string{ dev.get().cuda_device_prop().name }; + }); + case execution_space::hip: + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP([&]() { + return std::string{ dev.get().hip_device_prop().name }; + }); + case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL([&]() { + return dev.get().sycl_queue().get_device().get_info<::sycl::info::device::name>(); + }); + case execution_space::hpx: + return "HPX CPU host device"; + case execution_space::openmp: + return "OpenMP CPU host device"; + case execution_space::openmp_target: + return "OpenMP target device"; + case execution_space::openacc: + return "OpenACC target device"; + case execution_space::threads: + return "std::threads CPU host device"; + case execution_space::serial: + return "serial CPU host device"; + } + return "unknown"; +} + +void device_synchronize(const device_wrapper &dev) { + dev.execute([](const auto &device) { + device.fence(); + }); +} + +std::string get_kokkos_version() { + // get the Kokkos version + return fmt::format("{}.{}.{}", KOKKOS_VERSION_MAJOR, KOKKOS_VERSION_MINOR, KOKKOS_VERSION_PATCH); +} + +} // namespace plssvm::kokkos::detail diff --git a/src/plssvm/backends/Kokkos/exceptions.cpp b/src/plssvm/backends/Kokkos/exceptions.cpp new file mode 100644 index 000000000..4186e4008 --- /dev/null +++ b/src/plssvm/backends/Kokkos/exceptions.cpp @@ -0,0 +1,21 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
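The remaining helpers above reduce to a handful of plain Kokkos queries: get_device_name reads a backend-specific device property, device_synchronize fences the instance, and get_kokkos_version stitches together the KOKKOS_VERSION_* macros. A tiny standalone sketch of the portable subset:

#include <Kokkos_Core.hpp>
#include <cstdio>

int main(int argc, char **argv) {
    Kokkos::initialize(argc, argv);
    {
        Kokkos::DefaultExecutionSpace exec{};
        // backend name and available concurrency of the default execution space
        std::printf("execution space: %s (concurrency: %d)\n", exec.name(), static_cast<int>(exec.concurrency()));
        // the same version triple assembled by get_kokkos_version
        std::printf("Kokkos version: %d.%d.%d\n", KOKKOS_VERSION_MAJOR, KOKKOS_VERSION_MINOR, KOKKOS_VERSION_PATCH);
        // the fence used by device_synchronize
        exec.fence();
    }
    Kokkos::finalize();
    return 0;
}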
+ */ + +#include "plssvm/backends/Kokkos/exceptions.hpp" + +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/exceptions/source_location.hpp" // plssvm::source_location + +#include // std::string + +namespace plssvm::kokkos { + +backend_exception::backend_exception(const std::string &msg, source_location loc) : + ::plssvm::exception{ msg, "kokkos::backend_exception", loc } { } + +} // namespace plssvm::kokkos diff --git a/src/plssvm/backends/Kokkos/execution_space.cpp b/src/plssvm/backends/Kokkos/execution_space.cpp new file mode 100644 index 000000000..0caae212f --- /dev/null +++ b/src/plssvm/backends/Kokkos/execution_space.cpp @@ -0,0 +1,89 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/backends/Kokkos/execution_space.hpp" + +#include "plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp" // plssvm::kokkos::detail::constexpr_available_execution_spaces +#include "plssvm/detail/string_utility.hpp" // plssvm::detail::to_lower_case + +#include // std::array +#include // std::ios::failbit +#include // std::istream +#include // std::ostream +#include // std::string +#include // std::vector + +namespace plssvm::kokkos { + +std::ostream &operator<<(std::ostream &out, const execution_space space) { + switch (space) { + case execution_space::automatic: + return out << "automatic"; + case execution_space::cuda: + return out << "Cuda"; + case execution_space::hip: + return out << "HIP"; + case execution_space::sycl: + return out << "SYCL"; + case execution_space::hpx: + return out << "HPX"; + case execution_space::openmp: + return out << "OpenMP"; + case execution_space::openmp_target: + return out << "OpenMPTarget"; + case execution_space::openacc: + return out << "OpenACC"; + case execution_space::threads: + return out << "Threads"; + case execution_space::serial: + return out << "Serial"; + } + return out << "unknown"; +} + +std::istream &operator>>(std::istream &in, execution_space &space) { + std::string str{}; + in >> str; + ::plssvm::detail::to_lower_case(str); + + if (str == "automatic" || str == "auto") { + space = execution_space::automatic; + } else if (str == "cuda") { + space = execution_space::cuda; + } else if (str == "hip") { + space = execution_space::hip; + } else if (str == "sycl") { + space = execution_space::sycl; + } else if (str == "hpx") { + space = execution_space::hpx; + } else if (str == "openmp") { + space = execution_space::openmp; + } else if (str == "openmp_target" || str == "openmptarget") { + space = execution_space::openmp_target; + } else if (str == "openacc") { + space = execution_space::openacc; + } else if (str == "threads" || str == "std::threads") { + space = execution_space::threads; + } else if (str == "serial") { + space = execution_space::serial; + } else { + in.setstate(std::ios::failbit); + } + return in; +} + +std::vector list_available_execution_spaces() { + // always add the automatic execution space + std::vector spaces{ execution_space::automatic }; + // add all other available execution spaces + constexpr auto arr = detail::constexpr_available_execution_spaces(); + spaces.insert(spaces.cend(), arr.begin(), arr.end()); + return spaces; +} + +} // namespace plssvm::kokkos diff --git 
a/src/plssvm/detail/cmd/parser_predict.cpp b/src/plssvm/detail/cmd/parser_predict.cpp index c1a8a5be3..656d9a76d 100644 --- a/src/plssvm/detail/cmd/parser_predict.cpp +++ b/src/plssvm/detail/cmd/parser_predict.cpp @@ -9,6 +9,7 @@ #include "plssvm/detail/cmd/parser_predict.hpp" #include "plssvm/backend_types.hpp" // plssvm::list_available_backends +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::list_available_execution_spaces #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::list_available_sycl_implementations #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT @@ -51,6 +52,9 @@ parser_predict::parser_predict(int argc, char **argv) { #if defined(PLSSVM_HAS_SYCL_BACKEND) ("sycl_implementation_type", fmt::format("choose the SYCL implementation to be used in the SYCL backend: {}", fmt::join(sycl::list_available_sycl_implementations(), "|")), cxxopts::value()->default_value(fmt::format("{}", sycl_implementation_type))) #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + ("kokkos_execution_space", fmt::format("choose the Kokkos execution space to be used in the Kokkos backend: {}", fmt::join(kokkos::list_available_execution_spaces(), "|")), cxxopts::value()->default_value(fmt::format("{}", kokkos_execution_space))) +#endif #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) ("performance_tracking", "the output YAML file where the performance tracking results are written to; if not provided, the results are dumped to stderr", cxxopts::value()) #endif @@ -101,18 +105,38 @@ parser_predict::parser_predict(int argc, char **argv) { target = result["target_platform"].as(); #if defined(PLSSVM_HAS_SYCL_BACKEND) - // parse SYCL implementation used in the SYCL backend - sycl_implementation_type = result["sycl_implementation_type"].as(); - - // assembly warning condition - const std::vector target_platforms = { target == target_platform::automatic ? determine_default_target_platform() : target }; - const bool sycl_backend_is_used = backend == backend_type::sycl || (backend == backend_type::automatic && determine_default_backend(list_available_backends(), target_platforms) == backend_type::sycl); - - // warn if a SYCL implementation type is explicitly set but SYCL isn't the current (automatic) backend - if (!sycl_backend_is_used && sycl_implementation_type != sycl::implementation_type::automatic) { - detail::log_untracked(verbosity_level::full | verbosity_level::warning, - "WARNING: explicitly set a SYCL implementation type but the current backend isn't SYCL; ignoring --sycl_implementation_type={}\n", - sycl_implementation_type); + { + // parse SYCL implementation used in the SYCL backend + sycl_implementation_type = result["sycl_implementation_type"].as(); + + // assembly warning condition + const std::vector target_platforms = { target == target_platform::automatic ? 
determine_default_target_platform() : target }; + const bool sycl_backend_is_used = backend == backend_type::sycl || (backend == backend_type::automatic && determine_default_backend(list_available_backends(), target_platforms) == backend_type::sycl); + + // warn if a SYCL implementation type is explicitly set but SYCL isn't the current (automatic) backend + if (!sycl_backend_is_used && sycl_implementation_type != sycl::implementation_type::automatic) { + detail::log_untracked(verbosity_level::full | verbosity_level::warning, + "WARNING: explicitly set a SYCL implementation type but the current backend isn't SYCL; ignoring --sycl_implementation_type={}\n", + sycl_implementation_type); + } + } +#endif + +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + { + // parse execution space when using Kokkos as backend + kokkos_execution_space = result["kokkos_execution_space"].as(); + + // assemble warning condition + const std::vector target_platforms = { target == target_platform::automatic ? determine_default_target_platform() : target }; + const bool kokkos_backend_is_used = backend == backend_type::kokkos || (backend == backend_type::automatic && determine_default_backend(list_available_backends(), target_platforms) == backend_type::kokkos); + + // warn if the kokkos execution space is explicitly set but Kokkos isn't the current (automatic) backend + if (!kokkos_backend_is_used && kokkos_execution_space != kokkos::execution_space::automatic) { + detail::log_untracked(verbosity_level::full | verbosity_level::warning, + "WARNING: explicitly set a Kokkos execution space but the current backend isn't Kokkos; ignoring --kokkos_execution_space={}\n", + kokkos_execution_space); + } } #endif @@ -178,6 +202,10 @@ std::ostream &operator<<(std::ostream &out, const parser_predict ¶ms) { out << fmt::format("SYCL implementation type: {}\n", params.sycl_implementation_type); } + if (params.backend == backend_type::kokkos || params.backend == backend_type::automatic) { + out << fmt::format("Kokkos execution space: {}\n", params.kokkos_execution_space); + } + out << fmt::format( "label_type: {}\n" "real_type: {}\n" diff --git a/src/plssvm/detail/cmd/parser_train.cpp b/src/plssvm/detail/cmd/parser_train.cpp index d0cc4cb26..31d5b8719 100644 --- a/src/plssvm/detail/cmd/parser_train.cpp +++ b/src/plssvm/detail/cmd/parser_train.cpp @@ -9,6 +9,7 @@ #include "plssvm/detail/cmd/parser_train.hpp" #include "plssvm/backend_types.hpp" // plssvm::list_available_backends, plssvm::determine_default_backend +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{list_available_execution_spaces, execution_space} #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::{list_available_sycl_implementations, implementation_type} #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/classification_types.hpp" // plssvm::classification_type, plssvm::classification_type_to_full_string @@ -77,6 +78,9 @@ parser_train::parser_train(int argc, char **argv) { ("sycl_kernel_invocation_type", "choose the kernel invocation type when using SYCL as backend: automatic|nd_range", cxxopts::value()->default_value(fmt::format("{}", sycl_kernel_invocation_type))) ("sycl_implementation_type", fmt::format("choose the SYCL implementation to be used in the SYCL backend: {}", fmt::join(sycl::list_available_sycl_implementations(), "|")), cxxopts::value()->default_value(fmt::format("{}", sycl_implementation_type))) #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + 
("kokkos_execution_space", fmt::format("choose the Kokkos execution space to be used in the Kokkos backend: {}", fmt::join(kokkos::list_available_execution_spaces(), "|")), cxxopts::value()->default_value(fmt::format("{}", kokkos_execution_space))) +#endif #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) ("performance_tracking", "the output YAML file where the performance tracking results are written to; if not provided, the results are dumped to stderr", cxxopts::value()) #endif @@ -185,28 +189,48 @@ parser_train::parser_train(int argc, char **argv) { solver = result["solver"].as(); #if defined(PLSSVM_HAS_SYCL_BACKEND) - // parse kernel invocation type when using SYCL as backend - sycl_kernel_invocation_type = result["sycl_kernel_invocation_type"].as(); - - // assembly warning condition - const std::vector target_platforms = { target == target_platform::automatic ? determine_default_target_platform() : target }; - const bool sycl_backend_is_used = backend == backend_type::sycl || (backend == backend_type::automatic && determine_default_backend(list_available_backends(), target_platforms) == backend_type::sycl); - - // warn if kernel invocation type is explicitly set but SYCL isn't the current (automatic) backend - if (!sycl_backend_is_used && sycl_kernel_invocation_type != sycl::kernel_invocation_type::automatic) { - detail::log_untracked(verbosity_level::full | verbosity_level::warning, - "WARNING: explicitly set a SYCL kernel invocation type but the current backend isn't SYCL; ignoring --sycl_kernel_invocation_type={}\n", - sycl_kernel_invocation_type); + { + // parse kernel invocation type when using SYCL as backend + sycl_kernel_invocation_type = result["sycl_kernel_invocation_type"].as(); + + // assemble warning condition + const std::vector target_platforms = { target == target_platform::automatic ? 
determine_default_target_platform() : target }; + const bool sycl_backend_is_used = backend == backend_type::sycl || (backend == backend_type::automatic && determine_default_backend(list_available_backends(), target_platforms) == backend_type::sycl); + + // warn if kernel invocation type is explicitly set but SYCL isn't the current (automatic) backend + if (!sycl_backend_is_used && sycl_kernel_invocation_type != sycl::kernel_invocation_type::automatic) { + detail::log_untracked(verbosity_level::full | verbosity_level::warning, + "WARNING: explicitly set a SYCL kernel invocation type but the current backend isn't SYCL; ignoring --sycl_kernel_invocation_type={}\n", + sycl_kernel_invocation_type); + } + + // parse SYCL implementation used in the SYCL backend + sycl_implementation_type = result["sycl_implementation_type"].as(); + + // warn if a SYCL implementation type is explicitly set but SYCL isn't the current (automatic) backend + if (!sycl_backend_is_used && sycl_implementation_type != sycl::implementation_type::automatic) { + detail::log_untracked(verbosity_level::full | verbosity_level::warning, + "WARNING: explicitly set a SYCL implementation type but the current backend isn't SYCL; ignoring --sycl_implementation_type={}\n", + sycl_implementation_type); + } } +#endif - // parse SYCL implementation used in the SYCL backend - sycl_implementation_type = result["sycl_implementation_type"].as(); +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + { + // parse execution space when using Kokkos as backend + kokkos_execution_space = result["kokkos_execution_space"].as(); - // warn if a SYCL implementation type is explicitly set but SYCL isn't the current (automatic) backend - if (!sycl_backend_is_used && sycl_implementation_type != sycl::implementation_type::automatic) { - detail::log_untracked(verbosity_level::full | verbosity_level::warning, - "WARNING: explicitly set a SYCL implementation type but the current backend isn't SYCL; ignoring --sycl_implementation_type={}\n", - sycl_implementation_type); + // assemble warning condition + const std::vector target_platforms = { target == target_platform::automatic ? 
determine_default_target_platform() : target }; + const bool kokkos_backend_is_used = backend == backend_type::kokkos || (backend == backend_type::automatic && determine_default_backend(list_available_backends(), target_platforms) == backend_type::kokkos); + + // warn if the kokkos execution space is explicitly set but Kokkos isn't the current (automatic) backend + if (!kokkos_backend_is_used && kokkos_execution_space != kokkos::execution_space::automatic) { + detail::log_untracked(verbosity_level::full | verbosity_level::warning, + "WARNING: explicitly set a Kokkos execution space but the current backend isn't Kokkos; ignoring --kokkos_execution_space={}\n", + kokkos_execution_space); + } } #endif @@ -302,6 +326,10 @@ std::ostream &operator<<(std::ostream &out, const parser_train ¶ms) { params.sycl_kernel_invocation_type); } + if (params.backend == backend_type::kokkos || params.backend == backend_type::automatic) { + out << fmt::format("Kokkos execution space: {}\n", params.kokkos_execution_space); + } + out << fmt::format( "classification_type: {}\n" "label_type: {}\n" diff --git a/src/plssvm/detail/tracking/performance_tracker.cpp b/src/plssvm/detail/tracking/performance_tracker.cpp index 26ebda7d3..6d1323e8e 100644 --- a/src/plssvm/detail/tracking/performance_tracker.cpp +++ b/src/plssvm/detail/tracking/performance_tracker.cpp @@ -116,6 +116,7 @@ void performance_tracker::add_tracking_entry(const tracking_entry{ fmt::format("{}", entry.entry_value.target) }); tracking_entries_[entry.entry_category].emplace("sycl_kernel_invocation_type", std::vector{ fmt::format("{}", entry.entry_value.sycl_kernel_invocation_type) }); tracking_entries_[entry.entry_category].emplace("sycl_implementation_type", std::vector{ fmt::format("{}", entry.entry_value.sycl_implementation_type) }); + tracking_entries_[entry.entry_category].emplace("kokkos_execution_space", std::vector{ fmt::format("{}", entry.entry_value.kokkos_execution_space) }); tracking_entries_[entry.entry_category].emplace("strings_as_labels", std::vector{ fmt::format("{}", entry.entry_value.strings_as_labels) }); tracking_entries_[entry.entry_category].emplace("real_type", std::vector{ std::string{ arithmetic_type_name() } }); tracking_entries_[entry.entry_category].emplace("input_filename", std::vector{ fmt::format("\"{}\"", entry.entry_value.input_filename) }); @@ -133,6 +134,7 @@ void performance_tracker::add_tracking_entry(const tracking_entry{ fmt::format("{}", entry.entry_value.backend) }); tracking_entries_[entry.entry_category].emplace("target", std::vector{ fmt::format("{}", entry.entry_value.target) }); tracking_entries_[entry.entry_category].emplace("sycl_implementation_type", std::vector{ fmt::format("{}", entry.entry_value.sycl_implementation_type) }); + tracking_entries_[entry.entry_category].emplace("kokkos_execution_space", std::vector{ fmt::format("{}", entry.entry_value.kokkos_execution_space) }); tracking_entries_[entry.entry_category].emplace("strings_as_labels", std::vector{ fmt::format("{}", entry.entry_value.strings_as_labels) }); tracking_entries_[entry.entry_category].emplace("real_type", std::vector{ std::string{ arithmetic_type_name() } }); tracking_entries_[entry.entry_category].emplace("input_filename", std::vector{ fmt::format("\"{}\"", entry.entry_value.input_filename) }); @@ -297,6 +299,14 @@ void performance_tracker::save(std::ostream &out) { " ADAPTIVECPP_with_accelerated_CPU: {}\n", adaptivecpp_sscp, adaptivecpp_accelerated_cpu); +#endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + // check whether 
Kokkos::SYCL AOT has been enabled + constexpr bool kokkos_sycl_aot = PLSSVM_IS_DEFINED(PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT); + + out << fmt::format( + " KOKKOS_sycl_intel_llvm_with_aot: {}\n", + kokkos_sycl_aot); #endif out << "\n"; diff --git a/src/plssvm/target_platforms.cpp b/src/plssvm/target_platforms.cpp index f5569b51d..8fc47e223 100644 --- a/src/plssvm/target_platforms.cpp +++ b/src/plssvm/target_platforms.cpp @@ -22,9 +22,6 @@ namespace plssvm { std::vector list_available_target_platforms() { std::vector available_targets = { target_platform::automatic }; -#if defined(PLSSVM_HAS_CPU_TARGET) - available_targets.push_back(target_platform::cpu); -#endif #if defined(PLSSVM_HAS_NVIDIA_TARGET) available_targets.push_back(target_platform::gpu_nvidia); #endif @@ -34,6 +31,9 @@ std::vector list_available_target_platforms() { #if defined(PLSSVM_HAS_INTEL_TARGET) available_targets.push_back(target_platform::gpu_intel); #endif +#if defined(PLSSVM_HAS_CPU_TARGET) + available_targets.push_back(target_platform::cpu); +#endif // automatic is ALWAYS available but AT LEAST ONE other target must be available in addition PLSSVM_ASSERT(available_targets.size() > 1, "Besides \"automatic\" at least one other target must be available!"); diff --git a/tests/backend_types.cpp b/tests/backend_types.cpp index 4b0f27aae..8a735a26b 100644 --- a/tests/backend_types.cpp +++ b/tests/backend_types.cpp @@ -40,11 +40,12 @@ TEST(BackendType, to_string) { EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::hip, "hip"); EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::opencl, "opencl"); EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::sycl, "sycl"); + EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::kokkos, "kokkos"); } TEST(BackendType, to_string_unknown) { // check conversions to std::string from unknown backend_type - EXPECT_CONVERSION_TO_STRING(static_cast(8), "unknown"); + EXPECT_CONVERSION_TO_STRING(static_cast(9), "unknown"); } // check whether the std::string -> plssvm::backend_type conversions are correct @@ -68,6 +69,8 @@ TEST(BackendType, from_string) { EXPECT_CONVERSION_FROM_STRING("OpenCL", plssvm::backend_type::opencl); EXPECT_CONVERSION_FROM_STRING("sycl", plssvm::backend_type::sycl); EXPECT_CONVERSION_FROM_STRING("SYCL", plssvm::backend_type::sycl); + EXPECT_CONVERSION_FROM_STRING("Kokkos", plssvm::backend_type::kokkos); + EXPECT_CONVERSION_FROM_STRING("KOKKOS", plssvm::backend_type::kokkos); } TEST(BackendType, from_string_unknown) { @@ -133,6 +136,7 @@ INSTANTIATE_TEST_SUITE_P(BackendType, BackendTypeSupportedCombination, ::testing supported_combination_type{ { plssvm::backend_type::hip }, { plssvm::target_platform::cpu, plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel }, plssvm::backend_type::hip }, supported_combination_type{ { plssvm::backend_type::opencl }, { plssvm::target_platform::cpu, plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel }, plssvm::backend_type::opencl }, supported_combination_type{ { plssvm::backend_type::sycl }, { plssvm::target_platform::cpu, plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel }, plssvm::backend_type::sycl }, + supported_combination_type{ { plssvm::backend_type::kokkos }, { plssvm::target_platform::cpu, plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel }, plssvm::backend_type::kokkos }, supported_combination_type{ { 
plssvm::backend_type::openmp, plssvm::backend_type::cuda, plssvm::backend_type::hip, plssvm::backend_type::opencl, plssvm::backend_type::sycl }, { plssvm::target_platform::cpu }, plssvm::backend_type::sycl }, supported_combination_type{ { plssvm::backend_type::openmp, plssvm::backend_type::cuda, plssvm::backend_type::hip, plssvm::backend_type::opencl, plssvm::backend_type::sycl }, { plssvm::target_platform::gpu_nvidia }, plssvm::backend_type::cuda }, supported_combination_type{ { plssvm::backend_type::openmp, plssvm::backend_type::cuda, plssvm::backend_type::hip, plssvm::backend_type::opencl, plssvm::backend_type::sycl }, { plssvm::target_platform::gpu_amd }, plssvm::backend_type::hip }, @@ -151,6 +155,7 @@ TEST(BackendType, csvm_to_backend_type) { EXPECT_EQ(plssvm::csvm_to_backend_type::value, plssvm::backend_type::sycl); EXPECT_EQ(plssvm::csvm_to_backend_type::value, plssvm::backend_type::sycl); EXPECT_EQ(plssvm::csvm_to_backend_type::value, plssvm::backend_type::sycl); + EXPECT_EQ(plssvm::csvm_to_backend_type::value, plssvm::backend_type::kokkos); EXPECT_EQ(plssvm::csvm_to_backend_type::impl, plssvm::sycl::implementation_type::adaptivecpp); EXPECT_EQ(plssvm::csvm_to_backend_type::impl, plssvm::sycl::implementation_type::dpcpp); @@ -167,4 +172,5 @@ TEST(BackendType, csvm_to_backend_type_v) { EXPECT_EQ(plssvm::csvm_to_backend_type_v, plssvm::backend_type::sycl); EXPECT_EQ(plssvm::csvm_to_backend_type_v, plssvm::backend_type::sycl); EXPECT_EQ(plssvm::csvm_to_backend_type_v, plssvm::backend_type::sycl); + EXPECT_EQ(plssvm::csvm_to_backend_type_v, plssvm::backend_type::kokkos); } diff --git a/tests/backends/CMakeLists.txt b/tests/backends/CMakeLists.txt index ec6a5fa76..dffe57615 100644 --- a/tests/backends/CMakeLists.txt +++ b/tests/backends/CMakeLists.txt @@ -38,3 +38,8 @@ endif () if (TARGET ${PLSSVM_SYCL_BACKEND_LIBRARY_NAME}) add_subdirectory(SYCL) endif () + +# create Kokkos tests if the Kokkos backend is available +if (TARGET ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) + add_subdirectory(Kokkos) +endif () diff --git a/tests/backends/Kokkos/CMakeLists.txt b/tests/backends/Kokkos/CMakeLists.txt new file mode 100644 index 000000000..f29367a27 --- /dev/null +++ b/tests/backends/Kokkos/CMakeLists.txt @@ -0,0 +1,54 @@ +## Authors: Alexander Van Craen, Marcel Breyer +## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved +## License: This file is part of the PLSSVM project which is released under the MIT license. +## See the LICENSE.md file in the project root for full license information. 
+######################################################################################################################## + +## create Kokkos tests +set(PLSSVM_KOKKOS_TEST_NAME Kokkos_tests) + +# list all necessary sources +set(PLSSVM_KOKKOS_TEST_SOURCES + ${CMAKE_CURRENT_LIST_DIR}/detail/constexpr_available_execution_spaces.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/device_ptr.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/device_view_wrapper.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/device_wrapper.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/standard_layout_tuple.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/pinned_memory.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp + ${CMAKE_CURRENT_LIST_DIR}/kokkos_csvm.cpp + ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp + ${CMAKE_CURRENT_LIST_DIR}/execution_space.cpp + ${CMAKE_CURRENT_LIST_DIR}/execution_space_type_traits.cpp +) + +find_package(Kokkos REQUIRED) + +# add test executable +add_executable(${PLSSVM_KOKKOS_TEST_NAME} ${CMAKE_CURRENT_LIST_DIR}/../../main.cpp ${PLSSVM_KOKKOS_TEST_SOURCES}) + +if (Kokkos_ENABLE_CUDA) + # fix template limit when using Kokkos::Cuda + target_compile_options(${PLSSVM_KOKKOS_TEST_NAME} PRIVATE -Xcudafe --pending_instantiations=0) + + # tests won't compile with nvcc + if (NOT PLSSVM_TEST_WITH_REDUCED_LABEL_TYPES) + message(FATAL_ERROR "Due to template instantiation limits within nvcc, only reduced label type tests are currently supported!") + endif () +endif () + +# increase recursive template instantiation limit +target_compile_options(${PLSSVM_KOKKOS_TEST_NAME} PRIVATE $<$:$<$:-ftemplate-depth=2048>>) + +# link against test library +target_link_libraries(${PLSSVM_KOKKOS_TEST_NAME} PRIVATE ${PLSSVM_BASE_TEST_LIBRARY_NAME}) + +# add tests to google test +include(GoogleTest) +include(${PROJECT_SOURCE_DIR}/cmake/discover_tests_with_death_test_filter.cmake) +discover_tests_with_death_test_filter(${PLSSVM_KOKKOS_TEST_NAME}) + +# add test as coverage dependency +if (TARGET coverage) + add_dependencies(coverage ${PLSSVM_KOKKOS_TEST_NAME}) +endif () \ No newline at end of file diff --git a/tests/backends/Kokkos/detail/constexpr_available_execution_spaces.cpp b/tests/backends/Kokkos/detail/constexpr_available_execution_spaces.cpp new file mode 100644 index 000000000..2e8f064e7 --- /dev/null +++ b/tests/backends/Kokkos/detail/constexpr_available_execution_spaces.cpp @@ -0,0 +1,18 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the Kokkos `constexpr_available_execution_spaces()` function. 
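The source list above also registers an execution_space.cpp test. Based on the operator<< and operator>> overloads added earlier in this patch, a round-trip test for the stream operators could look roughly like this (a sketch, not the patch's actual test code):

#include "plssvm/backends/Kokkos/execution_space.hpp"

#include "gtest/gtest.h"

#include <sstream>

TEST(KokkosExecutionSpace, stream_operator_round_trip) {
    // to_string direction: openmp_target is printed as "OpenMPTarget"
    std::ostringstream out{};
    out << plssvm::kokkos::execution_space::openmp_target;
    EXPECT_EQ(out.str(), "OpenMPTarget");

    // from_string direction: parsing is case-insensitive and accepts both spellings
    plssvm::kokkos::execution_space space{};
    std::istringstream in{ "openmptarget" };
    in >> space;
    EXPECT_EQ(space, plssvm::kokkos::execution_space::openmp_target);
}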
+ */ + +#include "plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp" + +#include "gtest/gtest.h" // TEST, EXPECT_TRUE, EXPECT_FALSE + +TEST(KokkosConstexprAvailableExecutionSpaces, constexpr_available_execution_spaces) { + // at least one execution space must always be available + EXPECT_FALSE(plssvm::kokkos::detail::constexpr_available_execution_spaces().empty()); +} diff --git a/tests/backends/Kokkos/detail/device_ptr.cpp b/tests/backends/Kokkos/detail/device_ptr.cpp new file mode 100644 index 000000000..ec525dad5 --- /dev/null +++ b/tests/backends/Kokkos/detail/device_ptr.cpp @@ -0,0 +1,55 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the Kokkos backend device pointer. + */ + +#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr + +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/backends/Kokkos/execution_space_type_traits.hpp" // plssvm::kokkos::execution_space_to_kokkos_type_t + +#include "tests/backends/generic_device_ptr_tests.hpp" // generic device pointer tests to instantiate +#include "tests/backends/Kokkos/utility.hpp" // util::create_kokkos_test_tuple_impl +#include "tests/naming.hpp" // naming::test_parameter_to_name +#include "tests/types_to_test.hpp" // util::{combine_test_parameters_gtest_t, cartesian_type_product_t, layout_type_list}, + // util::detail::concat_tuple_types_t + +#include "gtest/gtest.h" // INSTANTIATE_TYPED_TEST_SUITE_P + +#include // std::tuple + +template +struct kokkos_device_ptr_test_type { + using device_ptr_type = plssvm::kokkos::detail::device_ptr; + using queue_type = plssvm::kokkos::detail::device_wrapper; + constexpr static plssvm::kokkos::execution_space space = exec_space; + + static const queue_type &default_queue() { + static const queue_type queue{ plssvm::kokkos::execution_space_to_kokkos_type_t{} }; + return queue; + } +}; + +template +using kokkos_device_ptr_test_type_float = kokkos_device_ptr_test_type; +template +using kokkos_device_ptr_test_type_double = kokkos_device_ptr_test_type; + +using kokkos_device_ptr_tuple = util::detail::concat_tuple_types_t, + util::create_kokkos_test_tuple_t>; + +// the tests used in the instantiated GTest test suites +using kokkos_device_ptr_type_gtest = util::combine_test_parameters_gtest_t>; +using kokkos_device_ptr_layout_type_gtest = util::combine_test_parameters_gtest_t, util::layout_type_list>; + +// instantiate type-parameterized tests +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosDevicePtr, DevicePtr, kokkos_device_ptr_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosDevicePtr, DevicePtrLayout, kokkos_device_ptr_layout_type_gtest, naming::test_parameter_to_name); + +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosDevicePtrDeathTest, DevicePtrDeathTest, kokkos_device_ptr_type_gtest, naming::test_parameter_to_name); diff --git a/tests/backends/Kokkos/detail/device_view_wrapper.cpp b/tests/backends/Kokkos/detail/device_view_wrapper.cpp new file mode 100644 index 000000000..28dc97cba --- /dev/null +++ b/tests/backends/Kokkos/detail/device_view_wrapper.cpp @@ -0,0 +1,95 @@ +/** + * @author 
Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the device_view_wrapper class. + */ + +#include "plssvm/backends/Kokkos/detail/device_view_wrapper.hpp" + +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, kokkos_type_to_execution_space_v} + +#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace, Kokkos::View + +#include "gtest/gtest.h" // TEST, EXPECT_EQ, EXPECT_TRUE, EXPECT_FALSE + +#include // std::size_t + +TEST(KokkosDeviceViewWrapper, default_construct) { + // default construct a device view wrapper + const plssvm::kokkos::detail::device_view_wrapper view{}; + + // per std::variant specification, the first type in the underlying variant is now the active member + // -> this always corresponds to the first entry in our constexpr_available_execution_spaces array + constexpr auto spaces = plssvm::kokkos::detail::constexpr_available_execution_spaces(); + EXPECT_EQ(view.get_execution_space(), spaces.front()); +} + +TEST(KokkosDeviceViewWrapper, construct) { + // construct a device view wrapper using the current Kokkos::DefaultExecutionSpace + const plssvm::kokkos::detail::device_view_wrapper view{ Kokkos::View{} }; + + // check that the device view is associated with the correct execution space + EXPECT_EQ(view.get_execution_space(), plssvm::kokkos::kokkos_type_to_execution_space_v); +} + +TEST(KokkosDeviceViewWrapper, get) { + // construct a device view wrapper using the current Kokkos::DefaultExecutionSpace + plssvm::kokkos::detail::device_view_wrapper view{ Kokkos::View{} }; + + // check that the returned Kokkos::View has the correct type + constexpr plssvm::kokkos::execution_space space = plssvm::kokkos::kokkos_type_to_execution_space_v; + ::testing::StaticAssertTypeEq()), Kokkos::View &>(); +} + +TEST(KokkosDeviceViewWrapper, get_const) { + // construct a device view wrapper using the current Kokkos::DefaultExecutionSpace + const plssvm::kokkos::detail::device_view_wrapper view{ Kokkos::View{} }; + + // check that the returned Kokkos::View has the correct type + constexpr plssvm::kokkos::execution_space space = plssvm::kokkos::kokkos_type_to_execution_space_v; + ::testing::StaticAssertTypeEq()), const Kokkos::View &>(); +} + +TEST(KokkosDeviceViewWrapper, get_execution_space) { + // construct a device wrapper using the current Kokkos::DefaultExecutionSpace + const plssvm::kokkos::detail::device_view_wrapper view{ Kokkos::View{} }; + + // check that the device view is associated with the correct execution space + EXPECT_EQ(view.get_execution_space(), plssvm::kokkos::kokkos_type_to_execution_space_v); +} + +TEST(KokkosDeviceViewWrapper, equality) { + const plssvm::kokkos::detail::device_view_wrapper view1{ Kokkos::View{} }; + const plssvm::kokkos::detail::device_view_wrapper view2{ Kokkos::View{} }; + + // should be equal + EXPECT_TRUE(view1 == view2); +} + +TEST(KokkosDeviceViewWrapper, inequality) { + const plssvm::kokkos::detail::device_view_wrapper view1{ Kokkos::View{} }; + const plssvm::kokkos::detail::device_view_wrapper view2{ Kokkos::View{} }; + + // should not be unequal + EXPECT_FALSE(view1 != view2); +} + +TEST(KokkosDeviceViewWrapper, make_device_view_wrapper) { + // 
create a device wrapper for the Kokkos::DefaultExecutionSpace + const plssvm::kokkos::detail::device_wrapper device{ Kokkos::DefaultExecutionSpace{} }; + + // create device view wrapper + const plssvm::kokkos::detail::device_view_wrapper view = plssvm::kokkos::detail::make_device_view_wrapper(device, 42); + + // check that the returned Kokkos::View has the correct type + constexpr plssvm::kokkos::execution_space space = plssvm::kokkos::kokkos_type_to_execution_space_v; + ::testing::StaticAssertTypeEq()), const Kokkos::View &>(); + + // check the number of elements + EXPECT_EQ(view.get().size(), std::size_t{ 42 }); +} diff --git a/tests/backends/Kokkos/detail/device_wrapper.cpp b/tests/backends/Kokkos/detail/device_wrapper.cpp new file mode 100644 index 000000000..ca644ece7 --- /dev/null +++ b/tests/backends/Kokkos/detail/device_wrapper.cpp @@ -0,0 +1,115 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the device_wrapper class. + */ + +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" + +#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, kokkos_type_to_execution_space_v} +#include "plssvm/detail/utility.hpp" // plssvm::detail::contains +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace + +#include "tests/utility.hpp" // util::for_each_variant_type + +#include "gtest/gtest.h" // TEST, EXPECT_GE, EXPECT_EQ + +#include // std::vector + +TEST(KokkosDeviceWrapper, default_construct) { + // default construct a device wrapper + const plssvm::kokkos::detail::device_wrapper device{}; + + // per std::variant specification, the first type in the underlying variant is now the active member + // -> this always corresponds to the first entry in our constexpr_available_execution_spaces array + constexpr auto spaces = plssvm::kokkos::detail::constexpr_available_execution_spaces(); + EXPECT_EQ(device.get_execution_space(), spaces.front()); +} + +TEST(KokkosDeviceWrapper, construct) { + // construct a device wrapper using the current Kokkos::DefaultExecutionSpace + const plssvm::kokkos::detail::device_wrapper device{ Kokkos::DefaultExecutionSpace{} }; + + // check that the device is associated with the correct execution space + EXPECT_EQ(device.get_execution_space(), plssvm::kokkos::kokkos_type_to_execution_space_v); +} + +TEST(KokkosDeviceWrapper, get) { + // construct a device wrapper using the current Kokkos::DefaultExecutionSpace + plssvm::kokkos::detail::device_wrapper device{ Kokkos::DefaultExecutionSpace{} }; + + // check that the returned Kokkos::ExecutionSpace has the correct type + constexpr plssvm::kokkos::execution_space space = plssvm::kokkos::kokkos_type_to_execution_space_v; + ::testing::StaticAssertTypeEq()), Kokkos::DefaultExecutionSpace &>(); +} + +TEST(KokkosDeviceWrapper, get_const) { + // construct a device wrapper using the current Kokkos::DefaultExecutionSpace + const plssvm::kokkos::detail::device_wrapper device{ Kokkos::DefaultExecutionSpace{} }; + + // check that the returned Kokkos::ExecutionSpace has the correct type + constexpr 
plssvm::kokkos::execution_space space = plssvm::kokkos::kokkos_type_to_execution_space_v; + ::testing::StaticAssertTypeEq()), const Kokkos::DefaultExecutionSpace &>(); +} + +TEST(KokkosDeviceWrapper, get_execution_space) { + // construct a device wrapper using the current Kokkos::DefaultExecutionSpace + const plssvm::kokkos::detail::device_wrapper device{ Kokkos::DefaultExecutionSpace{} }; + + // check that the device is associated with the correct execution space + EXPECT_EQ(device.get_execution_space(), plssvm::kokkos::kokkos_type_to_execution_space_v); +} + +TEST(KokkosDeviceWrapper, equality) { + const plssvm::kokkos::detail::device_wrapper device1{ Kokkos::DefaultExecutionSpace{} }; + const plssvm::kokkos::detail::device_wrapper device2{ Kokkos::DefaultExecutionSpace{} }; + + // should be equal + EXPECT_TRUE(device1 == device2); +} + +TEST(KokkosDeviceWrapper, inequality) { + const plssvm::kokkos::detail::device_wrapper device1{ Kokkos::DefaultExecutionSpace{} }; + const plssvm::kokkos::detail::device_wrapper device2{ Kokkos::DefaultExecutionSpace{} }; + + // should not be unequal + EXPECT_FALSE(device1 != device2); +} + +struct device_list_test { + template + void operator()() const { + // get the default device list + const plssvm::kokkos::execution_space space = plssvm::kokkos::kokkos_type_to_execution_space_v; + plssvm::target_platform default_target{}; + for (const auto &[target, spaces] : plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping()) { + if (::plssvm::detail::contains(spaces, space)) { + default_target = target; + break; + } + } + const std::vector devices = plssvm::kokkos::detail::get_device_list(space, default_target); + + // check the number of returned devices + if (space == plssvm::kokkos::execution_space::cuda || space == plssvm::kokkos::execution_space::hip || space == plssvm::kokkos::execution_space::sycl) { + // TODO: Change if multi-GPU support for Kokkos::Experimental::OpenMPTarget and/or Kokkos::Experimental::OpenACC is implemented + // for the device execution spaces AT LEAST ONE device must be found + EXPECT_GE(devices.size(), 1); + } else { + // for all other execution spaces EXACTLY ONE device must be found + EXPECT_EQ(devices.size(), 1); + } + } +}; + +TEST(KokkosDeviceWrapper, get_device_list) { + using variant_type = typename plssvm::kokkos::detail::impl::create_device_variant_type::type; + util::for_each_variant_type(device_list_test{}); +} diff --git a/tests/backends/Kokkos/detail/pinned_memory.cpp b/tests/backends/Kokkos/detail/pinned_memory.cpp new file mode 100644 index 000000000..2569e68e7 --- /dev/null +++ b/tests/backends/Kokkos/detail/pinned_memory.cpp @@ -0,0 +1,39 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the Kokkos backend pinned memory. 
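+ * @details The generic pinned memory test suites below are instantiated with can_pin = false, i.e., the Kokkos
+ *          backend's pinned memory wrapper is expected to report that no host memory pinning is performed.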
+ */ + +#include "plssvm/backends/Kokkos/detail/pinned_memory.hpp" // plssvm::kokkos::detail::pinned_memory + +#include "tests/backends/generic_pinned_memory_tests.hpp" // generic pinned memory tests to instantiate +#include "tests/naming.hpp" // naming::test_parameter_to_name +#include "tests/types_to_test.hpp" // util::{combine_test_parameters_gtest_t, cartesian_type_product_t, layout_type_list} + +#include "gtest/gtest.h" // INSTANTIATE_TYPED_TEST_SUITE_P + +#include // std::tuple + +template +struct kokkos_pinned_memory_test_type { + using pinned_memory_type = plssvm::kokkos::detail::pinned_memory; + + constexpr static bool can_pin = false; +}; + +using kokkos_pinned_memory_tuple = std::tuple, kokkos_pinned_memory_test_type>; + +// the tests used in the instantiated GTest test suites +using kokkos_pinned_memory_type_gtest = util::combine_test_parameters_gtest_t>; +using kokkos_pinned_memory_layout_type_gtest = util::combine_test_parameters_gtest_t, util::layout_type_list>; + +// instantiate type-parameterized tests +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosPinnedMemory, PinnedMemory, kokkos_pinned_memory_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosPinnedMemory, PinnedMemoryLayout, kokkos_pinned_memory_layout_type_gtest, naming::test_parameter_to_name); + +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosPinnedMemoryDeathTest, PinnedMemoryDeathTest, kokkos_pinned_memory_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosPinnedMemoryDeathTest, PinnedMemoryLayoutDeathTest, kokkos_pinned_memory_layout_type_gtest, naming::test_parameter_to_name); diff --git a/tests/backends/Kokkos/detail/standard_layout_tuple.cpp b/tests/backends/Kokkos/detail/standard_layout_tuple.cpp new file mode 100644 index 000000000..7b4fb6cd8 --- /dev/null +++ b/tests/backends/Kokkos/detail/standard_layout_tuple.cpp @@ -0,0 +1,33 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the custom standard layout tuple implementation necessary for Kokkos. 
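+ * @details The custom tuple is used instead of std::tuple, presumably because std::tuple is not guaranteed to be a
+ *          standard-layout type, which is typically required for data passed to device kernels. Illustrative usage,
+ *          mirroring the tests below:
+ * @code
+ * const auto tuple = plssvm::kokkos::detail::make_standard_layout_tuple(true, 42, 3.1415);
+ * const int i = plssvm::kokkos::detail::get<1>(tuple);  // i == 42
+ * @endcode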
+ */ + +#include "plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp" // plssvm::kokkos::detail::{standard_layout_tuple, make_standard_layout_tuple, get} + +#include "gtest/gtest.h" // TEST, EXPECT_EQ, testing::StaticAssertTypeEq + +#include // std::remove_const_t + +TEST(KokkosStandardLayoutTuple, make_standard_layout_tuple) { + // create a new standard layout tuple + [[maybe_unused]] const auto tuple = plssvm::kokkos::detail::make_standard_layout_tuple(true, 42, 3.1415); + + // check the tuple type + ::testing::StaticAssertTypeEq, std::remove_const_t>(); +} + +TEST(KokkosStandardLayoutTuple, get) { + // create a new standard layout tuple + const auto tuple = plssvm::kokkos::detail::make_standard_layout_tuple(true, 42, 3.1415); + + // check getter functions + EXPECT_EQ(plssvm::kokkos::detail::get<0>(tuple), true); + EXPECT_EQ(plssvm::kokkos::detail::get<1>(tuple), 42); + EXPECT_EQ(plssvm::kokkos::detail::get<2>(tuple), 3.1415); +} diff --git a/tests/backends/Kokkos/detail/utility.cpp b/tests/backends/Kokkos/detail/utility.cpp new file mode 100644 index 000000000..ec18a977b --- /dev/null +++ b/tests/backends/Kokkos/detail/utility.cpp @@ -0,0 +1,93 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the custom utility functions related to the Kokkos backend. + */ + +#include "plssvm/backends/Kokkos/detail/utility.hpp" + +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, kokkos_type_to_execution_space_v} +#include "plssvm/detail/utility.hpp" // plssvm::detail::contains +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "Kokkos_Core.hpp" // Kokkos::ExecutionSpace + +#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT +#include "tests/utility.hpp" // util::for_each_variant_type + +#include "fmt/core.h" // fmt::format +#include "gmock/gmock.h" // EXPECT_THAT; ::testing::AnyOf +#include "gtest/gtest.h" // TEST, EXPECT_NE + +#include // std::map +#include // std::regex, std::regex::extended, std::regex_match +#include // std::string +#include // std::variant +#include // std::vector + +TEST(KokkosUtility, is_type_in_variant) { + // check type trait that determines if a type is contained in a type trait + using variant_type = std::variant; + + EXPECT_TRUE((plssvm::kokkos::detail::impl::is_type_in_variant_v) ); + EXPECT_TRUE((plssvm::kokkos::detail::impl::is_type_in_variant_v) ); + EXPECT_TRUE((plssvm::kokkos::detail::impl::is_type_in_variant_v) ); + EXPECT_TRUE((plssvm::kokkos::detail::impl::is_type_in_variant_v) ); + EXPECT_FALSE((plssvm::kokkos::detail::impl::is_type_in_variant_v) ); + EXPECT_FALSE((plssvm::kokkos::detail::impl::is_type_in_variant_v) ); +} + +TEST(KokkosUtility, dim_type_to_native) { + // create a dim_type + constexpr plssvm::detail::dim_type dim{ 128ull, 64ull, 32ull }; + + // convert it to a Kokkos one-dimensional execution range + const int native_dim = plssvm::kokkos::detail::dim_type_to_native(dim); + + // check values for correctness + EXPECT_EQ(native_dim, 
262'144); // = 128 * 64 * 32 +} + +TEST(KokkosUtility, available_target_platform_to_execution_space_mapping) { + // get the target_platform <-> execution_space mappings + const std::map> mapping = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping(); + + // the map must not be empty + EXPECT_FALSE(mapping.empty()); + + // each vector must at least have one entry + the automatic target platform must not be present + for (const auto &[target, spaces] : mapping) { + EXPECT_NE(target, plssvm::target_platform::automatic); + EXPECT_GE(spaces.size(), 1); + } +} + +struct device_name_test { + template + void operator()() const { + // get the device name of the default Kokkos execution space + const std::string name = plssvm::kokkos::detail::get_device_name(plssvm::kokkos::detail::device_wrapper{ ExecutionSpace{} }); + SCOPED_TRACE(name); + + // the returned device name may not be empty or unknown + EXPECT_FALSE(name.empty()); + EXPECT_NE(name, std::string{ "unknown" }); + } +}; + +TEST(KokkosUtility, get_device_name) { + using variant_type = typename plssvm::kokkos::detail::impl::create_device_variant_type::type; + util::for_each_variant_type(device_name_test{}); +} + +TEST(KokkosUtility, get_kokkos_version) { + const std::regex reg{ "[0-9]+\\.[0-9]+\\.[0-9]+", std::regex::extended }; + EXPECT_TRUE(std::regex_match(plssvm::kokkos::detail::get_kokkos_version(), reg)); +} diff --git a/tests/backends/Kokkos/exceptions.cpp b/tests/backends/Kokkos/exceptions.cpp new file mode 100644 index 000000000..d78ac7801 --- /dev/null +++ b/tests/backends/Kokkos/exceptions.cpp @@ -0,0 +1,25 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the custom exception classes related to the Kokkos backend. + */ + +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception + +#include "tests/backends/generic_exceptions_tests.hpp" // generic exception tests to instantiate + +#include "gtest/gtest.h" // INSTANTIATE_TYPED_TEST_SUITE_P + +#include // std::string_view + +struct exception_test_type { + using exception_type = plssvm::kokkos::backend_exception; + constexpr static std::string_view name = "kokkos::backend_exception"; +}; + +// instantiate type-parameterized tests +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosExceptions, Exception, exception_test_type); diff --git a/tests/backends/Kokkos/execution_space.cpp b/tests/backends/Kokkos/execution_space.cpp new file mode 100644 index 000000000..3e54f3be5 --- /dev/null +++ b/tests/backends/Kokkos/execution_space.cpp @@ -0,0 +1,81 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for functions related to the different Kokkos execution spaces. 
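+ * @details The tests below cover the std::string <-> plssvm::kokkos::execution_space conversions (both spellings,
+ *          e.g. "Cuda" and "CUDA", map to execution_space::cuda). Illustrative usage, mirroring the conversions
+ *          checked below:
+ * @code
+ * std::istringstream input{ "CUDA" };
+ * plssvm::kokkos::execution_space space{};
+ * input >> space;  // space == plssvm::kokkos::execution_space::cuda
+ * @endcode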
+ */ + +#include "plssvm/backends/Kokkos/execution_space.hpp" + +#include "tests/custom_test_macros.hpp" // EXPECT_CONVERSION_TO_STRING, EXPECT_CONVERSION_FROM_STRING + +#include "gtest/gtest.h" // TEST, EXPECT_TRUE, EXPECT_FALSE + +#include // std::istringstream + +// check whether the plssvm::kokkos::execution_space -> std::string conversions are correct +TEST(KokkosExecutionSpace, to_string) { + // check conversions to std::string + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::automatic, "automatic"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::cuda, "Cuda"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::hip, "HIP"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::sycl, "SYCL"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::hpx, "HPX"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::openmp, "OpenMP"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::openmp_target, "OpenMPTarget"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::openacc, "OpenACC"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::threads, "Threads"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::serial, "Serial"); +} + +TEST(KokkosExecutionSpace, to_string_unknown) { + // check conversions to std::string from unknown execution_space + EXPECT_CONVERSION_TO_STRING(static_cast(10), "unknown"); +} + +// check whether the std::string -> plssvm::kokkos::execution_space conversions are correct +TEST(KokkosExecutionSpace, from_string) { + // check conversion from std::string + EXPECT_CONVERSION_FROM_STRING("Automatic", plssvm::kokkos::execution_space::automatic); + EXPECT_CONVERSION_FROM_STRING("AUTO", plssvm::kokkos::execution_space::automatic); + EXPECT_CONVERSION_FROM_STRING("Cuda", plssvm::kokkos::execution_space::cuda); + EXPECT_CONVERSION_FROM_STRING("CUDA", plssvm::kokkos::execution_space::cuda); + EXPECT_CONVERSION_FROM_STRING("Hip", plssvm::kokkos::execution_space::hip); + EXPECT_CONVERSION_FROM_STRING("HIP", plssvm::kokkos::execution_space::hip); + EXPECT_CONVERSION_FROM_STRING("Sycl", plssvm::kokkos::execution_space::sycl); + EXPECT_CONVERSION_FROM_STRING("SYCL", plssvm::kokkos::execution_space::sycl); + EXPECT_CONVERSION_FROM_STRING("Hpx", plssvm::kokkos::execution_space::hpx); + EXPECT_CONVERSION_FROM_STRING("HPX", plssvm::kokkos::execution_space::hpx); + EXPECT_CONVERSION_FROM_STRING("OpenMP", plssvm::kokkos::execution_space::openmp); + EXPECT_CONVERSION_FROM_STRING("OPENMP", plssvm::kokkos::execution_space::openmp); + EXPECT_CONVERSION_FROM_STRING("OpenMP_Target", plssvm::kokkos::execution_space::openmp_target); + EXPECT_CONVERSION_FROM_STRING("OPENMPTARGET", plssvm::kokkos::execution_space::openmp_target); + EXPECT_CONVERSION_FROM_STRING("OpenACC", plssvm::kokkos::execution_space::openacc); + EXPECT_CONVERSION_FROM_STRING("OPENACC", plssvm::kokkos::execution_space::openacc); + EXPECT_CONVERSION_FROM_STRING("threads", plssvm::kokkos::execution_space::threads); + EXPECT_CONVERSION_FROM_STRING("THREADS", plssvm::kokkos::execution_space::threads); + EXPECT_CONVERSION_FROM_STRING("std::threads", plssvm::kokkos::execution_space::threads); + EXPECT_CONVERSION_FROM_STRING("Serial", plssvm::kokkos::execution_space::serial); + EXPECT_CONVERSION_FROM_STRING("SERIAL", plssvm::kokkos::execution_space::serial); +} + +TEST(KokkosExecutionSpace, from_string_unknown) { + // foo isn't a valid execution_space + std::istringstream input{ "foo" }; + 
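    // the extraction operator is expected to set the failbit on the stream for the unrecognized name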
plssvm::kokkos::execution_space space{}; + input >> space; + EXPECT_TRUE(input.fail()); +} + +TEST(KokkosExecutionSpace, list_available_execution_spaces) { + const std::vector execution_spaces = plssvm::kokkos::list_available_execution_spaces(); + + // at least one must be available (automatic)! + EXPECT_GE(execution_spaces.size(), 1); + + // the automatic execution space must always be present + EXPECT_THAT(execution_spaces, ::testing::Contains(plssvm::kokkos::execution_space::automatic)); +} diff --git a/tests/backends/Kokkos/execution_space_type_traits.cpp b/tests/backends/Kokkos/execution_space_type_traits.cpp new file mode 100644 index 000000000..f813fa836 --- /dev/null +++ b/tests/backends/Kokkos/execution_space_type_traits.cpp @@ -0,0 +1,75 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for functions related to the different Kokkos execution spaces. + */ + +#include "plssvm/backends/Kokkos/execution_space_type_traits.hpp" + +#include "gtest/gtest.h" // TEST, EXPECT_EQ, ::testing::StaticAssertTypeEq + +TEST(KokkosExecutionSpaceTypeTraits, execution_space_to_kokkos_type) { + // check conversions +#if defined(KOKKOS_ENABLE_CUDA) + ::testing::StaticAssertTypeEq, Kokkos::Cuda>(); +#endif +#if defined(KOKKOS_ENABLE_HIP) + ::testing::StaticAssertTypeEq, Kokkos::HIP>(); +#endif +#if defined(KOKKOS_ENABLE_SYCL) + ::testing::StaticAssertTypeEq, Kokkos::SYCL>(); +#endif +#if defined(KOKKOS_ENABLE_HPX) + ::testing::StaticAssertTypeEq, Kokkos::Experimental::HPX>(); +#endif +#if defined(KOKKOS_ENABLE_OPENMP) + ::testing::StaticAssertTypeEq, Kokkos::OpenMP>(); +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + ::testing::StaticAssertTypeEq, Kokkos::Experimental::OpenMPTarget>(); +#endif +#if defined(KOKKOS_ENABLE_OPENACC) + ::testing::StaticAssertTypeEq, Kokkos::Experimental::OpenACC>(); +#endif +#if defined(KOKKOS_ENABLE_THREADS) + ::testing::StaticAssertTypeEq, Kokkos::Threads>(); +#endif +#if defined(KOKKOS_ENABLE_SERIAL) + ::testing::StaticAssertTypeEq, Kokkos::Serial>(); +#endif +} + +TEST(KokkosExecutionSpaceTypeTraits, kokkos_type_to_execution_space) { + // check conversions +#if defined(KOKKOS_ENABLE_CUDA) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::cuda); +#endif +#if defined(KOKKOS_ENABLE_HIP) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::hip); +#endif +#if defined(KOKKOS_ENABLE_SYCL) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::sycl); +#endif +#if defined(KOKKOS_ENABLE_HPX) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::hpx); +#endif +#if defined(KOKKOS_ENABLE_OPENMP) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::openmp); +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::openmp_target); +#endif +#if defined(KOKKOS_ENABLE_OPENACC) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::openacc); +#endif +#if defined(KOKKOS_ENABLE_THREADS) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::threads); 
+#endif +#if defined(KOKKOS_ENABLE_SERIAL) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::serial); +#endif +} diff --git a/tests/backends/Kokkos/kokkos_csvm.cpp b/tests/backends/Kokkos/kokkos_csvm.cpp new file mode 100644 index 000000000..c99f8c7d5 --- /dev/null +++ b/tests/backends/Kokkos/kokkos_csvm.cpp @@ -0,0 +1,771 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the functionality related to the Kokkos backend. + */ + +#include "plssvm/backends/Kokkos/csvm.hpp" // plssvm::kokkos::csvm +#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/detail/type_list.hpp" // plssvm::detail::label_type_list +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/target_platforms.hpp" // plssvm::target_platform, plssvm::list_available_target_platforms + +#include "tests/backends/generic_csvm_tests.hpp" // generic CSVM tests to instantiate +#include "tests/backends/generic_gpu_csvm_tests.hpp" // generic GPU CSVM tests to instantiate +#include "tests/backends/Kokkos/mock_kokkos_csvm.hpp" +#include "tests/backends/Kokkos/utility.hpp" // util::create_kokkos_test_tuple_impl +#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT +#include "tests/naming.hpp" // naming::test_parameter_to_name +#include "tests/types_to_test.hpp" // util::{cartesian_type_product_t, combine_test_parameters_gtest_t} +#include "tests/utility.hpp" // util::redirect_output + +#include "gtest/gtest.h" // TEST_F, EXPECT_NO_THROW, INSTANTIATE_TYPED_TEST_SUITE_P, ::testing::Test + +#include // std::array +#include // std::size_t +#include // std::map +#include // std::make_tuple, std::tuple +#include // std::vector + +class KokkosCSVM : public ::testing::Test, + private util::redirect_output<> { }; + +TEST_F(KokkosCSVM, construct_parameter) { // execution_space automatic, target_platform automatic + // check whether the execution space would be automatically determined as either OpenMPTarget or OpenACC + const std::map> available_combinations = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping(); + plssvm::kokkos::execution_space space{}; + for (const plssvm::target_platform target : plssvm::list_available_target_platforms()) { + if (plssvm::detail::contains(available_combinations, target)) { + space = available_combinations.at(target).front(); + break; + } + } + + // must throw an exception if the execution space would be OpenMPTarget or OpenACC + if (space == plssvm::kokkos::execution_space::openmp_target || space == plssvm::kokkos::execution_space::openacc) { + EXPECT_THROW_WHAT(plssvm::kokkos::csvm{ plssvm::parameter{} }, + plssvm::kokkos::backend_exception, + fmt::format("The Kokkos execution space {} is currently not supported !", space)); + } else { + EXPECT_NO_THROW(plssvm::kokkos::csvm{ plssvm::parameter{} }); + } +} + +TEST_F(KokkosCSVM, construct_target_and_parameter) { // execution_space automatic, 
target_platform explicit + // create parameter struct + const plssvm::parameter params{}; + + // automatic should always work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::automatic, params })); + + const std::map> available_combinations = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping(); + const auto target_supported = [&](const plssvm::target_platform target) { + return plssvm::detail::contains(available_combinations, target); + }; + +#if defined(PLSSVM_HAS_CPU_TARGET) + if (target_supported(plssvm::target_platform::cpu)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, params })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, params }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform cpu!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, params }), + plssvm::kokkos::backend_exception, + "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif + +#if defined(PLSSVM_HAS_NVIDIA_TARGET) + if (target_supported(plssvm::target_platform::gpu_nvidia)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, params })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, params }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform gpu_nvidia!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, params }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif + +#if defined(PLSSVM_HAS_AMD_TARGET) + if (target_supported(plssvm::target_platform::gpu_amd)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, params })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, params }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform gpu_amd!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, params }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_amd' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif + +#if defined(PLSSVM_HAS_INTEL_TARGET) + if (target_supported(plssvm::target_platform::gpu_intel)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, params })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, params }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform gpu_intel!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, params }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_intel' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif +} + +TEST_F(KokkosCSVM, construct_execution_space_and_parameter) { // execution_space explicit, target_platform 
automatic + // create parameter struct + const plssvm::parameter params{}; + + // automatic should always work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::automatic })); + + const auto target_is_available = [](const plssvm::target_platform target) { + return plssvm::detail::contains(plssvm::list_available_target_platforms(), target); + }; + +#if defined(KOKKOS_ENABLE_CUDA) + // explicitly providing the Cuda execution space should work + if (target_is_available(plssvm::target_platform::gpu_nvidia)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::cuda })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::cuda }), + plssvm::kokkos::backend_exception, + "Couldn't find a valid target_platform for the Kokkos::ExecutionSpace Cuda!"); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::cuda }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace Cuda is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_HIP) + // explicitly providing the HIP execution space should work + if (target_is_available(plssvm::target_platform::gpu_nvidia) || target_is_available(plssvm::target_platform::gpu_amd)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hip })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hip }), + plssvm::kokkos::backend_exception, + "Couldn't find a valid target_platform for the Kokkos::ExecutionSpace HIP!"); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hip }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace HIP is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_SYCL) + // explicitly providing the SYCL execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace SYCL is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_HPX) + // explicitly providing the HPX execution space should work + if (target_is_available(plssvm::target_platform::cpu)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hpx })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hpx }), + plssvm::kokkos::backend_exception, + "Couldn't find a valid target_platform for the Kokkos::ExecutionSpace HPX!"); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hpx }), + plssvm::kokkos::backend_exception, + fmt::format("The provided 
Kokkos::ExecutionSpace HPX is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) + // explicitly providing the OpenMP execution space should work + if (target_is_available(plssvm::target_platform::cpu)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp }), + plssvm::kokkos::backend_exception, + "Couldn't find a valid target_platform for the Kokkos::ExecutionSpace OpenMP!"); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace OpenMP is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + // explicitly providing the OpenMPTarget execution space currently unsupported + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp_target }), + plssvm::kokkos::backend_exception, + "The Kokkos execution space OpenMPTarget is currently not supported !"); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp_target }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace OpenMPTarget is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_OPENACC) + // explicitly providing the OpenACC execution space currently unsupported + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openacc }), + plssvm::kokkos::backend_exception, + "The Kokkos execution space OpenACC is currently not supported !"); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openacc }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace OpenACC is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_THREADS) + // explicitly providing the Threads execution space should work + if (target_is_available(plssvm::target_platform::cpu)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::threads })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::threads }), + plssvm::kokkos::backend_exception, + "Couldn't find a valid target_platform for the Kokkos::ExecutionSpace Threads!"); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::threads }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace Threads is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_SERIAL) + // explicitly providing the Serial execution space should work + if (target_is_available(plssvm::target_platform::cpu)) { + 
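        // a CPU target platform is available, so constructing the CSVM with the Serial execution space is expected to succeed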
EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::serial })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::serial }), + plssvm::kokkos::backend_exception, + "Couldn't find a valid target_platform for the Kokkos::ExecutionSpace Serial!"); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::serial }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace Serial is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif +} + +TEST_F(KokkosCSVM, construct_target_and_execution_space_and_parameter) { // execution_space explicit, target_platform explicit + // create parameter struct + const plssvm::parameter params{}; + + // list all possible execution spaces + std::vector all_execution_spaces{ + plssvm::kokkos::execution_space::cuda, + plssvm::kokkos::execution_space::hip, + plssvm::kokkos::execution_space::sycl, + plssvm::kokkos::execution_space::hpx, + plssvm::kokkos::execution_space::openmp, + plssvm::kokkos::execution_space::openmp_target, + plssvm::kokkos::execution_space::openacc, + plssvm::kokkos::execution_space::threads, + plssvm::kokkos::execution_space::serial + }; + const std::map> available_combinations = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping(); + const auto combination_exists = [&](const plssvm::target_platform target, const plssvm::kokkos::execution_space space) { + return plssvm::detail::contains(available_combinations, target) && plssvm::detail::contains(available_combinations.at(target), space); + }; + const auto execution_space_available = [&](const plssvm::kokkos::execution_space space) { + return plssvm::detail::contains(plssvm::kokkos::list_available_execution_spaces(), space); + }; + +#if defined(PLSSVM_HAS_CPU_TARGET) + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + if (!execution_space_available(space)) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } else if (combination_exists(plssvm::target_platform::cpu, space)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, params, plssvm::kokkos_execution_space = space })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform cpu!", space)); + } + } +#else + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); + } +#endif + +#if defined(PLSSVM_HAS_NVIDIA_TARGET) + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + if (!execution_space_available(space)) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, 
params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } else if (combination_exists(plssvm::target_platform::gpu_nvidia, space)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, params, plssvm::kokkos_execution_space = space })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform gpu_nvidia!", space)); + } + } +#else + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); + } +#endif + +#if defined(PLSSVM_HAS_AMD_TARGET) + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + if (!execution_space_available(space)) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } else if (combination_exists(plssvm::target_platform::gpu_amd, space)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, params, plssvm::kokkos_execution_space = space })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform gpu_amd!", space)); + } + } +#else + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_amd' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); + } +#endif + +#if defined(PLSSVM_HAS_INTEL_TARGET) + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + if (!execution_space_available(space)) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } else if (combination_exists(plssvm::target_platform::gpu_intel, space)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, params, plssvm::kokkos_execution_space = space })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform gpu_intel!", space)); + } + } +#else + for (const 
plssvm::kokkos::execution_space space : all_execution_spaces) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_intel' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); + } +#endif +} + +TEST_F(KokkosCSVM, construct_named_args) { // execution_space automatic, target_platform automatic + // check whether the execution space would be automatically determined as either OpenMPTarget or OpenACC + const std::map> available_combinations = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping(); + plssvm::kokkos::execution_space space{}; + for (const plssvm::target_platform target : plssvm::list_available_target_platforms()) { + if (plssvm::detail::contains(available_combinations, target)) { + space = available_combinations.at(target).front(); + break; + } + } + + // must throw an exception if the execution space would be OpenMPTarget or OpenACC + if (space == plssvm::kokkos::execution_space::openmp_target || space == plssvm::kokkos::execution_space::openacc) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + fmt::format("The Kokkos execution space {} is currently not supported !", space)); + } else { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); + EXPECT_NO_THROW(plssvm::kokkos::csvm{ plssvm::cost = 2.0 }); + } +} + +TEST_F(KokkosCSVM, construct_target_and_named_args) { // execution_space automatic, target_platform explicit + // automatic should always work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::automatic, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); + + const std::map> available_combinations = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping(); + const auto target_supported = [&](const plssvm::target_platform target) { + return plssvm::detail::contains(available_combinations, target); + }; + +#if defined(PLSSVM_HAS_CPU_TARGET) + if (target_supported(plssvm::target_platform::cpu)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform cpu!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif + +#if defined(PLSSVM_HAS_NVIDIA_TARGET) + if (target_supported(plssvm::target_platform::gpu_nvidia)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), 
+ plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform gpu_nvidia!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif + +#if defined(PLSSVM_HAS_AMD_TARGET) + if (target_supported(plssvm::target_platform::gpu_amd)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform gpu_amd!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_amd' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif + +#if defined(PLSSVM_HAS_INTEL_TARGET) + if (target_supported(plssvm::target_platform::gpu_intel)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform gpu_intel!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_intel' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif +} + +TEST_F(KokkosCSVM, construct_execution_space_and_named_args) { // execution_space explicit, target_platform automatic + // automatic should always work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::automatic })); + + const auto target_is_available = [](const plssvm::target_platform target) { + return plssvm::detail::contains(plssvm::list_available_target_platforms(), target); + }; + +#if defined(KOKKOS_ENABLE_CUDA) + // explicitly providing the Cuda execution space should work + if (target_is_available(plssvm::target_platform::gpu_nvidia)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::cuda })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::cuda }), + 
plssvm::kokkos::backend_exception, + "Couldn't find a valid target_platform for the Kokkos::ExecutionSpace Cuda!"); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::cuda }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace Cuda is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_HIP) + // explicitly providing the HIP execution space should work + if (target_is_available(plssvm::target_platform::gpu_nvidia) || target_is_available(plssvm::target_platform::gpu_amd)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hip })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hip }), + plssvm::kokkos::backend_exception, + "Couldn't find a valid target_platform for the Kokkos::ExecutionSpace HIP!"); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hip }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace HIP is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_SYCL) + // explicitly providing the SYCL execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace SYCL is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_HPX) + // explicitly providing the HPX execution space should work + if (target_is_available(plssvm::target_platform::cpu)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hpx })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hpx }), + plssvm::kokkos::backend_exception, + "Couldn't find a valid target_platform for the Kokkos::ExecutionSpace HPX!"); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hpx }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace HPX is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) + // explicitly 
providing the OpenMP execution space should work + if (target_is_available(plssvm::target_platform::cpu)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp }), + plssvm::kokkos::backend_exception, + "Couldn't find a valid target_platform for the Kokkos::ExecutionSpace OpenMP!"); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace OpenMP is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + // explicitly providing the OpenMPTarget execution space currently unsupported + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp_target }), + plssvm::kokkos::backend_exception, + "The Kokkos execution space OpenMPTarget is currently not supported !"); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp_target }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace OpenMPTarget is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_OPENACC) + // explicitly providing the OpenACC execution space currently unsupported + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openacc }), + plssvm::kokkos::backend_exception, + "The Kokkos execution space OpenACC is currently not supported !"); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openacc }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace OpenACC is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_THREADS) + // explicitly providing the Threads execution space should work + if (target_is_available(plssvm::target_platform::cpu)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::threads })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::threads }), + plssvm::kokkos::backend_exception, + "Couldn't find a valid target_platform for the Kokkos::ExecutionSpace Threads!"); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = 
+                      plssvm::kokkos::backend_exception,
+                      fmt::format("The provided Kokkos::ExecutionSpace Threads is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", ")));
+#endif
+
+#if defined(KOKKOS_ENABLE_SERIAL)
+    // explicitly providing the Serial execution space should work
+    if (target_is_available(plssvm::target_platform::cpu)) {
+        EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::serial }));
+    } else {
+        EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::serial }),
+                          plssvm::kokkos::backend_exception,
+                          "Couldn't find a valid target_platform for the Kokkos::ExecutionSpace Serial!");
+    }
+#else
+    EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::serial }),
+                      plssvm::kokkos::backend_exception,
+                      fmt::format("The provided Kokkos::ExecutionSpace Serial is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", ")));
+#endif
+}
+
+TEST_F(KokkosCSVM, construct_target_and_execution_space_and_named_args) {  // execution_space explicit, target_platform explicit
+    // list all possible execution spaces
+    std::vector<plssvm::kokkos::execution_space> all_execution_spaces{
+        plssvm::kokkos::execution_space::cuda,
+        plssvm::kokkos::execution_space::hip,
+        plssvm::kokkos::execution_space::sycl,
+        plssvm::kokkos::execution_space::hpx,
+        plssvm::kokkos::execution_space::openmp,
+        plssvm::kokkos::execution_space::openmp_target,
+        plssvm::kokkos::execution_space::openacc,
+        plssvm::kokkos::execution_space::threads,
+        plssvm::kokkos::execution_space::serial
+    };
+    const std::map<plssvm::target_platform, std::vector<plssvm::kokkos::execution_space>> available_combinations = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping();
+    const auto combination_exists = [&](const plssvm::target_platform target, const plssvm::kokkos::execution_space space) {
+        return plssvm::detail::contains(available_combinations, target) && plssvm::detail::contains(available_combinations.at(target), space);
+    };
+    const auto execution_space_available = [&](const plssvm::kokkos::execution_space space) {
+        return plssvm::detail::contains(plssvm::kokkos::list_available_execution_spaces(), space);
+    };
+
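+    // for each target platform, every execution space must either be rejected because it is not
+    // available at all, construct successfully if the platform/space combination exists, or be
+    // rejected because it does not support the requested target platform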
+#if defined(PLSSVM_HAS_CPU_TARGET)
+    for (const plssvm::kokkos::execution_space space : all_execution_spaces) {
+        if (!execution_space_available(space)) {
+            EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }),
+                              plssvm::kokkos::backend_exception,
+                              fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", ")));
+        } else if (combination_exists(plssvm::target_platform::cpu, space)) {
+            EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }));
+        } else {
+            EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }),
+                              plssvm::kokkos::backend_exception,
+                              fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform cpu!", space));
+        }
+    }
+#else
+    for (const plssvm::kokkos::execution_space space : all_execution_spaces) {
+        EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }),
+                          plssvm::kokkos::backend_exception,
+                          "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!");
+    }
+#endif
+
+#if defined(PLSSVM_HAS_NVIDIA_TARGET)
+    for (const plssvm::kokkos::execution_space space : all_execution_spaces) {
+        if (!execution_space_available(space)) {
+            EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }),
+                              plssvm::kokkos::backend_exception,
+                              fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", ")));
+        } else if (combination_exists(plssvm::target_platform::gpu_nvidia, space)) {
+            EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }));
+        } else {
+            EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }),
+                              plssvm::kokkos::backend_exception,
+                              fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform gpu_nvidia!", space));
+        }
+    }
+#else
+    for (const plssvm::kokkos::execution_space space : all_execution_spaces) {
+        EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }),
+                          plssvm::kokkos::backend_exception,
+                          "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!");
+    }
+#endif
+
+#if defined(PLSSVM_HAS_AMD_TARGET)
+    for (const plssvm::kokkos::execution_space space : all_execution_spaces) {
+        if (!execution_space_available(space)) {
+            EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }),
+                              plssvm::kokkos::backend_exception,
+                              fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", ")));
+        } else if (combination_exists(plssvm::target_platform::gpu_amd, space)) {
+            EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }));
+        } else {
+            EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }),
+                              plssvm::kokkos::backend_exception,
+                              fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform gpu_amd!", space));
+        }
+    }
+#else
+    for (const plssvm::kokkos::execution_space space : all_execution_spaces) {
+        EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }),
+                          plssvm::kokkos::backend_exception,
+                          "Requested target platform 'gpu_amd' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!");
+    }
+#endif
+
+#if defined(PLSSVM_HAS_INTEL_TARGET)
+    for (const plssvm::kokkos::execution_space space : all_execution_spaces) {
+        if (!execution_space_available(space)) {
+            EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }),
+                              plssvm::kokkos::backend_exception,
+                              fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", ")));
+        } else if (combination_exists(plssvm::target_platform::gpu_intel, space)) {
+            EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }));
+        } else {
+            EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }),
+                              plssvm::kokkos::backend_exception,
+                              fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform gpu_intel!", space));
+        }
+    }
+#else
+    for (const plssvm::kokkos::execution_space space : all_execution_spaces) {
+        EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }),
+                          plssvm::kokkos::backend_exception,
+                          "Requested target platform 'gpu_intel' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!");
+    }
+#endif
+}
+
+TEST_F(KokkosCSVM, get_execution_space) {
+    // construct default CSVM
+    const plssvm::kokkos::csvm svm{ plssvm::parameter{} };
+
+    // after construction: get_execution_space must refer to a plssvm::kokkos::execution_space that is not automatic
+    EXPECT_NE(svm.get_execution_space(), plssvm::kokkos::execution_space::automatic);
+}
+
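+// test types used to instantiate the generic, backend-agnostic test suites for the Kokkos backend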
+template <plssvm::kokkos::execution_space space, bool mock_grid_size>
+struct kokkos_csvm_test_type {
+    using mock_csvm_type = mock_kokkos_csvm<mock_grid_size>;
+    using csvm_type = plssvm::kokkos::csvm;
+    using device_ptr_type = typename csvm_type::device_ptr_type;
+    inline static auto additional_arguments = std::make_tuple(std::make_pair(plssvm::kokkos_execution_space, space));
+};
+
+template <plssvm::kokkos::execution_space space>
+using kokkos_csvm_test_type_without_mock = kokkos_csvm_test_type<space, false>;
+
+using kokkos_csvm_test_tuple = util::create_kokkos_test_tuple_t;
+using kokkos_csvm_test_label_type_list = util::cartesian_type_product_t;
+using kokkos_csvm_test_type_list = util::cartesian_type_product_t;
+
+// the tests used in the instantiated GTest test suites
+using kokkos_csvm_test_type_gtest = util::combine_test_parameters_gtest_t;
+using kokkos_solver_type_gtest = util::combine_test_parameters_gtest_t;
+using kokkos_kernel_function_type_gtest = util::combine_test_parameters_gtest_t;
+using kokkos_solver_and_kernel_function_type_gtest = util::combine_test_parameters_gtest_t;
+using kokkos_label_type_kernel_function_and_classification_type_gtest = util::combine_test_parameters_gtest_t;
+using kokkos_label_type_solver_kernel_function_and_classification_type_gtest = util::combine_test_parameters_gtest_t;
+
+// instantiate type-parameterized tests
+// generic CSVM tests
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVM, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMKernelFunction, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolver, kokkos_solver_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolverKernelFunction, kokkos_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMKernelFunctionClassification, kokkos_label_type_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name);
+#if !defined(KOKKOS_ENABLE_CUDA)
+// test case doesn't compile with nvcc when Kokkos::Cuda is enabled due to template instantiation limits
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolverKernelFunctionClassification, kokkos_label_type_solver_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name);
+#endif
+
+// generic CSVM DeathTests
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMDeathTest, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMSolverDeathTest, kokkos_solver_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMKernelFunctionDeathTest, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMSolverKernelFunctionDeathTest, kokkos_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name);
+
+// generic GPU CSVM tests - correct grid sizes
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericGPUCSVM, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericGPUCSVMKernelFunction, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name);
+
+// generic GPU CSVM DeathTests - correct grid sizes
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericGPUCSVMDeathTest, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name);
+
+template <plssvm::kokkos::execution_space space>
+using kokkos_csvm_test_type_with_mock = kokkos_csvm_test_type<space, true>;
+
+using kokkos_mock_csvm_test_tuple = util::create_kokkos_test_tuple_t;
+using kokkos_mock_csvm_test_type_list = util::cartesian_type_product_t;
+
+using kokkos_mock_csvm_test_type_gtest = util::combine_test_parameters_gtest_t;
+using kokkos_mock_kernel_function_type_gtest = util::combine_test_parameters_gtest_t;
+
+// generic GPU CSVM tests - mocked grid sizes
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMFakedGridSize, GenericGPUCSVM, kokkos_mock_csvm_test_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMFakedGridSize, GenericGPUCSVMKernelFunction, kokkos_mock_kernel_function_type_gtest, naming::test_parameter_to_name);
diff --git a/tests/backends/Kokkos/mock_kokkos_csvm.hpp b/tests/backends/Kokkos/mock_kokkos_csvm.hpp
new file mode 100644
index 000000000..6fb35cd9c
--- /dev/null
+++ b/tests/backends/Kokkos/mock_kokkos_csvm.hpp
@@ -0,0 +1,85 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief MOCK class for the C-SVM class using the Kokkos backend.
+ */
+
+#ifndef PLSSVM_TESTS_BACKENDS_KOKKOS_MOCK_KOKKOS_CSVM_HPP_
+#define PLSSVM_TESTS_BACKENDS_KOKKOS_MOCK_KOKKOS_CSVM_HPP_
+#pragma once
+
+#include "plssvm/backends/execution_range.hpp"  // plssvm::detail::dim_type
+#include "plssvm/backends/Kokkos/csvm.hpp"      // plssvm::kokkos::csvm
+
+#include "gmock/gmock.h"  // MOCK_METHOD, ON_CALL, ::testing::Return
+
+#include <cstddef>  // std::size_t
+#include <utility>  // std::forward
+
+/**
+ * @brief GTest mock class for the Kokkos CSVM.
+ * @tparam mock_grid_size `true` if the `plssvm::kokkos::csvm::get_max_grid_size()` function should be mocked, otherwise `false`
+ */
+template <bool mock_grid_size>
+class mock_kokkos_csvm final : public plssvm::kokkos::csvm {
+    using base_type = plssvm::kokkos::csvm;
+
+  public:
+    using base_type::device_ptr_type;
+
+    template <typename... Args>
+    explicit mock_kokkos_csvm(Args &&...args) :
+        base_type{ std::forward<Args>(args)... } {
+        this->fake_functions();
+    }
+
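+    // gmock'ed grid-size query; fake_functions() installs its default behavior below,
+    // either returning hardcoded grid sizes or delegating to the real implementation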
+    MOCK_METHOD((plssvm::detail::dim_type), get_max_grid_size, (const std::size_t), (const, override));
+
+    // make protected member functions public
+    using base_type::assemble_kernel_matrix;
+    using base_type::blas_level_3;
+    using base_type::get_device_memory;
+    using base_type::get_max_work_group_size;
+    using base_type::num_available_devices;
+
+    using base_type::predict_values;
+
+    using base_type::conjugate_gradients;
+    using base_type::perform_dimensional_reduction;
+    using base_type::run_assemble_kernel_matrix_implicit_blas_level_3;
+    using base_type::run_blas_level_3;
+    using base_type::solve_lssvm_system_of_linear_equations;
+
+    using base_type::get_max_mem_alloc_size;
+
+    using base_type::run_assemble_kernel_matrix_explicit;
+    using base_type::run_blas_level_3_kernel_explicit;
+    using base_type::run_inplace_matrix_addition;
+    using base_type::run_inplace_matrix_scale;
+    using base_type::run_predict_kernel;
+    using base_type::run_w_kernel;
+
+    using base_type::data_distribution_;
+    using base_type::devices_;
+
+  private:
+    /**
+     * @brief Fake the plssvm::kokkos::csvm::get_max_grid_size() function if requested.
+     */
+    void fake_functions() const {
+        if constexpr (mock_grid_size) {
+            // mock the function using hardcoded maximum grid sizes
+            ON_CALL(*this, get_max_grid_size).WillByDefault(::testing::Return(plssvm::detail::dim_type{ std::size_t{ 4 }, std::size_t{ 4 }, std::size_t{ 4 } }));
+        } else {
+            // use the real implementation otherwise
+            ON_CALL(*this, get_max_grid_size).WillByDefault([this](const std::size_t device_id) { return base_type::get_max_grid_size(device_id); });
+        }
+    }
+};
+
+#endif  // PLSSVM_TESTS_BACKENDS_KOKKOS_MOCK_KOKKOS_CSVM_HPP_
diff --git a/tests/backends/Kokkos/utility.hpp b/tests/backends/Kokkos/utility.hpp
new file mode 100644
index 000000000..3c3458198
--- /dev/null
+++ b/tests/backends/Kokkos/utility.hpp
@@ -0,0 +1,95 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Determine the execution spaces available for tests with the Kokkos backend.
+ */
+
+#ifndef PLSSVM_TESTS_BACKENDS_KOKKOS_UTILITY_HPP_
+#define PLSSVM_TESTS_BACKENDS_KOKKOS_UTILITY_HPP_
+#pragma once
+
+namespace util {
+
+/**
+ * @brief Determine which execution spaces can be tested based on the available Kokkos::ExecutionSpaces and PLSSVM target platforms.
+ * @return the available execution spaces for testing (`[[nodiscard]]`)
+ */
+[[nodiscard]] constexpr auto constexpr_available_execution_spaces_to_test() {
+    return std::array{
+#if defined(KOKKOS_ENABLE_CUDA) && defined(PLSSVM_HAS_NVIDIA_TARGET)  // for Kokkos::Cuda, an NVIDIA target must be available
+        plssvm::kokkos::execution_space::cuda,
+#endif
+#if defined(KOKKOS_ENABLE_HIP) && (defined(PLSSVM_HAS_NVIDIA_TARGET) || defined(PLSSVM_HAS_AMD_TARGET))  // for Kokkos::HIP, an NVIDIA or AMD target must be available
+        plssvm::kokkos::execution_space::hip,
+#endif
+#if defined(KOKKOS_ENABLE_SYCL)  // for Kokkos::SYCL, any target is ok
+        plssvm::kokkos::execution_space::sycl,
+#endif
+#if defined(KOKKOS_ENABLE_HPX) && defined(PLSSVM_HAS_CPU_TARGET)  // for Kokkos::Experimental::HPX, a CPU target must be available
+        plssvm::kokkos::execution_space::hpx,
+#endif
+#if defined(KOKKOS_ENABLE_OPENMP) && defined(PLSSVM_HAS_CPU_TARGET)  // for Kokkos::OpenMP, a CPU target must be available
+        plssvm::kokkos::execution_space::openmp,
+#endif
+#if defined(KOKKOS_ENABLE_OPENMPTARGET)  // for Kokkos::Experimental::OpenMPTarget, any target is ok  // TODO: implement correctly based on allowed target platforms
+        plssvm::kokkos::execution_space::openmp_target,
+#endif
+#if defined(KOKKOS_ENABLE_OPENACC)  // for Kokkos::Experimental::OpenACC, any target is ok  // TODO: implement correctly based on allowed target platforms
+        plssvm::kokkos::execution_space::openacc,
+#endif
+#if defined(KOKKOS_ENABLE_THREADS) && defined(PLSSVM_HAS_CPU_TARGET)  // for Kokkos::Threads, a CPU target must be available
+        plssvm::kokkos::execution_space::threads,
+#endif
+#if defined(KOKKOS_ENABLE_SERIAL) && defined(PLSSVM_HAS_CPU_TARGET)  // for Kokkos::Serial, a CPU target must be available
+        plssvm::kokkos::execution_space::serial,
+#endif
+    };
+}
+
+/**
+ * @brief Uninstantiated base type to create a `std::tuple` containing all available `kokkos_csvm_test_type` types.
+ */
+template