From b47f244e8bee41e885efcdd11d249b535c7d2335 Mon Sep 17 00:00:00 2001
From: Nizar Ben Bouchta <nizarbenbouchta@gmail.com>
Date: Thu, 24 Oct 2024 08:01:28 +0000
Subject: [PATCH] included gpu_utils

---
 keopscore/keopscore/config/Config_new.py | 237 +++++++++++++++++++----
 1 file changed, 201 insertions(+), 36 deletions(-)

diff --git a/keopscore/keopscore/config/Config_new.py b/keopscore/keopscore/config/Config_new.py
index 74a646d9..64e6de38 100644
--- a/keopscore/keopscore/config/Config_new.py
+++ b/keopscore/keopscore/config/Config_new.py
@@ -5,13 +5,11 @@
 import sys
 from ctypes import CDLL, RTLD_GLOBAL
 from ctypes.util import find_library
-import keopscore
-from keopscore.utils.misc_utils import KeOps_Warning, KeOps_Print
-
+import ctypes
+import tempfile
 from pathlib import Path
-import shutil
-import sys
-import os
+import keopscore
+from keopscore.utils.misc_utils import KeOps_Warning, KeOps_Error, KeOps_Print
 
 class ConfigNew:
     """
@@ -20,13 +18,18 @@ class ConfigNew:
     to display configuration and health status information.
     """
 
+    # CUDA constants
+    CUDA_SUCCESS = 0
+    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1
+    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8
+
     def __init__(self):
         # Initialize attributes with default values or None
         self.os = None
         self.python_version = None
         self.env_type = None
-        self.use_cuda = None
-        self.use_OpenMP = None
+        self._use_cuda = None
+        self._use_OpenMP = None
 
         self.base_dir_path = None
         self.template_path = None
@@ -43,6 +46,15 @@ def __init__(self):
         self.disable_pragma_unrolls = None
         self.init_cudalibs_flag = False
 
+        # CUDA related attributes
+        self.libcuda_folder = None
+        self.libnvrtc_folder = None
+        self.cuda_include_path = None
+        self.cuda_version = None
+        self.n_gpus = 0
+        self.gpu_compile_flags = ''
+        self.cuda_message = ''
+
         # Initialize all attributes using their setter methods
         self.set_os()
         self.set_python_version()
@@ -109,31 +121,40 @@ def print_env_type(self):
     def set_use_cuda(self):
         """Determine and set whether to use CUDA."""
         # By default, try to use CUDA
-        self.use_cuda = True
-        # Additional logic can be added here to check CUDA availability
+        self._use_cuda = True
+        if not self._cuda_libraries_available():
+            self._use_cuda = False
+            self.cuda_message = "CUDA libraries not found."
+        else:
+            self.get_cuda_version()
+            self.get_cuda_include_path()
+            self.get_gpu_props()
+            if self.n_gpus == 0:
+                self._use_cuda = False
+                self.cuda_message = "No GPUs found."
 
     def get_use_cuda(self):
         """Get the use_cuda flag."""
-        return self.use_cuda
+        return self._use_cuda
 
     def print_use_cuda(self):
         """Print the CUDA support status."""
-        status = "Enabled" if self.use_cuda else "Disabled"
+        status = "Enabled" if self._use_cuda else "Disabled"
         print(f"CUDA Support: {status}")
 
     def set_use_OpenMP(self):
         """Determine and set whether to use OpenMP."""
         # By default, try to use OpenMP
-        self.use_OpenMP = True
+        self._use_OpenMP = True
         # Additional logic can be added here to check OpenMP availability
 
     def get_use_OpenMP(self):
         """Get the use_OpenMP flag."""
-        return self.use_OpenMP
+        return self._use_OpenMP
 
     def print_use_OpenMP(self):
         """Print the OpenMP support status."""
-        status = "Enabled" if self.use_OpenMP else "Disabled"
+        status = "Enabled" if self._use_OpenMP else "Disabled"
         print(f"OpenMP Support: {status}")
 
     def set_base_dir_path(self):
@@ -307,7 +328,7 @@ def set_cpp_flags(self):
             self.cpp_flags += " -flto"
         else:
             self.cpp_flags += " -flto=auto"
-        if self.use_OpenMP:
+        if self._use_OpenMP:
             if self.os == "Darwin":
                 # Special handling for OpenMP on macOS
                 omp_env_path = f" -I{os.getenv('OMP_PATH')}" if "OMP_PATH" in os.environ else ""
@@ -338,12 +359,158 @@ def print_disable_pragma_unrolls(self):
         status = "Enabled" if self.disable_pragma_unrolls else "Disabled"
         print(f"Disable Pragma Unrolls: {status}")
 
-    # Methods for init_cudalibs_flag, init_cudalibs, show_cuda_status, show_gpu_config can be added similarly.
+    # CUDA-related methods
+
+    def _cuda_libraries_available(self):
+        """Check if CUDA libraries are available."""
+        cuda_lib = find_library("cuda")
+        nvrtc_lib = find_library("nvrtc")
+        if cuda_lib and nvrtc_lib:
+            self.libcuda_folder = os.path.dirname(self.find_library_abspath("cuda"))
+            self.libnvrtc_folder = os.path.dirname(self.find_library_abspath("nvrtc"))
+            return True
+        else:
+            return False
+
+    def find_library_abspath(self, libname):
+        """Find the absolute path of a library."""
+        libpath = find_library(libname)
+        if libpath is not None:
+            # Try to find the absolute path
+            for directory in os.environ.get('LD_LIBRARY_PATH', '').split(':'):
+                full_path = os.path.join(directory, libpath)
+                if os.path.exists(full_path):
+                    return full_path
+            # As a fallback, return the library name
+            return libpath
+        else:
+            return None
+
+    def get_include_file_abspath(self, filename):
+        """Find the absolute path of a header file."""
+        tmp_file = tempfile.NamedTemporaryFile(dir=self.default_build_path, delete=False)
+        tmp_file_name = tmp_file.name
+        tmp_file.close()
+        command = f'echo "#include <{filename}>" | {self.cxx_compiler} -M -E -x c++ - | head -n 2 > {tmp_file_name}'
+        os.system(command)
+        with open(tmp_file_name, 'r') as f:
+            content = f.read()
+        os.remove(tmp_file_name)
+        strings = content.split()
+        for s in strings:
+            if filename in s:
+                return s.strip()
+        return None
+
+
+    def get_cuda_include_path(self):
+        """Attempt to find the CUDA include path."""
+        # First, check the CUDA_PATH and CUDA_HOME environment variables
+        for env_var in ["CUDA_PATH", "CUDA_HOME"]:
+            path = os.getenv(env_var)
+            if path:
+                include_path = Path(path) / "include"
+                if (include_path / "cuda.h").is_file() and (include_path / "nvrtc.h").is_file():
+                    self.cuda_include_path = str(include_path)
+                    return
+        # Check if CUDA is installed via conda
+        conda_prefix = os.getenv("CONDA_PREFIX")
+        if conda_prefix:
+            include_path = Path(conda_prefix) / "include"
+            if (include_path / "cuda.h").is_file() and (include_path / "nvrtc.h").is_file():
+                self.cuda_include_path = str(include_path)
+                return
+        # Check standard locations
+        cuda_version_str = self.get_cuda_version(out_type="string")
+        possible_paths = [
+            Path("/usr/local/cuda"),
+            Path(f"/usr/local/cuda-{cuda_version_str}"),
+            Path("/opt/cuda"),
+        ]
+        for base_path in possible_paths:
+            include_path = base_path / "include"
+            if (include_path / "cuda.h").is_file() and (include_path / "nvrtc.h").is_file():
+                self.cuda_include_path = str(include_path)
+                return
+        # Use get_include_file_abspath to locate headers
+        cuda_h_path = self.get_include_file_abspath("cuda.h")
+        nvrtc_h_path = self.get_include_file_abspath("nvrtc.h")
+        if cuda_h_path and nvrtc_h_path:
+            if os.path.dirname(cuda_h_path) == os.path.dirname(nvrtc_h_path):
+                self.cuda_include_path = os.path.dirname(cuda_h_path)
+                return
+        # If not found, issue a warning
+        KeOps_Warning(
+            "CUDA include path not found. Please set the CUDA_PATH or CUDA_HOME environment variable."
+        )
+        self.cuda_include_path = None
+
+
+    def get_cuda_version(self, out_type="single_value"):
+        """Retrieve the installed CUDA runtime version."""
+        try:
+            libcudart = ctypes.CDLL(find_library("cudart"))
+            cuda_version = ctypes.c_int()
+            libcudart.cudaRuntimeGetVersion(ctypes.byref(cuda_version))
+            cuda_version_value = int(cuda_version.value)
+            if out_type == "single_value":
+                self.cuda_version = cuda_version_value
+                return cuda_version_value
+            major = cuda_version_value // 1000
+            minor = (cuda_version_value % 1000) // 10
+            if out_type == "major,minor":
+                return major, minor
+            elif out_type == "string":
+                return f"{major}.{minor}"
+        except Exception as e:
+            KeOps_Warning(f"Could not determine CUDA version: {e}")
+            self.cuda_version = None
+            return None
+
+    def get_gpu_props(self):
+        """Retrieve GPU properties and set related attributes."""
+        try:
+            libcuda = ctypes.CDLL(find_library("cuda"))
+            nGpus = ctypes.c_int()
+            result = libcuda.cuInit(0)
+            if result != self.CUDA_SUCCESS:
+                KeOps_Warning("cuInit failed; no CUDA driver available.")
+                self.n_gpus = 0
+                return
+            result = libcuda.cuDeviceGetCount(ctypes.byref(nGpus))
+            if result != self.CUDA_SUCCESS:
+                KeOps_Warning("cuDeviceGetCount failed.")
+                self.n_gpus = 0
+                return
+            self.n_gpus = nGpus.value
+            self.gpu_compile_flags = f"-DMAXIDGPU={self.n_gpus - 1} "
+            for d in range(self.n_gpus):
+                device = ctypes.c_int()
+                libcuda.cuDeviceGet(ctypes.byref(device), d)
+                max_threads = ctypes.c_int()
+                libcuda.cuDeviceGetAttribute(
+                    ctypes.byref(max_threads),
+                    self.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
+                    device
+                )
+                shared_mem = ctypes.c_int()
+                libcuda.cuDeviceGetAttribute(
+                    ctypes.byref(shared_mem),
+                    self.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
+                    device
+                )
+                self.gpu_compile_flags += f"-DMAXTHREADSPERBLOCK{d}={max_threads.value} "
+                self.gpu_compile_flags += f"-DSHAREDMEMPERBLOCK{d}={shared_mem.value} "
+        except Exception as e:
+            KeOps_Warning(f"Error retrieving GPU properties: {e}")
+            self.n_gpus = 0
+
+    # Environment variables printing method
 
     def print_environment_variables(self):
         """Print relevant environment variables."""
         print("\nEnvironment Variables:")
-        env_vars = ["KEOPS_CACHE_FOLDER", "CUDA_VISIBLE_DEVICES", "CXX", "CXXFLAGS", "OMP_PATH", "CONDA_DEFAULT_ENV"]
+        env_vars = ["KEOPS_CACHE_FOLDER", "CUDA_VISIBLE_DEVICES", "CXX", "CXXFLAGS", "OMP_PATH", "CONDA_DEFAULT_ENV", "CUDA_PATH"]
         for var in env_vars:
             value = os.environ.get(var, None)
             if value:
@@ -351,7 +518,6 @@ def print_environment_variables(self):
             else:
                 print(f"{var} is not set")
 
-
     def print_all(self):
         """
         Print all configuration settings and system health status in a clear and organized manner,
@@ -395,22 +561,24 @@ def print_all(self):
             print(f"  {cross_mark} Compiler '{self.cxx_compiler}' not found on the system.")
 
         # OpenMP Support
-        openmp_status = check_mark if self.use_OpenMP else cross_mark
+        openmp_status = check_mark if self._use_OpenMP else cross_mark
         print(f"\nOpenMP Support")
         print("-" * 60)
         self.print_use_OpenMP()
-        if not self.use_OpenMP:
+        if not self._use_OpenMP:
             print(f"  {cross_mark} OpenMP support is disabled or not available.")
 
         # CUDA Support
-        cuda_status = check_mark if self.use_cuda else cross_mark
+        cuda_status = check_mark if self._use_cuda else cross_mark
         print(f"\nCUDA Support")
         print("-" * 60)
         self.print_use_cuda()
-        if self.use_cuda:
-            # CUDA is enabled; display CUDA configuration details
-            # Get CUDA include path from environment variables
-            cuda_include_path = os.environ.get('CUDA_PATH') or os.environ.get('CUDA_HOME')
+        if self._use_cuda:
+            print(f"CUDA Version: {self.cuda_version}")
+            print(f"Number of GPUs: {self.n_gpus}")
+            print(f"GPU Compile Flags: {self.gpu_compile_flags}")
+            # CUDA Include Path
+            cuda_include_path = self.cuda_include_path
             cuda_include_status = check_mark if cuda_include_path else cross_mark
             print(f"CUDA Include Path: {cuda_include_path or 'Not Found'} {cuda_include_status}")
 
@@ -422,7 +590,7 @@ def print_all(self):
                 print(f"  {cross_mark} CUDA compiler 'nvcc' not found in PATH.")
         else:
             # CUDA is disabled; display the CUDA message
-            print(f"  {cross_mark} CUDA support is disabled or not available.")
+            print(f"  {cross_mark} {self.cuda_message}")
 
         # Conda or Virtual Environment Paths
         print(f"\nEnvironment Paths")
@@ -454,7 +622,7 @@ def print_all(self):
             status = check_mark if path_exists else cross_mark
             print(f"{name}: {path} {status}")
             if not path_exists:
-                print(f"  {cross_mark} Path '{path}' does not exist.")
+                print(f"Path '{path}' does not exist.")
 
         # JIT Binary
         jit_binary_path = Path(self.jit_binary)
@@ -466,11 +634,7 @@ def print_all(self):
         # Environment Variables
         print(f"\nEnvironment Variables")
         print("-" * 60)
-        env_vars = ["KEOPS_CACHE_FOLDER", "CUDA_VISIBLE_DEVICES", "CXX", "CXXFLAGS", "OMP_PATH", "CONDA_DEFAULT_ENV"]
-        for var in env_vars:
-            value = os.environ.get(var, None)
-            status = check_mark if value else cross_mark
-            print(f"{var}: {value or 'Not Set'} {status}")
+        self.print_environment_variables()
 
         # Conclusion
         print("\nConfiguration Status Summary")
@@ -479,12 +643,13 @@ def print_all(self):
         issues = []
         if not compiler_available:
             issues.append(f"{cross_mark} C++ compiler '{self.cxx_compiler}' not found.")
-        if not self.use_OpenMP:
+        if not self._use_OpenMP:
             issues.append(f"{cross_mark} OpenMP support is disabled or not available.")
-        if self.use_cuda:
+        if self._use_cuda:
+            nvcc_path = shutil.which('nvcc')
             if not nvcc_path:
                 issues.append(f"{cross_mark} CUDA compiler 'nvcc' not found.")
-            if not cuda_include_path:
+            if not self.cuda_include_path:
                 issues.append(f"{cross_mark} CUDA include path not found.")
         if not Path(self.keops_cache_folder).exists():
             issues.append(f"{cross_mark} KeOps cache folder '{self.keops_cache_folder}' does not exist.")