Skip to content

Commit

Permalink
CUDA Test 9
Browse files Browse the repository at this point in the history
  • Loading branch information
onurulgen committed Feb 6, 2024
1 parent b09a7a6 commit 98777c1
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 99 deletions.
9 changes: 5 additions & 4 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,11 @@ jobs:
run: |
mkdir build
cd build
cmake -DCMAKE_CXX_COMPILER=${{ matrix.cxx-compiler }} \
-DCMAKE_C_COMPILER=${{ matrix.c-compiler }} \
cmake -DCMAKE_C_COMPILER=${{ matrix.c-compiler }} \
-DCMAKE_CXX_COMPILER=${{ matrix.cxx-compiler }} \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DBUILD_ALL_DEP=ON \
-DCHECK_GPU=OFF \
-DUSE_CUDA=${{ matrix.use-cuda }} \
-DUSE_OPENCL=OFF \
-DUSE_SSE=ON \
Expand Down Expand Up @@ -91,15 +92,15 @@ jobs:
working-directory: build/reg-apps
run: |
mkdir -p ${{ steps.vars.outputs.output-folder }}
find . -maxdepth 1 -type f -executable -exec cp {} ${{ steps.vars.outputs.output-folder }} \;
find . -maxdepth 1 -type f -executable -exec mv {} ${{ steps.vars.outputs.output-folder }} \;
zip -r ../NiftyReg.zip ${{ steps.vars.outputs.output-folder }}
- name: Prepare the package
if: matrix.os-name == 'macOS'
working-directory: build/reg-apps
run: |
mkdir -p ${{ steps.vars.outputs.output-folder }}
find . -maxdepth 1 -type f -perm +111 -exec cp {} ${{ steps.vars.outputs.output-folder }} \;
find . -maxdepth 1 -type f -perm +111 -exec mv {} ${{ steps.vars.outputs.output-folder }} \;
zip -r ../NiftyReg.zip ${{ steps.vars.outputs.output-folder }}
- name: Upload the package
Expand Down
22 changes: 9 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
cmake_minimum_required(VERSION 3.2.2)
if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
mark_as_advanced(FORCE CMAKE_BACKWARDS_COMPATIBILITY)
else("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
mark_as_advanced(CLEAR CMAKE_BACKWARDS_COMPATIBILITY)
endif("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
cmake_minimum_required(VERSION 3.18)
#-----------------------------------------------------------------------------
project(NiftyReg)
#-----------------------------------------------------------------------------
Expand Down Expand Up @@ -72,6 +67,7 @@ option(USE_CUDA "To use the CUDA platform" OFF)
option(USE_OPENCL "To use the OpenCL platform" OFF)
option(USE_OPENMP "To use openMP for multi-CPU processing" ON)
option(USE_SSE "To enable SSE computation in some case" ON)
option(CHECK_GPU "To check if a GPU is available" ON)
#-----------------------------------------------------------------------------
option(USE_NRRD "To use the NRRD file format" OFF)
mark_as_advanced(USE_NRRD)
Expand Down Expand Up @@ -157,19 +153,19 @@ if(USE_OPENCL)
endif(USE_OPENCL)
#-----------------------------------------------------------------------------
if(USE_CUDA)
# Check if the CUDA drivers are available
find_package(CUDA REQUIRED)
mark_as_advanced(CUDA_SDK_ROOT_DIR)
# Check if the CUDA Toolkit is available
enable_language(CUDA)
find_package(CUDAToolkit)
option(CUDA_FAST_MATH "To use the fast math flag" OFF)
mark_as_advanced(CUDA_FAST_MATH)
if(NOT CUDA_FOUND)
if(NOT CMAKE_CUDA_COMPILER)
set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
message(SEND_ERROR "CUDA not found. The USE_CUDA flag is turned OFF")
else(NOT CUDA_FOUND)
else(NOT CMAKE_CUDA_COMPILER)
include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cuda)
include_directories(${CUDA_INCLUDE_DIRS})
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
add_definitions(-DUSE_CUDA)
endif(NOT CUDA_FOUND)
endif(NOT CMAKE_CUDA_COMPILER)
endif(USE_CUDA)
#-----------------------------------------------------------------------------
if(USE_SSE)
Expand Down
2 changes: 1 addition & 1 deletion niftyreg_build_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
403
404
23 changes: 7 additions & 16 deletions reg-apps/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
include_directories(${CMAKE_CURRENT_BINARY_DIR})

#-----------------------------------------------------------------------------
add_executable(reg_average reg_average.cpp)
target_link_libraries(reg_average _reg_resampling _reg_globalTrans _reg_localTrans _reg_maths _reg_tools _reg_ReadWriteImage)
Expand All @@ -24,19 +23,11 @@ add_executable(reg_jacobian reg_jacobian.cpp)
target_link_libraries(reg_jacobian _reg_resampling _reg_localTrans _reg_tools _reg_globalTrans _reg_ReadWriteImage)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/reg_jacobian.h.in ${CMAKE_CURRENT_BINARY_DIR}/reg_jacobian.h @ONLY)
#-----------------------------------------------------------------------------
if(USE_CUDA)
cuda_add_executable(reg_f3d reg_f3d.cpp)
else(USE_CUDA)
add_executable(reg_f3d reg_f3d.cpp)
endif(USE_CUDA)
add_executable(reg_f3d reg_f3d.cpp)
target_link_libraries(reg_f3d _reg_f3d)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/reg_f3d.h.in ${CMAKE_CURRENT_BINARY_DIR}/reg_f3d.h @ONLY)
#-----------------------------------------------------------------------------
if(USE_CUDA)
cuda_add_executable(reg_aladin reg_aladin.cpp)
else(USE_CUDA)
add_executable(reg_aladin reg_aladin.cpp)
endif(USE_CUDA)
add_executable(reg_aladin reg_aladin.cpp)
target_link_libraries(reg_aladin _reg_aladin)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/reg_aladin.h.in ${CMAKE_CURRENT_BINARY_DIR}/reg_aladin.h @ONLY)
#-----------------------------------------------------------------------------
Expand Down Expand Up @@ -65,11 +56,11 @@ if(USE_CUDA OR USE_OPENCL)
endif(USE_CUDA OR USE_OPENCL)
#-----------------------------------------------------------------------------
foreach(MODULE_NAME ${MODULE_LIST})
install(TARGETS ${MODULE_NAME}
RUNTIME DESTINATION bin COMPONENT Runtime
LIBRARY DESTINATION lib COMPONENT Runtime
ARCHIVE DESTINATION lib COMPONENT Runtime
)
install(TARGETS ${MODULE_NAME}
RUNTIME DESTINATION bin COMPONENT Runtime
LIBRARY DESTINATION lib COMPONENT Runtime
ARCHIVE DESTINATION lib COMPONENT Runtime
)
endforeach(MODULE_NAME)
#-----------------------------------------------------------------------------
install(PROGRAMS groupwise_niftyreg_params.sh DESTINATION bin COMPONENT Runtime)
Expand Down
120 changes: 60 additions & 60 deletions reg-lib/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,63 +1,63 @@
if(CHECK_GPU)
  # Build and run checkCudaCard.cu at configure time to find out whether at
  # least one suitable graphics card is present. On success the program prints
  # the highest compute capability it found, formatted as "<major>.<minor>".
  # The include directories are passed as a single quoted argument so that a
  # multi-entry list is not split into several unrelated CMAKE_FLAGS entries.
  try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/checkCudaCard.cu
    CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
    COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR
    RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR
  )
  # The check program could not be compiled: disable CUDA and leave this directory
  if(NOT COMPILE_RESULT_VAR)
    message(WARNING "The code to check the presence of a CUDA-enabled card failed.")
    message("The USE_CUDA flag has been turned OFF.")
    set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
    return()
  # The check program ran but reported a failure (non-zero exit code or
  # FAILED_TO_RUN): disable CUDA and leave this directory
  elseif(RUN_RESULT_VAR)
    message(WARNING "No CUDA-enabled card has been detected")
    message("Result code: ${RUN_RESULT_VAR}")
    message("Error message: ${RUN_OUTPUT_VAR}")
    message("The USE_CUDA flag has been turned OFF.")
    set(USE_CUDA OFF CACHE BOOL "To enable CUDA for a GPU implementation of the code" FORCE)
    return()
  endif(NOT COMPILE_RESULT_VAR)
  # Drop any surrounding whitespace so that it cannot leak into the
  # architecture string built below
  string(STRIP "${RUN_OUTPUT_VAR}" RUN_OUTPUT_VAR)
  message(STATUS "Found a CUDA-enabled card (capability ${RUN_OUTPUT_VAR})")
  # Turn "<major>.<minor>" into the two/three digit architecture code, e.g. "8.6" -> "86".
  # The input is quoted so that an empty output does not make string(REPLACE) fail
  # with a missing-argument error.
  string(REPLACE "." "" CAPABILITY_CODE "${RUN_OUTPUT_VAR}")
  # Reject cards whose compute capability is below the supported minimum of 6.0
  if("${CAPABILITY_CODE}" LESS "60")
    set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
    message(SEND_ERROR "CUDA cards with capability less than 6.0 are not supported. The USE_CUDA flag is turned OFF")
    return()
  endif("${CAPABILITY_CODE}" LESS "60")
  # Generate native code only for the detected card
  set(CMAKE_CUDA_ARCHITECTURES "${CAPABILITY_CODE}-real")
else(CHECK_GPU)
  # If no GPU check is performed, assume a minimum capability of 6.0.
  # Entries with the "-real" suffix produce native code for that architecture only;
  # the last entry has no suffix, so PTX is generated for it as well, which lets
  # the driver JIT-compile the kernels for newer architectures.
  # Therefore, the code should run on any GPU with a capability of 6.0 or higher.
  set(CMAKE_CUDA_ARCHITECTURES "60-real;61-real;70-real;75-real;80-real;86-real;89")
endif(CHECK_GPU)
#-----------------------------------------------------------------------------
# Compile an executable to check if there is at least one suitable graphical card
# try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/checkCudaCard.cpp
# CMAKE_FLAGS -DINCLUDE_DIRECTORIES:STRING=${CUDA_INCLUDE_DIRS} -DLINK_LIBRARIES:STRING=${CUDA_CUDART_LIBRARY}
# COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR
# RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR
# )
# # Check if the executable could not compile
# if(NOT COMPILE_RESULT_VAR)
# message(WARNING "The code to check the presence of a CUDA-enabled card failed.")
# message("The USE_CUDA flag has been turned OFF.")
# set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
# return()
# # Check if the executable return failure
# elseif(RUN_RESULT_VAR)
# message(WARNING "No CUDA-enabled card has been detected")
# message("Result code: ${RUN_RESULT_VAR}")
# message("Error message: ${RUN_OUTPUT_VAR}")
# message("The USE_CUDA flag has been turned OFF.")
# set(USE_CUDA OFF CACHE BOOL "To enable CUDA for a GPU implementation of the code" FORCE)
# return()
# else(NOT COMPILE_RESULT_VAR)
# message(STATUS "Found CUDA (v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}) and a CUDA-enabled card (capability ${RUN_OUTPUT_VAR})")
# Set C++ standard version for CUDA, and enable extended lambdas and relaxed constexpr support
set(CUDA_NVCC_FLAGS "-std=c++17 --extended-lambda --expt-relaxed-constexpr")
#check cuda version and adjust compile flags
# if("${RUN_OUTPUT_VAR}" LESS "30")
# set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
# message(SEND_ERROR "CUDA card with capability 1.x or 2.x are not supported. The USE_CUDA flag is turned OFF")
# return()
# endif("${RUN_OUTPUT_VAR}" LESS "30")
# string(REPLACE "." "" CAPABILITY_CODE ${RUN_OUTPUT_VAR})
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_52,code=sm_52")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_60,code=sm_60")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_61,code=sm_61")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_70,code=sm_70")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_75,code=sm_75")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_80,code=sm_80")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_86,code=sm_86")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_89,code=sm_89")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_89,code=compute_89")
# If desired, add PIC flags
if(CMAKE_POSITION_INDEPENDENT_CODE AND DEFINED CMAKE_C_COMPILE_OPTIONS_PIC)
# Add (undocumented) CMake flag that should tell the host compiler to generate position independent code
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --compiler-options ${CMAKE_C_COMPILE_OPTIONS_PIC}")
endif()
# Adjust for debug and release versions
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --ptxas-options=-v -g -G")
else(CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --ptxas-options=-O3")
endif(CMAKE_BUILD_TYPE STREQUAL "Debug")
if(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -use_fast_math")
message(STATUS "CUDA fast math enabled")
endif(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF")
# endif(NOT COMPILE_RESULT_VAR)
# Compile CUDA sources as C++17 and switch on the nvcc options for
# extended lambdas and relaxed constexpr
set(CMAKE_CUDA_STANDARD 17)
string(APPEND CMAKE_CUDA_FLAGS " --extended-lambda --expt-relaxed-constexpr")
# When position independent code is requested, hand the matching option to the host compiler
if(CMAKE_POSITION_INDEPENDENT_CODE AND DEFINED CMAKE_C_COMPILE_OPTIONS_PIC)
  # CMAKE_C_COMPILE_OPTIONS_PIC is an undocumented CMake variable holding the
  # host compiler's PIC option; it is forwarded through nvcc's --compiler-options
  string(APPEND CMAKE_CUDA_FLAGS " --compiler-options ${CMAKE_C_COMPILE_OPTIONS_PIC}")
endif()
# Debug builds get verbose ptxas output and debug information;
# every other build type asks ptxas for -O3
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
  string(APPEND CMAKE_CUDA_FLAGS " --ptxas-options=-v -g -G")
else()
  string(APPEND CMAKE_CUDA_FLAGS " --ptxas-options=-O3")
endif()
# Enable nvcc fast math only when it was requested and neither precise square
# roots nor precise divisions are asked for.
# The switches are tested as booleans: if()'s EQUAL operator is a numeric
# comparison and is never true against the string "OFF", which previously made
# this branch unreachable.
# NOTE(review): CUDA_PRECISE_SQRT and CUDA_PRECISE_DIV are not declared in the
# visible part of the build; when unset they are treated as OFF here - confirm.
if(CUDA_FAST_MATH AND NOT CUDA_PRECISE_SQRT AND NOT CUDA_PRECISE_DIV)
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -use_fast_math")
  message(STATUS "CUDA fast math enabled")
endif()
#-----------------------------------------------------------------------------
set(NAME _reg_cuda_kernels)
cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
../AladinContent.cpp
affineDeformationKernel.cu
blockMatchingKernel.cu
Expand Down Expand Up @@ -86,7 +86,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
_reg_nmi_gpu.cu
_reg_ssd_gpu.cu
)
target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY})
target_link_libraries(${NAME} CUDA::cuda_driver)
install(TARGETS ${NAME}
RUNTIME DESTINATION lib
LIBRARY DESTINATION lib
Expand All @@ -95,8 +95,8 @@ install(TARGETS ${NAME}
set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
#-----------------------------------------------------------------------------
set(NAME _reg_cudainfo)
cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp)
target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY})
add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cu)
target_link_libraries(${NAME} CUDA::cuda_driver)
install(TARGETS ${NAME}
RUNTIME DESTINATION lib
LIBRARY DESTINATION lib
Expand Down
File renamed without changes.
8 changes: 3 additions & 5 deletions reg-lib/cuda/checkCudaCard.cpp → reg-lib/cuda/checkCudaCard.cu
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,26 @@ int main() {
int deviceCount = 0, output = 0;
const cudaError_t cudaResultCode = cudaGetDeviceCount(&deviceCount);

// Error when running cudaGetDeviceCount
if (cudaResultCode != cudaSuccess) {
std::cerr << cudaGetErrorString(cudaResultCode) << " (CUDA Error Code=" << cudaResultCode << ")" << std::endl;
return EXIT_FAILURE;
}

// No CUDA-capable device was found
if (deviceCount == 0) {
std::cerr << "No device detected" << std::endl;
return EXIT_FAILURE;
}

// Detect device capability and picks the best
for (unsigned i = 0; i < deviceCount; ++i) {
// Detect device capability and pick the best
for (int i = 0; i < deviceCount; i++) {
cudaSetDevice(i);
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, i);
output = std::max(output, deviceProp.major * 10 + deviceProp.minor);
}

// Output for device capability
std::cout << output;
std::cout << output / 10 << "." << output % 10;

return EXIT_SUCCESS;
}

0 comments on commit 98777c1

Please sign in to comment.