diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 649ac3dcc4f63a..eee59ae31a618d 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -107,6 +107,7 @@ set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING option(GGML_LLAMAFILE "ggml: use LLAMAFILE" OFF) option(GGML_CUDA "ggml: use CUDA" OFF) +option(GGML_MUSA "ggml: use MUSA" OFF) option(GGML_CUDA_FORCE_DMMV "ggml: use dmmv instead of mmvq CUDA kernels" OFF) option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF) option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index c5ee7e4255ee5f..971c6b616a728f 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -139,6 +139,19 @@ if (GGML_METAL) ) endif() +if (GGML_MUSA) + set(CMAKE_C_COMPILER clang) + set(CMAKE_C_EXTENSIONS OFF) + set(CMAKE_CXX_COMPILER clang++) + set(CMAKE_CXX_EXTENSIONS OFF) + + set(GGML_CUDA ON) + + list(APPEND GGML_CDEF_PUBLIC GGML_USE_MUSA) + + add_compile_definitions(GGML_USE_MUSA) +endif() + if (GGML_OPENMP) find_package(OpenMP) if (OpenMP_FOUND) @@ -147,6 +160,11 @@ if (GGML_OPENMP) add_compile_definitions(GGML_USE_OPENMP) set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} OpenMP::OpenMP_C OpenMP::OpenMP_CXX) + + if (GGML_MUSA) + set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} "/usr/lib/llvm-10/include/openmp") + set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} "/usr/lib/llvm-10/lib/libomp.so") + endif() else() message(WARNING "OpenMP not found") endif() @@ -249,7 +267,13 @@ endif() if (GGML_CUDA) cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES - find_package(CUDAToolkit) + if (GGML_MUSA) + list(APPEND CMAKE_MODULE_PATH "/usr/local/musa/cmake/") + find_package(MUSAToolkit) + set(CUDAToolkit_FOUND ${MUSAToolkit_FOUND}) + else() + find_package(CUDAToolkit) + endif() if (CUDAToolkit_FOUND) message(STATUS "CUDA found") @@ -268,7 +292,11 @@ if (GGML_CUDA) endif() message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") - enable_language(CUDA) + if (GGML_MUSA) + set(CMAKE_CUDA_COMPILER ${MUSAToolkit_MCC_EXECUTABLE}) + else() + enable_language(CUDA) + endif() file(GLOB GGML_HEADERS_CUDA "ggml-cuda/*.cuh") list(APPEND GGML_HEADERS_CUDA "../include/ggml-cuda.h") @@ -332,21 +360,40 @@ if (GGML_CUDA) add_compile_definitions(GGML_CUDA_NO_PEER_COPY) endif() + if (GGML_MUSA) + set_source_files_properties(${GGML_SOURCES_CUDA} PROPERTIES LANGUAGE CXX) + foreach(SOURCE ${GGML_SOURCES_CUDA}) + set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_22") + endforeach() + endif() + if (GGML_STATIC) if (WIN32) # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt) else () - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) + if (GGML_MUSA) + set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musart_static MUSA::mublas_static) + else() + set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) + endif() endif() else() - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt) + if (GGML_MUSA) + set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musart MUSA::mublas) + else() + set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt) + endif() endif() if (GGML_CUDA_NO_VMM) # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so) else() - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ... + if (GGML_MUSA) + set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musa_driver) # required by muDeviceGetAttribute(), muMemGetAllocationGranularity(...), ... + else() + set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ... + endif() endif() else() message(WARNING "CUDA not found") @@ -757,8 +804,10 @@ function(get_flags CCID CCVER) set(C_FLAGS -Wdouble-promotion) set(CXX_FLAGS -Wno-array-bounds) - if (CCVER VERSION_GREATER_EQUAL 7.1.0) - list(APPEND CXX_FLAGS -Wno-format-truncation) + if (NOT GGML_MUSA) + if (CCVER VERSION_GREATER_EQUAL 7.1.0) + list(APPEND CXX_FLAGS -Wno-format-truncation) + endif() endif() if (CCVER VERSION_GREATER_EQUAL 8.1.0) list(APPEND CXX_FLAGS -Wextra-semi) @@ -1059,7 +1108,9 @@ if (GGML_CUDA) list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "") - list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED}) + # list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED}) + # XXX: Removed flags: -Xcompiler + list(APPEND CUDA_FLAGS ${CUDA_CXX_FLAGS_JOINED}) endif() add_compile_options("$<$:${CUDA_FLAGS}>") @@ -1163,6 +1214,7 @@ endif() target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC}) target_include_directories(ggml PUBLIC ../include) target_include_directories(ggml PRIVATE . ${GGML_EXTRA_INCLUDES}) +target_link_directories(ggml PRIVATE ${GGML_EXTRA_LIBDIRS}) target_compile_features (ggml PRIVATE c_std_11) # don't bump target_link_libraries(ggml PRIVATE Threads::Threads ${GGML_EXTRA_LIBS})