-
Notifications
You must be signed in to change notification settings - Fork 10.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Draft] Tensor Parallel support to llama.cpp #9648
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -566,6 +566,17 @@ if (GGML_SYCL) | |
list(APPEND GGML_EXTRA_LIBS_PRIVATE DNNL::dnnl) | ||
endif() | ||
|
||
set(oneCCL_DIR "/opt/intel/oneapi/ccl/latest/lib/cmake/oneCCL") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The real oneAPI installation path is not always `/opt/intel/oneapi` — please do not hard-code it. The same applies to the following script lines. |
||
set(MPI_INCLUDE_PATH "/opt/intel/oneapi/mpi/latest/include") | ||
set(MPI_LIBRARY_PATH "/opt/intel/oneapi/mpi/latest/lib/") | ||
set(ONECCL_INCLUDE_PATH "/opt/intel/oneapi/ccl/latest/include") | ||
set(ONECCL_LIBRARY_PATH "/opt/intel/oneapi/ccl/latest/lib/") | ||
include_directories(${MPI_INCLUDE_PATH} ${ONECCL_INCLUDE_PATH}) | ||
find_library(MPI_LIBRARY mpi HINTS ${MPI_LIBRARY_PATH}) | ||
find_library(ONECCL_LIBRARY ccl HINTS ${ONECCL_LIBRARY_PATH}) | ||
# find_package(oneCCL REQUIRED) | ||
message("-- oneCCL found") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please add handling for the case where oneCCL is not found. oneCCL is not included in the oneAPI Base Toolkit, so print a message guiding the user on how to install it. |
||
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${MPI_LIBRARY_PATH} ${ONECCL_LIBRARY_PATH}) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. GGML_EXTRA_LIBS was recently split into GGML_EXTRA_LIBS_PUBLIC and GGML_EXTRA_LIBS_PRIVATE, so I think the line above won't work anymore. |
||
if (WIN32) | ||
find_package(IntelSYCL REQUIRED) | ||
find_package(MKL REQUIRED) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,10 +13,13 @@ | |
#ifndef GGML_SYCL_DPCT_HELPER_HPP | ||
#define GGML_SYCL_DPCT_HELPER_HPP | ||
|
||
#include <stdlib.h> | ||
#include <sycl/sycl.hpp> | ||
#include <sycl/half_type.hpp> | ||
#include <oneapi/ccl.hpp> | ||
#include <oneapi/mkl.hpp> | ||
#include <map> | ||
#include <mpi.h> | ||
|
||
#include "ggml.h" | ||
|
||
|
@@ -870,7 +873,12 @@ namespace dpct | |
} | ||
return -1; | ||
} | ||
|
||
inline int get_rank() { return _rank; } | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These new functions have no relationship with DPCT and should not live in the DPCT helper. |
||
inline int get_world_size() { return _world_size; } | ||
inline ccl::communicator create_ccl_communicator(ccl::device dev, ccl::context ctx) { | ||
return ccl::create_communicator(_world_size, _rank, dev, ctx, _kvs); | ||
|
||
} | ||
inline std::string get_preferred_gpu_platform_name() { | ||
std::string result; | ||
|
||
|
@@ -993,6 +1001,31 @@ namespace dpct | |
static bool compare_backend(std::string &backend1, std::string &backend2) { | ||
return convert_backend_index(backend1) < convert_backend_index(backend2); | ||
} | ||
|
||
static void mpi_finalize() { | ||
static int is_finalized = 0; | ||
MPI_Finalized(&is_finalized); | ||
if (!is_finalized) MPI_Finalize(); | ||
} | ||
|
||
void init_ccl() { | ||
ccl::init(); | ||
MPI_Init(NULL, NULL); | ||
MPI_Comm_size(MPI_COMM_WORLD, &_world_size); | ||
MPI_Comm_rank(MPI_COMM_WORLD, &_rank); | ||
atexit(mpi_finalize); | ||
ccl::kvs::address_type main_addr; | ||
if (_rank == 0) { | ||
_kvs = ccl::create_main_kvs(); | ||
main_addr = _kvs->get_address(); | ||
MPI_Bcast((void *)main_addr.data(), main_addr.size(), MPI_BYTE, 0, MPI_COMM_WORLD); | ||
} | ||
else { | ||
MPI_Bcast((void *)main_addr.data(), main_addr.size(), MPI_BYTE, 0, MPI_COMM_WORLD); | ||
_kvs = ccl::create_kvs(main_addr); | ||
} | ||
} | ||
|
||
dev_mgr() | ||
{ | ||
sycl::device default_device = | ||
|
@@ -1050,6 +1083,7 @@ namespace dpct | |
_cpu_device = _devs.size() - 1; | ||
} | ||
} | ||
init_ccl(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please move this init_ccl() function out of the DPCT helper and into the ggml-sycl source directory (ggml-sycl/src). |
||
} | ||
void check_id(unsigned int id) const | ||
{ | ||
|
@@ -1066,6 +1100,10 @@ namespace dpct | |
/// thread-id to device-id map. | ||
std::map<unsigned int, unsigned int> _thread2dev_map; | ||
int _cpu_device = -1; | ||
// For tensor parallelsim | ||
int _rank = 0; | ||
int _world_size = 1; | ||
ccl::shared_ptr_class<ccl::kvs> _kvs; | ||
}; | ||
|
||
static inline sycl::queue &get_default_queue() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changes to the common ggml code should not be made unless absolutely necessary, which is not likely to be the case here. We already have a way to handle this with custom buffer types like the existing CUDA and SYCL split buffer types. You can extend this model instead by creating a different buffer type for tensors split by column. The "tensors kept on master" is just the default buffer type.