Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Benchmarks: Add MSCCL Support for Nvidia GPU #584

Merged
merged 10 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
- name: Checkout
uses: actions/checkout@v2
with:
submodules: true
submodules: recursive
yzygitzh marked this conversation as resolved.
Show resolved Hide resolved
- name: Free disk space
run: |
mkdir /tmp/emptydir
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@
[submodule "third_party/gpu-burn"]
path = third_party/gpu-burn
url = https://github.com/wilicc/gpu-burn.git
[submodule "third_party/msccl"]
path = third_party/msccl
url = https://github.com/Azure/msccl
4 changes: 3 additions & 1 deletion dockerfile/cuda12.2.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,15 @@ RUN apt-get update && \
libavutil-dev \
libboost-program-options-dev \
libcap2 \
libcurl4-openssl-dev \
libnuma-dev \
libpci-dev \
libswresample-dev \
libtinfo5 \
libtool \
lshw \
net-tools \
nlohmann-json3-dev \
openssh-client \
openssh-server \
pciutils \
Expand Down Expand Up @@ -128,7 +130,7 @@ ADD dockerfile/etc /opt/microsoft/
WORKDIR ${SB_HOME}

ADD third_party third_party
RUN make -C third_party cuda
RUN make -C third_party cuda_with_msccl
yzygitzh marked this conversation as resolved.
Show resolved Hide resolved

ADD . .
RUN python3 -m pip install --upgrade setuptools==65.7 && \
Expand Down
4 changes: 4 additions & 0 deletions superbench/config/azure_ndmv4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ superbench:
- name: mpi
proc_num: 8
node_num: 1
env:
LD_PRELOAD: '/opt/superbench/lib/msccl-executor-nccl/lib/libnccl.so:$LD_PRELOAD'
MSCCL_SCHEDULER: '/opt/superbench/lib/msccl-scheduler/lib/libmsccl-scheduler.so'
MSCCL_ALGO_DIR: '/opt/superbench/lib/msccl-scheduler/lib/msccl-algorithms/ndv4'
yzygitzh marked this conversation as resolved.
Show resolved Hide resolved
parameters:
maxbytes: 16M
warmup_iters: 20
Expand Down
4 changes: 4 additions & 0 deletions superbench/config/azure_ndv4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ superbench:
- name: mpi
proc_num: 8
node_num: 1
env:
LD_PRELOAD: '/opt/superbench/lib/msccl-executor-nccl/lib/libnccl.so:$LD_PRELOAD'
MSCCL_SCHEDULER: '/opt/superbench/lib/msccl-scheduler/lib/libmsccl-scheduler.so'
MSCCL_ALGO_DIR: '/opt/superbench/lib/msccl-scheduler/lib/msccl-algorithms/ndv4'
yzygitzh marked this conversation as resolved.
Show resolved Hide resolved
parameters:
maxbytes: 16M
warmup_iters: 20
Expand Down
26 changes: 25 additions & 1 deletion third_party/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ HPCX_HOME ?= /opt/hpcx
CUDA_VER ?= $(shell nvcc --version | grep 'release' | awk '{print $$6}' | cut -c2- | cut -d '.' -f1-2)
ROCBLAS_BRANCH ?= rocm-$(shell dpkg -l | grep 'rocm-dev ' | awk '{print $$3}' | cut -d '.' -f1-3)

.PHONY: all cuda rocm common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest rocm_perftest fio rocm_rccl_tests rocm_rocblas rocm_bandwidthTest gpcnet cuda_gpuburn cpu_stream cpu_hpl directx_amf_encoding_latency directx_amd rocm_hipblaslt
.PHONY: all cuda_with_msccl cuda rocm common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest cuda_msccl rocm_perftest fio rocm_rccl_tests rocm_rocblas rocm_bandwidthTest gpcnet cuda_gpuburn cpu_stream cpu_hpl directx_amf_encoding_latency directx_amd rocm_hipblaslt

# Build all targets.
all: cuda rocm
cuda_with_msccl: cuda cuda_msccl
cuda: common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest gpcnet cuda_gpuburn
rocm: common rocm_perftest rocm_rccl_tests rocm_rocblas rocm_bandwidthTest rocm_hipblaslt
cpu: common cpu_perftest
Expand Down Expand Up @@ -171,3 +172,26 @@ directx_amf_encoding_latency:
del vs_buildtools.exe && echo "Deleted vs_buildtools.exe" && \
"C:\temp\BuildTools\MSBuild\Current\Bin\MSBuild.exe" "AMF\amf\public\samples\CPPSamples_vs2019.sln" /t:EncoderLatency /p:Platform=x64 /p:Configuration=Release /p:OutDir="%SB_MICRO_PATH%\bin" \
)

# Build MSCCL for CUDA.
#
# Builds up to three components of the msccl submodule, one after another, and
# copies each component's build output under $(SB_MICRO_PATH):
#   1. msccl-executor-nccl -> $(SB_MICRO_PATH)/lib/msccl-executor-nccl
#   2. msccl-scheduler     -> $(SB_MICRO_PATH)/lib/msccl-scheduler
#   3. msccl-tests-nccl    -> $(SB_MICRO_PATH)/bin/msccl-tests-nccl
# Each step is guarded by a parse-time $(wildcard ...) check and is silently
# skipped when that component's Makefile is not present in the checkout.
# The scheduler and tests builds are pointed at the executor install directory
# from step 1 (via BIN_HOME / NCCL_HOME), so the executor must be built first.
#
# Sub-builds are invoked through $(MAKE) so that flags such as -n are honoured
# by the nested make. Every recipe line runs in its own shell, so no `cd` back
# to the original directory is needed after a sub-build.
cuda_msccl: sb_micro_path
ifneq (,$(wildcard msccl/executor/msccl-executor-nccl/Makefile))
	cd ./msccl/executor/msccl-executor-nccl && \
		$(MAKE) -j4 src.build
	mkdir -p $(SB_MICRO_PATH)/lib/msccl-executor-nccl && \
		cp -r -v ./msccl/executor/msccl-executor-nccl/build/* $(SB_MICRO_PATH)/lib/msccl-executor-nccl/
endif
ifneq (,$(wildcard msccl/scheduler/msccl-scheduler/Makefile))
	# SRC_HOME is relative to the scheduler directory entered by the `cd` below.
	cd ./msccl/scheduler/msccl-scheduler && \
		CXX=nvcc BIN_HOME=$(SB_MICRO_PATH)/lib/msccl-executor-nccl SRC_HOME=../../../msccl/executor/msccl-executor-nccl $(MAKE) -j4
	mkdir -p $(SB_MICRO_PATH)/lib/msccl-scheduler && \
		cp -r -v ./msccl/scheduler/msccl-scheduler/build/* $(SB_MICRO_PATH)/lib/msccl-scheduler/
endif
ifneq (,$(wildcard msccl/tests/msccl-tests-nccl/Makefile))
	cd ./msccl/tests/msccl-tests-nccl && \
		$(MAKE) MPI=1 MPI_HOME=$(MPI_HOME) NCCL_HOME=$(SB_MICRO_PATH)/lib/msccl-executor-nccl -j4
	mkdir -p $(SB_MICRO_PATH)/bin/msccl-tests-nccl && \
		cp -r -v ./msccl/tests/msccl-tests-nccl/build/* $(SB_MICRO_PATH)/bin/msccl-tests-nccl/
endif
1 change: 1 addition & 0 deletions third_party/msccl
Submodule msccl added at 7d4beb
Loading