diff --git a/dockerfile/rocm6.0.x.dockerfile b/dockerfile/rocm6.0.x.dockerfile index 35d87a749..ddea3c103 100644 --- a/dockerfile/rocm6.0.x.dockerfile +++ b/dockerfile/rocm6.0.x.dockerfile @@ -7,8 +7,12 @@ FROM ${BASE_IMAGE} # - Docker Client: 20.10.8 # ROCm: # - ROCm: 6.0 -# Pytorch: +# Lib: # - torch: 2.0.1 +# - rccl: 2.18.3+hip6.0 develop:7e1cbb4 +# - hipblaslt: rocm-6.0.0(tag) +# - openmpi: 4.1.x +# - apex: 1.0.0 # Intel: # - mlc: v3.10 @@ -107,26 +111,23 @@ RUN if ! command -v ofed_info >/dev/null 2>&1; then \ # Add target file to help determine which device(s) to build for ENV ROCM_PATH=/opt/rocm -RUN bash -c 'echo -e "gfx90a:xnack-\ngfx90a:xnac+\ngfx940\ngfx941\ngfx942\ngfx1030\ngfx1100\ngfx1101\ngfx1102\n" >> ${ROCM_PATH}/bin/target.lst' +RUN bash -c 'echo -e "gfx90a:xnack-\ngfx90a:xnac+\ngfx940\ngfx941\ngfx942:sramecc+:xnack-\n" >> ${ROCM_PATH}/bin/target.lst' # Install OpenMPI ENV OPENMPI_VERSION=4.1.x # Check if Open MPI is installed -RUN if [ -z "$(command -v mpirun)" ]; then \ - echo "Open MPI not found. Installing Open MPI..." && \ - cd /tmp && \ +RUN cd /tmp && \ git clone --recursive https://github.com/open-mpi/ompi.git -b v${OPENMPI_VERSION} && \ cd ompi && \ ./autogen.pl && \ mkdir build && \ cd build && \ - ../configure --prefix=/usr/local --enable-orterun-prefix-by-default --enable-mpirun-prefix-by-default --enable-prte-prefix-by-default --with-rocm=/opt/rocm && \ + ../configure --prefix=/usr/local/mpi --enable-orterun-prefix-by-default --enable-mpirun-prefix-by-default --enable-prte-prefix-by-default --with-rocm=/opt/rocm && \ make -j $(nproc) && \ make -j $(nproc) install && \ ldconfig && \ cd / && \ - rm -rf /tmp/openmpi-${OPENMPI_VERSION}* ;\ - fi + rm -rf /tmp/openmpi-${OPENMPI_VERSION}* # Install Intel MLC RUN cd /tmp && \ @@ -146,6 +147,12 @@ RUN cd /opt/ && \ .. && \ make -j${NUM_MAKE_JOBS} +# Apply patch +RUN cd third_party/perftest && \ + git apply ../perftest_rocm6.patch +RUN cd third_party/Megatron/Megatron-DeepSpeed && \ + git apply ../megatron_deepspeed_rocm6.patch + ENV PATH="/opt/superbench/bin:/usr/local/bin/:/opt/rocm/hip/bin/:/opt/rocm/bin/:${PATH}" \ LD_PRELOAD="/opt/rccl/build/librccl.so:$LD_PRELOAD" \ LD_LIBRARY_PATH="/usr/local/lib/:/opt/rocm/lib:${LD_LIBRARY_PATH}" \ @@ -164,11 +171,11 @@ RUN apt install rocm-cmake -y && \ WORKDIR ${SB_HOME} ADD third_party third_party -RUN make RCCL_HOME=/opt/rccl/build/ MPI_HOME=$MPI_HOME ROCBLAS_BRANCH=release/rocm-rel-6.0 HIPBLASLT_BRANCH=release-staging/rocm-rel-6.0 ROCM_VER=rocm-5.5.0 -C third_party rocm -o cpu_hpl -o cpu_stream -o megatron_lm +RUN make RCCL_HOME=/opt/rccl/build/ MPI_HOME=$MPI_HOME ROCBLAS_BRANCH=release/rocm-rel-6.0 HIPBLASLT_BRANCH=rocm-6.0.0 ROCM_VER=rocm-5.5.0 -C third_party rocm -o cpu_hpl -o cpu_stream -o megatron_lm ADD . . ENV USE_HIP_DATATYPE=1 -ENV CXX=/opt/rocm/bin/hipcc +ENV USE_HIPBLAS_COMPUTETYPE=1 RUN python3 -m pip install .[amdworker] && \ - make cppbuild && \ + CXX=/opt/rocm/bin/hipcc make cppbuild && \ make postinstall diff --git a/third_party/Megatron/megatron_deepspeed_rocm6.patch b/third_party/Megatron/megatron_deepspeed_rocm6.patch new file mode 100644 index 000000000..39a1dc27b --- /dev/null +++ b/third_party/Megatron/megatron_deepspeed_rocm6.patch @@ -0,0 +1,39 @@ +diff --git a/megatron/fused_kernels/scaled_masked_softmax_cuda.cu b/megatron/fused_kernels/scaled_masked_softmax_cuda.cu +index 76086de..1533648 100644 +--- a/megatron/fused_kernels/scaled_masked_softmax_cuda.cu ++++ b/megatron/fused_kernels/scaled_masked_softmax_cuda.cu +@@ -4,7 +4,7 @@ + #include + #include + #include +-#ifndef __HIP_PLATFORM_HCC__ ++#ifndef __HIP_PLATFORM_AMD__ + #include + #endif + #include +diff --git a/megatron/fused_kernels/scaled_softmax_cuda.cu b/megatron/fused_kernels/scaled_softmax_cuda.cu +index 90e1c9f..d217aec 100644 +--- a/megatron/fused_kernels/scaled_softmax_cuda.cu ++++ b/megatron/fused_kernels/scaled_softmax_cuda.cu +@@ -4,7 +4,7 @@ + #include + #include + #include +-#ifndef __HIP_PLATFORM_HCC__ ++#ifndef __HIP_PLATFORM_AMD__ + #include + #endif + #include +diff --git a/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu b/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu +index 74c9f3d..03b5fc8 100644 +--- a/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu ++++ b/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu +@@ -4,7 +4,7 @@ + #include + #include + #include +-#ifndef __HIP_PLATFORM_HCC__ ++#ifndef __HIP_PLATFORM_AMD__ + #include + #endif + #include diff --git a/third_party/perftest_rocm6.patch b/third_party/perftest_rocm6.patch new file mode 100644 index 000000000..3394e5b68 --- /dev/null +++ b/third_party/perftest_rocm6.patch @@ -0,0 +1,35 @@ +diff --git a/configure.ac b/configure.ac +index 20eceda..5b5c5ab 100755 +--- a/configure.ac ++++ b/configure.ac +@@ -237,13 +237,13 @@ AC_ARG_WITH([rocm], + ], + [AS_CASE([$with_rocm], + [yes|no], [], +- [CPPFLAGS="-I$with_rocm/include $CPPFLAGS" ++ [CPPFLAGS="-I$with_rocm/include -D__HIP_PLATFORM_AMD__=1 $CPPFLAGS" + LDFLAGS="-L$with_rocm/lib64 -Wl,-rpath=$with_rocm/lib64 -L$with_rocm/lib -Wl,-rpath=$with_rocm/lib -lamdhip64 $LDFLAGS"]) + ]) + + AS_IF([test "x$enable_rocm" = xyes], [ + AC_DEFINE([__HIP_PLATFORM_HCC__], [1], [Enable ROCm]) +- AC_CHECK_HEADERS([hip/hip_runtime_api.h], [], ++ AC_CHECK_HEADERS([/opt/rocm/include/hip/hip_runtime_api.h], [], + [AC_MSG_ERROR([cannot include hip/hip_runtime_api.h])]) + AC_SEARCH_LIBS([hipFree], [amdhip64], [], + [AC_MSG_ERROR([cannot link with -lamdhip64])]) +diff --git a/src/rocm_memory.c b/src/rocm_memory.c +index e9a9136..cc028c9 100644 +--- a/src/rocm_memory.c ++++ b/src/rocm_memory.c +@@ -44,8 +44,8 @@ static int init_rocm(int device_id) { + + hipDeviceProp_t prop = {0}; + ROCM_CHECK(hipGetDeviceProperties(&prop, device_id)); +- printf("Using ROCm Device with ID: %d, Name: %s, PCI Bus ID: 0x%x, GCN Arch: %d\n", +- device_id, prop.name, prop.pciBusID, prop.gcnArch); ++ printf("Using ROCm Device with ID: %d, Name: %s, PCI Bus ID: 0x%x, GCN Arch: %s\n", ++ device_id, prop.name, prop.pciBusID, prop.gcnArchName); + + return SUCCESS; + }