diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml index e2dad1a66..3a5fad8df 100644 --- a/.github/workflows/build-image.yml +++ b/.github/workflows/build-image.yml @@ -24,9 +24,9 @@ jobs: strategy: matrix: include: - - name: cuda12.1 - dockerfile: cuda12.1 - tags: superbench/main:cuda12.1 + - name: cuda12.2 + dockerfile: cuda12.2 + tags: superbench/main:cuda12.2 - name: cuda11.1.1 dockerfile: cuda11.1.1 tags: superbench/main:cuda11.1.1,superbench/superbench:latest diff --git a/dockerfile/cuda12.1.dockerfile b/dockerfile/cuda12.2.dockerfile similarity index 80% rename from dockerfile/cuda12.1.dockerfile rename to dockerfile/cuda12.2.dockerfile index 2f9e430fa..a3cf01f33 100644 --- a/dockerfile/cuda12.1.dockerfile +++ b/dockerfile/cuda12.2.dockerfile @@ -1,16 +1,16 @@ -FROM nvcr.io/nvidia/pytorch:23.03-py3 +FROM nvcr.io/nvidia/pytorch:23.10-py3 # OS: -# - Ubuntu: 20.04 -# - OpenMPI: 4.1.5a1 +# - Ubuntu: 22.04 +# - OpenMPI: 4.1.5rc2 # - Docker Client: 20.10.8 # NVIDIA: -# - CUDA: 12.1.0 -# - cuDNN: 8.8.1.3 -# - NCCL: v2.17.1-1 +# - CUDA: 12.2.2 +# - cuDNN: 8.9.5 +# - NCCL: v2.19.3-1 # Mellanox: -# - OFED: 5.2-2.2.3.0 # TODO -# - HPC-X: v2.14 +# - OFED: 23.07-0.5.1.2 +# - HPC-X: v2.16 # Intel: # - mlc: v3.10 @@ -74,20 +74,20 @@ RUN mkdir -p /root/.ssh && \ echo "root soft nofile 1048576\nroot hard nofile 1048576" >> /etc/security/limits.conf # Install OFED -ENV OFED_VERSION=5.2-2.2.3.0 +ENV OFED_VERSION=23.07-0.5.1.2 RUN cd /tmp && \ - wget -q https://content.mellanox.com/ofed/MLNX_OFED-${OFED_VERSION}/MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64.tgz && \ - tar xzf MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64.tgz && \ - MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64/mlnxofedinstall --user-space-only --without-fw-update --force --all && \ + wget -q https://content.mellanox.com/ofed/MLNX_OFED-${OFED_VERSION}/MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu22.04-x86_64.tgz && \ + tar xzf 
MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu22.04-x86_64.tgz && \ + MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu22.04-x86_64/mlnxofedinstall --user-space-only --without-fw-update --without-ucx-cuda --force --all && \ rm -rf /tmp/MLNX_OFED_LINUX-${OFED_VERSION}* # Install HPC-X -ENV HPCX_VERSION=v2.14 +ENV HPCX_VERSION=v2.16 RUN cd /opt && \ rm -rf hpcx && \ - wget -q https://content.mellanox.com/hpc/hpc-x/${HPCX_VERSION}/hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-5-ubuntu20.04-cuda12-gdrcopy2-nccl2.17-x86_64.tbz -O hpcx.tbz && \ + wget -q https://content.mellanox.com/hpc/hpc-x/${HPCX_VERSION}/hpcx-${HPCX_VERSION}-gcc-mlnx_ofed-ubuntu22.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz -O hpcx.tbz && \ tar xf hpcx.tbz && \ - mv hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-5-ubuntu20.04-cuda12-gdrcopy2-nccl2.17-x86_64 hpcx && \ + mv hpcx-${HPCX_VERSION}-gcc-mlnx_ofed-ubuntu22.04-cuda12-gdrcopy2-nccl2.18-x86_64 hpcx && \ rm hpcx.tbz # Install Intel MLC @@ -131,7 +131,8 @@ ADD third_party third_party RUN make -C third_party cuda ADD . . 
-RUN python3 -m pip install --no-cache-dir .[nvworker] && \ +RUN python3 -m pip install --upgrade setuptools==65.7 && \ + python3 -m pip install --no-cache-dir .[nvworker] && \ make cppbuild && \ make postinstall && \ rm -rf .git diff --git a/setup.py b/setup.py index 23c796833..a05dcfda4 100644 --- a/setup.py +++ b/setup.py @@ -213,7 +213,8 @@ def run(self): ], 'ort': [ 'onnx>=1.10.2', - 'onnxruntime-gpu==1.10.0', + 'onnxruntime-gpu==1.10.0; python_version<"3.10"', + 'onnxruntime-gpu; python_version>="3.10"', ], 'nvidia': ['py3nvml>=0.2.6'], } diff --git a/superbench/benchmarks/micro_benchmarks/cublas_function/cublas_benchmark.h b/superbench/benchmarks/micro_benchmarks/cublas_function/cublas_benchmark.h index a432c6392..9092ad024 100644 --- a/superbench/benchmarks/micro_benchmarks/cublas_function/cublas_benchmark.h +++ b/superbench/benchmarks/micro_benchmarks/cublas_function/cublas_benchmark.h @@ -366,8 +366,8 @@ void CublasFunction::matrix_calculation_on_cpu_with_data(const T1 *Parameter_0_0 for (int j = 0; j < n; j++) { (*Result_cpu)[i + j * m + b * m * n] = beta * (T2)(Result_3_0_host[i + j * m + b * m * n]); for (int p = 0; p < k; p++) { - (*Result_cpu)[i + j * m + b * m * n] += - Parameter_0_0_host_op[p * m + i + b * m * k] * Parameter_1_0_host_op[j * k + p + b * k * n]; + (*Result_cpu)[i + j * m + b * m * n] += (T2)(Parameter_0_0_host_op[p * m + i + b * m * k] * + Parameter_1_0_host_op[j * k + p + b * k * n]); (*Result_cpu)[i + j * m + b * m * n] *= alpha; } } @@ -444,7 +444,7 @@ int CublasFunction::check_result(int batch_count, T1 *Result_3_0, T2 *Result_cpu // |<x, y>_cpu - <x, y>_gpu|/|<x, y>_cpu|/dot_length < eps int error_count = 0; for (int i = 0; i < static_cast<int>(m * n) * batch_count; i++) { - double abs_err = fabs(Result_cpu[i] - Result_3_0_host[i]); + double abs_err = fabs(Result_cpu[i] - (T2)(Result_3_0_host[i])); double dot_length = k; double abs_val = fabs(Result_cpu[i]); double rel_err = abs_err / abs_val / dot_length;