From dbebe10216c2dd546d3fdb3c8222939c94c8f8ce Mon Sep 17 00:00:00 2001
From: Alexander Weinrauch
Date: Wed, 27 Nov 2024 17:35:46 +0000
Subject: [PATCH] [AMD] Upgrade AMD CI docker image (#5230)

This commit updates the CI to use a new docker image that contains ROCm
6.2.2 with ASan support and PyTorch 2.5.1.

This also switches to ubuntu's default clang toolchain instead of using
the one which comes with ROCm. CC and CXX are written to $GITHUB_ENV
rather than exported, because each `run` step executes in its own shell
and a plain `export` would not reach the later "Install Triton" step.
---
 .github/workflows/integration-tests.yml    | 17 +++++------------
 .github/workflows/integration-tests.yml.in | 19 +++++--------------
 2 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 2922da501efb..9151560ce6b9 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -327,7 +327,7 @@ jobs:
         runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-HIP)}}
     name: Integration-Tests (${{matrix.runner[1] == 'gfx90a' && 'mi210' || 'mi300x'}})
     container:
-      image: rocm/pytorch:rocm6.1_ubuntu22.04_py3.10_pytorch_2.4
+      image: rocmshared/pytorch:rocm6.2.2_ubuntu22.04_py3.10_pytorch_2.5.1_asan
       options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
     steps:
       - name: Checkout
@@ -396,22 +396,15 @@ jobs:
 
           mkdir -p ~/.ccache
           du -h -d 1 ~/.ccache
-      - name: Update PATH
-        run: |
-          echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH
-      - name: Install pip dependencies
-        run: |
-          python3 -m pip install --upgrade pip
-          python3 -m pip install lit
-      - name: Install apt dependencies
+      - name: Update compiler to clang
         run: |
-          apt update
-          apt install ccache
+          echo "CC=/usr/bin/clang" >> "$GITHUB_ENV"
+          echo "CXX=/usr/bin/clang++" >> "$GITHUB_ENV"
       - name: Install Triton
         id: amd-install-triton
         run: |
           echo "PATH is '$PATH'"
-          pip uninstall -y triton
+          pip uninstall -y triton pytorch-triton-rocm
           cd python
           ccache --zero-stats
           pip install -v -e '.[tests]'
diff --git a/.github/workflows/integration-tests.yml.in b/.github/workflows/integration-tests.yml.in
index 7de7264272c1..6d72b65207dc 100644
--- a/.github/workflows/integration-tests.yml.in
+++ b/.github/workflows/integration-tests.yml.in
@@ -374,7 +374,7 @@ jobs:
     name: Integration-Tests (${{matrix.runner[1] == 'gfx90a' && 'mi210' || 'mi300x'}})
 
     container:
-      image: rocm/pytorch:rocm6.1_ubuntu22.04_py3.10_pytorch_2.4
+      image: rocmshared/pytorch:rocm6.2.2_ubuntu22.04_py3.10_pytorch_2.5.1_asan
       options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
 
     steps:
@@ -388,25 +388,16 @@ jobs:
       - *restore-build-artifacts-step
       - *inspect-cache-directories-step
 
-      - name: Update PATH
-        run: |
-          echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH
-
-      - name: Install pip dependencies
-        run: |
-          python3 -m pip install --upgrade pip
-          python3 -m pip install lit
-
-      - name: Install apt dependencies
+      - name: Update compiler to clang
         run: |
-          apt update
-          apt install ccache
+          echo "CC=/usr/bin/clang" >> "$GITHUB_ENV"
+          echo "CXX=/usr/bin/clang++" >> "$GITHUB_ENV"
 
       - name: Install Triton
         id: amd-install-triton
         run: |
           echo "PATH is '$PATH'"
-          pip uninstall -y triton
+          pip uninstall -y triton pytorch-triton-rocm
           cd python
           ccache --zero-stats
           pip install -v -e '.[tests]'