From 050cbbd6705e177307ddfa4a068fdc5713cfade5 Mon Sep 17 00:00:00 2001
From: ydshieh
Date: Sat, 18 Nov 2023 09:59:19 +0100
Subject: [PATCH] try gau's commit

---
 .github/workflows/self-scheduled-amd.yml | 42 +++++++++++-------------
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/self-scheduled-amd.yml b/.github/workflows/self-scheduled-amd.yml
index e6e185af3d1b52..83c3a7496d73ec 100644
--- a/.github/workflows/self-scheduled-amd.yml
+++ b/.github/workflows/self-scheduled-amd.yml
@@ -43,18 +43,19 @@ jobs:
         machine_type: [single-gpu, multi-gpu]
     runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
     container:
-      image: huggingface/transformers-pytorch-amd-gpu
+      image: huggingface/transformers-pytorch-amd-gpu-test
       options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
     steps:
       - name: ROCM-SMI
         run: |
           rocm-smi
-      - name: ROCM-INFO
-        run: |
-          rocminfo | grep "Agent" -A 14
       - name: Show ROCR environment
         run: |
           echo "ROCR: $ROCR_VISIBLE_DEVICES"
+          echo "ROCR: $ROCR_VISIBLE_DEVICES"
+      - name: ROCMINFO
+        run: |
+          rocminfo | grep "Agent" -A 14
 
   setup:
     name: Setup
@@ -64,7 +65,7 @@ jobs:
         machine_type: [single-gpu, multi-gpu]
     runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
     container:
-      image: huggingface/transformers-pytorch-amd-gpu
+      image: huggingface/transformers-pytorch-amd-gpu-test
       options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
@@ -94,12 +95,12 @@ jobs:
       - name: ROCM-SMI
         run: |
           rocm-smi
-      - name: ROCM-INFO
-        run: |
-          rocminfo | grep "Agent" -A 14
       - name: Show ROCR environment
         run: |
           echo "ROCR: $ROCR_VISIBLE_DEVICES"
+      - name: ROCMINFO
+        run: |
+          rocminfo | grep "Agent" -A 14
 
   run_tests_single_gpu:
     name: Single GPU tests
@@ -111,7 +112,7 @@ jobs:
         machine_type: [single-gpu]
     runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
     container:
-      image: huggingface/transformers-pytorch-amd-gpu
+      image: huggingface/transformers-pytorch-amd-gpu-test
       options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
     needs: setup
     steps:
@@ -126,6 +127,10 @@ jobs:
           echo "$matrix_folders"
           echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
 
+      - name: ROCMINFO
+        run: |
+          rocminfo | grep "Agent" -A 14
+
       - name: Update clone
         working-directory: /transformers
         run: git fetch && git checkout ${{ github.sha }}
@@ -137,9 +142,6 @@ jobs:
       - name: ROCM-SMI
         run: |
           rocm-smi
-      - name: ROCM-INFO
-        run: |
-          rocminfo | grep "Agent" -A 14
       - name: Show ROCR environment
         run: |
           echo "ROCR: $ROCR_VISIBLE_DEVICES"
@@ -179,7 +181,7 @@ jobs:
         machine_type: [multi-gpu]
     runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
     container:
-      image: huggingface/transformers-pytorch-amd-gpu
+      image: huggingface/transformers-pytorch-amd-gpu-test
       options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
     needs: setup
     steps:
@@ -205,9 +207,11 @@ jobs:
       - name: ROCM-SMI
         run: |
           rocm-smi
-      - name: ROCM-INFO
+
+      - name: ROCMINFO
         run: |
           rocminfo | grep "Agent" -A 14
+
       - name: Show ROCR environment
         run: |
           echo "ROCR: $ROCR_VISIBLE_DEVICES"
@@ -245,7 +249,7 @@ jobs:
         machine_type: [single-gpu]
     runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
     container:
-      image: huggingface/transformers-pytorch-amd-gpu
+      image: huggingface/transformers-pytorch-amd-gpu-test
       options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
     needs: setup
     steps:
@@ -260,9 +264,6 @@ jobs:
       - name: ROCM-SMI
         run: |
           rocm-smi
-      - name: ROCM-INFO
-        run: |
-          rocminfo | grep "Agent" -A 14
       - name: Show ROCR environment
         run: |
           echo "ROCR: $ROCR_VISIBLE_DEVICES"
@@ -302,7 +303,7 @@ jobs:
         machine_type: [single-gpu, multi-gpu]
     runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
     container:
-      image: huggingface/transformers-pytorch-amd-gpu
+      image: huggingface/transformers-pytorch-amd-gpu-test
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
     needs: setup
     steps:
@@ -317,9 +318,6 @@ jobs:
       - name: ROCM-SMI
         run: |
           rocm-smi
-      - name: ROCM-INFO
-        run: |
-          rocminfo | grep "Agent" -A 14
       - name: Show ROCR environment
         run: |
           echo "ROCR: $ROCR_VISIBLE_DEVICES"
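
For reference, the diagnostics this patch swaps to the "-test" image and reorders (ROCM-SMI, ROCMINFO, Show ROCR environment) can also be run by hand to sanity-check a runner. The sketch below is not part of the patch: the image tag, device flags, and cache mount are copied from the workflow above, while the docker/bash wrapper is only an assumed manual-debugging convenience and presumes access to the (likely private) test image and the CI host's cache path.

  docker run --rm \
    --device /dev/kfd --device /dev/dri \
    --env ROCR_VISIBLE_DEVICES \
    --shm-size "16gb" --ipc host \
    -v /mnt/cache/.cache/huggingface:/mnt/cache/ \
    huggingface/transformers-pytorch-amd-gpu-test \
    bash -c '
      rocm-smi                            # GPU status, as in the ROCM-SMI step
      rocminfo | grep "Agent" -A 14       # agent list, as in the ROCMINFO step
      echo "ROCR: $ROCR_VISIBLE_DEVICES"  # device visibility, as in the Show ROCR environment step
    '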