try gau's commit
ydshieh committed Nov 18, 2023
1 parent 0ef8c19 commit 050cbbd
Showing 1 changed file with 20 additions and 22 deletions.
42 changes: 20 additions & 22 deletions .github/workflows/self-scheduled-amd.yml
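
For context, the hunks below make two kinds of change in each job: the container image switches to the `-test` tag, and the `rocminfo` diagnostic is moved (or renamed) to a `ROCMINFO` step that runs after the ROCR environment step. A minimal sketch of the resulting container/steps block follows, reconstructed from the visible hunks; the exact indentation and placement within each job are assumptions based on the shown context, not a verbatim excerpt of the full file.

    container:
      # Trial image used by this commit (the non-test image was used before)
      image: huggingface/transformers-pytorch-amd-gpu-test
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: ROCM-SMI
        run: |
          # per-GPU status summary (utilization, temperature, memory)
          rocm-smi
      - name: Show ROCR environment
        run: |
          # which devices the ROCm runtime is allowed to see
          echo "ROCR: $ROCR_VISIBLE_DEVICES"
      - name: ROCMINFO
        run: |
          # HSA agent details (CPU and GPU agents)
          rocminfo | grep "Agent" -A 14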
@@ -43,18 +43,19 @@ jobs:
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
-     image: huggingface/transformers-pytorch-amd-gpu
+     image: huggingface/transformers-pytorch-amd-gpu-test
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: ROCM-SMI
        run: |
          rocm-smi
-     - name: ROCM-INFO
-       run: |
-         rocminfo | grep "Agent" -A 14
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"
+     - name: ROCMINFO
+       run: |
+         rocminfo | grep "Agent" -A 14

  setup:
    name: Setup
@@ -64,7 +65,7 @@ jobs:
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
-     image: huggingface/transformers-pytorch-amd-gpu
+     image: huggingface/transformers-pytorch-amd-gpu-test
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
@@ -94,12 +95,12 @@ jobs:
      - name: ROCM-SMI
        run: |
          rocm-smi
-     - name: ROCM-INFO
-       run: |
-         rocminfo | grep "Agent" -A 14
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"
+     - name: ROCMINFO
+       run: |
+         rocminfo | grep "Agent" -A 14

  run_tests_single_gpu:
    name: Single GPU tests
@@ -111,7 +112,7 @@ jobs:
        machine_type: [single-gpu]
    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
-     image: huggingface/transformers-pytorch-amd-gpu
+     image: huggingface/transformers-pytorch-amd-gpu-test
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    needs: setup
    steps:
@@ -126,6 +127,10 @@ jobs:
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV

+     - name: ROCMINFO
+       run: |
+         rocminfo | grep "Agent" -A 14
+
      - name: Update clone
        working-directory: /transformers
        run: git fetch && git checkout ${{ github.sha }}
@@ -137,9 +142,6 @@ jobs:
      - name: ROCM-SMI
        run: |
          rocm-smi
-     - name: ROCM-INFO
-       run: |
-         rocminfo | grep "Agent" -A 14
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"
@@ -179,7 +181,7 @@ jobs:
        machine_type: [multi-gpu]
    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
-     image: huggingface/transformers-pytorch-amd-gpu
+     image: huggingface/transformers-pytorch-amd-gpu-test
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    needs: setup
    steps:
@@ -205,9 +207,11 @@ jobs:
      - name: ROCM-SMI
        run: |
          rocm-smi
-     - name: ROCM-INFO
+     - name: ROCMINFO
        run: |
          rocminfo | grep "Agent" -A 14
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"
@@ -245,7 +249,7 @@ jobs:
        machine_type: [single-gpu]
    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
-     image: huggingface/transformers-pytorch-amd-gpu
+     image: huggingface/transformers-pytorch-amd-gpu-test
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    needs: setup
    steps:
@@ -260,9 +264,6 @@ jobs:
      - name: ROCM-SMI
        run: |
          rocm-smi
-     - name: ROCM-INFO
-       run: |
-         rocminfo | grep "Agent" -A 14
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"
@@ -302,7 +303,7 @@ jobs:
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
-     image: huggingface/transformers-pytorch-amd-gpu
+     image: huggingface/transformers-pytorch-amd-gpu-test
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    needs: setup
    steps:
@@ -317,9 +318,6 @@ jobs:
      - name: ROCM-SMI
        run: |
          rocm-smi
-     - name: ROCM-INFO
-       run: |
-         rocminfo | grep "Agent" -A 14
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"
