Skip to content

Commit

Permalink
definitively fixed
Browse files (browse the repository at this point in the history)
  • Loading branch information
glegendre01 authored Nov 17, 2023
1 parent 83f24ca commit 469c482
Showing 1 changed file with 19 additions and 23 deletions.
42 changes: 19 additions & 23 deletions .github/workflows/self-scheduled-amd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@ jobs:
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu-test
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: ROCM-SMI
run: |
rocm-smi
- name: Show HIP environment
- name: Show ROCR environment
run: |
echo "HIP: $HIP_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: ROCMINFO
run: |
Expand All @@ -66,7 +66,7 @@ jobs:
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu-test
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
Expand Down Expand Up @@ -95,13 +95,13 @@ jobs:
- name: ROCM-SMI
run: |
rocm-smi
- name: Show HIP environment
- name: Show ROCR environment
run: |
echo "HIP: $HIP_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: ROCMINFO
run: |
rocminfo | grep "Agent" -A 14
run_tests_single_gpu:
name: Single GPU tests
strategy:
Expand All @@ -113,7 +113,7 @@ jobs:
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu-test
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Echo folder ${{ matrix.folders }}
Expand All @@ -127,11 +127,6 @@ jobs:
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: ROCMINFO
run: |
rocminfo | grep "Agent" -A 14
Expand All @@ -147,9 +142,8 @@ jobs:
- name: ROCM-SMI
run: |
rocm-smi
- name: Show HIP environment
- name: Show ROCR environment
run: |
echo "HIP: $HIP_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
Expand Down Expand Up @@ -188,7 +182,7 @@ jobs:
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu-test
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Echo folder ${{ matrix.folders }}
Expand All @@ -213,9 +207,13 @@ jobs:
- name: ROCM-SMI
run: |
rocm-smi
- name: Show HIP environment
- name: ROCMINFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "HIP: $HIP_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
Expand Down Expand Up @@ -252,7 +250,7 @@ jobs:
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu-test
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Update clone
Expand All @@ -266,9 +264,8 @@ jobs:
- name: ROCM-SMI
run: |
rocm-smi
- name: Show HIP environment
- name: Show ROCR environment
run: |
echo "HIP: $HIP_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
Expand Down Expand Up @@ -307,7 +304,7 @@ jobs:
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu-test
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Update clone
Expand All @@ -321,9 +318,8 @@ jobs:
- name: ROCM-SMI
run: |
rocm-smi
- name: Show HIP environment
- name: Show ROCR environment
run: |
echo "HIP: $HIP_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
Expand Down

0 comments on commit 469c482

Please sign in to comment.