Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
ydshieh committed Nov 17, 2023
1 parent 444de31 commit ebbd69f
Showing 1 changed file with 30 additions and 18 deletions.
48 changes: 30 additions & 18 deletions .github/workflows/self-scheduled-amd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,16 @@ jobs:
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: ROCM-SMI
run: |
rocm-smi
- name: Show HIP environment
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "HIP: $HIP_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"
setup:
Expand All @@ -63,7 +65,7 @@ jobs:
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
Expand Down Expand Up @@ -92,9 +94,11 @@ jobs:
- name: ROCM-SMI
run: |
rocm-smi
- name: Show HIP environment
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "HIP: $HIP_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"
run_tests_single_gpu:
Expand All @@ -108,7 +112,7 @@ jobs:
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Echo folder ${{ matrix.folders }}
Expand All @@ -133,9 +137,11 @@ jobs:
- name: ROCM-SMI
run: |
rocm-smi
- name: Show HIP environment
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "HIP: $HIP_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
Expand Down Expand Up @@ -174,7 +180,7 @@ jobs:
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Echo folder ${{ matrix.folders }}
Expand All @@ -199,9 +205,11 @@ jobs:
- name: ROCM-SMI
run: |
rocm-smi
- name: Show HIP environment
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "HIP: $HIP_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
Expand Down Expand Up @@ -238,7 +246,7 @@ jobs:
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Update clone
Expand All @@ -252,9 +260,11 @@ jobs:
- name: ROCM-SMI
run: |
rocm-smi
- name: Show HIP environment
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "HIP: $HIP_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
Expand Down Expand Up @@ -293,7 +303,7 @@ jobs:
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Update clone
Expand All @@ -307,9 +317,11 @@ jobs:
- name: ROCM-SMI
run: |
rocm-smi
- name: Show HIP environment
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "HIP: $HIP_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
Expand Down

0 comments on commit ebbd69f

Please sign in to comment.