
fix
ydshieh committed Nov 20, 2023
1 parent 8cd7cba commit 67a3792
Showing 1 changed file with 58 additions and 58 deletions.
116 changes: 58 additions & 58 deletions .github/workflows/self-scheduled-amd.yml
@@ -89,7 +89,7 @@ jobs:
name: Identify models to test
working-directory: /transformers/tests
run: |
echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2[:5] + d1[:5]; print(d)')" >> $GITHUB_OUTPUT
echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2[:5] + d1[:0]; print(d)')" >> $GITHUB_OUTPUT
- name: ROCM-SMI
run: |
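The only change in the hunk above is the final slice: d1[:5] becomes d1[:0]. Since d1[:0] is an empty list, the generated matrix now contains only the first five models/* test directories and none of the non-model test directories. A readable, unrolled sketch of the one-liner (a sketch only, assuming it runs from /transformers/tests, which is the step's working-directory):

import os

# Current directory is /transformers/tests in the workflow step.
tests = os.getcwd()
model_tests = os.listdir(os.path.join(tests, "models"))

# Non-model test directories directly under tests/ (relative names, so
# os.path.isdir only works because the cwd is the tests directory).
d1 = sorted(filter(os.path.isdir, os.listdir(tests)))
# Per-model test directories as "models/<name>" relative paths.
d2 = sorted(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))
d1.remove("models")

# Old version: d = d2[:5] + d1[:5]  (five model dirs plus five other dirs)
# New version: d1[:0] is empty, so only the five model dirs remain.
d = d2[:5] + d1[:0]
print(d)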
@@ -242,62 +242,62 @@ jobs:
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}

run_examples_gpu:
name: Examples tests
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu]
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}

- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze

- name: Run examples tests on GPU
working-directory: /transformers
run: |
pip install -r examples/pytorch/_tests_requirements.txt
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt

- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.machine_type }}_run_examples_gpu
path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
# run_examples_gpu:
# name: Examples tests
# strategy:
# fail-fast: false
# matrix:
# machine_type: [single-gpu]
# runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
# container:
# image: huggingface/transformers-pytorch-amd-gpu
# options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
# needs: [setup, run_tests_single_gpu]
# steps:
# - name: Update clone
# working-directory: /transformers
# run: git fetch && git checkout ${{ github.sha }}
#
# - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
# working-directory: /transformers
# run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
#
# - name: ROCM-SMI
# run: |
# rocm-smi
# - name: ROCM-INFO
# run: |
# rocminfo | grep "Agent" -A 14
# - name: Show ROCR environment
# run: |
# echo "ROCR: $ROCR_VISIBLE_DEVICES"
#
# - name: Environment
# working-directory: /transformers
# run: |
# python3 utils/print_env.py
#
# - name: Show installed libraries and their versions
# working-directory: /transformers
# run: pip freeze
#
# - name: Run examples tests on GPU
# working-directory: /transformers
# run: |
# pip install -r examples/pytorch/_tests_requirements.txt
# python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
#
# - name: Failure short reports
# if: ${{ failure() }}
# continue-on-error: true
# run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
#
# - name: Test suite reports artifacts
# if: ${{ always() }}
# uses: actions/upload-artifact@v3
# with:
# name: ${{ matrix.machine_type }}_run_examples_gpu
# path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu

run_pipelines_torch_gpu:
name: PyTorch pipelines tests
@@ -309,7 +309,7 @@ jobs:
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
needs: [setup, run_tests_single_gpu, run_tests_multi_gpu]
steps:
- name: Update clone
working-directory: /transformers
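For the PyTorch pipelines job, needs goes from setup alone to [setup, run_tests_single_gpu, run_tests_multi_gpu], so the pipeline tests only start after the scheduled single-GPU and multi-GPU model test jobs have completed successfully. A minimal sketch of that gating pattern (an illustrative standalone workflow, not part of this file; the echo steps are placeholders):

name: needs-gating-sketch
on: workflow_dispatch

jobs:
  setup:
    runs-on: ubuntu-latest
    steps:
      - run: echo "build the test matrix"

  run_tests_single_gpu:
    needs: setup
    runs-on: ubuntu-latest
    steps:
      - run: echo "single-GPU model tests"

  run_tests_multi_gpu:
    needs: setup
    runs-on: ubuntu-latest
    steps:
      - run: echo "multi-GPU model tests"

  run_pipelines_torch_gpu:
    # Runs only after all three jobs listed in needs have finished,
    # mirroring the change made in this commit.
    needs: [setup, run_tests_single_gpu, run_tests_multi_gpu]
    runs-on: ubuntu-latest
    steps:
      - run: echo "pipeline tests"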
