Self-hosted runner (push) #10880
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: display name, triggers and the environment shared by all jobs.
name: Self-hosted runner (push)

# Triggers:
#   - `workflow_run`: the "Self-hosted runner (push-caller)" workflow completed on `main`
#   - `push`: pushes to `ci_*` / `ci-*` branches that touch one of the listed paths
#   - `repository_dispatch`: no filter configured
on:
  workflow_run:
    workflows: ["Self-hosted runner (push-caller)"]
    branches: ["main"]
    types: [completed]
  push:
    branches:
      - ci_*
      - ci-*
    paths:
      - "src/**"
      - "tests/**"
      - ".github/**"
      - "templates/**"
      - "utils/**"
  repository_dispatch:

# Workflow-level environment variables.
# Every value is consumed as a string, so boolean-looking and number-looking scalars
# are quoted to keep them exactly as written regardless of the YAML loader in use.
env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: "yes"
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  PYTEST_TIMEOUT: "60"
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  RUN_PT_TF_CROSS_TESTS: "1"
  CUDA_VISIBLE_DEVICES: "0,1"

jobs:
# Job `setup` (an entry of the workflow's `jobs:` mapping).
# Runs once per machine type in its matrix. Inside the CI image it checks out the
# triggering commit, runs `utils/tests_fetcher.py --diff_with_last_commit`, uploads the
# fetcher log as an artifact, and publishes two outputs for the downstream jobs:
#   - `matrix`:   the keys of `test_map.json` (or `['dummy']` when the file is absent)
#   - `test_map`: the whole mapping loaded from `test_map.json` (or `{'dummy': []}`)
setup:
  name: Setup
  strategy:
    matrix:
      machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
  runs-on:
    group: '${{ matrix.machine_type }}'
  container:
    image: huggingface/transformers-all-latest-gpu-push-ci
    options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
    test_map: ${{ steps.set-matrix.outputs.test_map }}
  env:
    # `CI_BRANCH_PUSH`: The branch name from the push event
    # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
    # `CI_SHA_PUSH`: The commit SHA from the push event
    # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
    CI_BRANCH_PUSH: ${{ github.event.ref }}
    CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
    CI_SHA_PUSH: ${{ github.event.head_commit.id }}
    CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
  steps:
    # Necessary to get the correct branch name and commit SHA for `workflow_run` event
    # We also take into account the `push` event (we might want to test some changes in a branch)
    - name: Prepare custom environment variables
      shell: bash
      # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty)
      # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty)
      run: |
        CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
        echo "$CI_BRANCH_PUSH"
        echo "$CI_BRANCH_WORKFLOW_RUN"
        echo "$CI_SHA_PUSH"
        echo "$CI_SHA_WORKFLOW_RUN"
        # Explicit if/else: the fallback must depend only on the emptiness test,
        # not on whether the first `echo` succeeded.
        if [[ -n "$CI_BRANCH_PUSH" ]]; then
          echo "CI_BRANCH=$CI_BRANCH_PUSH" >> "$GITHUB_ENV"
        else
          echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> "$GITHUB_ENV"
        fi
        if [[ -n "$CI_SHA_PUSH" ]]; then
          echo "CI_SHA=$CI_SHA_PUSH" >> "$GITHUB_ENV"
        else
          echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> "$GITHUB_ENV"
        fi

    # `CI_BRANCH` / `CI_SHA` were exported through `GITHUB_ENV` above, so the following
    # steps read them as ordinary shell variables rather than pasting the event-derived
    # values into the script text.
    - name: print environment variables
      run: |
        echo "env.CI_BRANCH = $CI_BRANCH"
        echo "env.CI_SHA = $CI_SHA"

    - name: Update clone using environment variables
      working-directory: /transformers
      run: |
        echo "original branch = $(git branch --show-current)"
        git fetch && git checkout "$CI_BRANCH"
        echo "updated branch = $(git branch --show-current)"
        git checkout "$CI_SHA"
        echo "log = $(git log -n 1)"

    - name: Cleanup
      working-directory: /transformers
      run: |
        rm -rf tests/__pycache__
        rm -rf tests/models/__pycache__
        rm -rf reports

    - name: Show installed libraries and their versions
      working-directory: /transformers
      run: pip freeze

    - name: Fetch the tests to run
      working-directory: /transformers
      # TODO: add `git-python` in the docker images
      run: |
        pip install --upgrade git-python
        python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt

    - name: Report fetched tests
      uses: actions/upload-artifact@v4
      with:
        name: test_fetched
        path: /transformers/test_preparation.txt

    - id: set-matrix
      name: Organize tests into models
      working-directory: /transformers
      # The `keys` is used as GitHub actions matrix for jobs, i.e. `models/bert`, `tokenization`, `pipeline`, etc.
      # The `test_map` is used to get the actual identified test files under each key.
      # If no test to run (so no `test_map.json` file), create a dummy map (empty matrix will fail)
      run: |
        if [ -f test_map.json ]; then
            keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)')
            test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)')
        else
            keys=$(python3 -c 'keys = ["dummy"]; print(keys)')
            test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)')
        fi
        # Quoted: the values start with `[` / `{` and must not be glob-expanded or word-split.
        echo "$keys"
        echo "$test_map"
        echo "matrix=$keys" >> "$GITHUB_OUTPUT"
        echo "test_map=$test_map" >> "$GITHUB_OUTPUT"
# Job `run_tests_single_gpu` (an entry of the workflow's `jobs:` mapping).
# Fans out over the keys published by `setup` (`matrix.folders`) on the
# `aws-g4dn-2xlarge-cache` runner group, runs the tests mapped to each key with pytest,
# and uploads the per-key report folder as an artifact.
run_tests_single_gpu:
  name: Model tests
  needs: setup
  # `dummy` means there is no test to run
  if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
  strategy:
    fail-fast: false
    matrix:
      folders: ${{ fromJson(needs.setup.outputs.matrix) }}
      machine_type: [aws-g4dn-2xlarge-cache]
  runs-on:
    group: '${{ matrix.machine_type }}'
  container:
    image: huggingface/transformers-all-latest-gpu-push-ci
    options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
  env:
    # For the meaning of these environment variables, see the job `Setup`
    CI_BRANCH_PUSH: ${{ github.event.ref }}
    CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
    CI_SHA_PUSH: ${{ github.event.head_commit.id }}
    CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
  steps:
    # Necessary to get the correct branch name and commit SHA for `workflow_run` event
    # We also take into account the `push` event (we might want to test some changes in a branch)
    - name: Prepare custom environment variables
      shell: bash
      # For the meaning of these environment variables, see the job `Setup`
      run: |
        CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
        echo "$CI_BRANCH_PUSH"
        echo "$CI_BRANCH_WORKFLOW_RUN"
        echo "$CI_SHA_PUSH"
        echo "$CI_SHA_WORKFLOW_RUN"
        # Explicit if/else: the fallback must depend only on the emptiness test.
        if [[ -n "$CI_BRANCH_PUSH" ]]; then
          echo "CI_BRANCH=$CI_BRANCH_PUSH" >> "$GITHUB_ENV"
        else
          echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> "$GITHUB_ENV"
        fi
        if [[ -n "$CI_SHA_PUSH" ]]; then
          echo "CI_SHA=$CI_SHA_PUSH" >> "$GITHUB_ENV"
        else
          echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> "$GITHUB_ENV"
        fi

    # `CI_BRANCH` / `CI_SHA` come from `GITHUB_ENV` (previous step) and are read as shell
    # variables so that event-derived text is never pasted into the script itself.
    - name: print environment variables
      run: |
        echo "env.CI_BRANCH = $CI_BRANCH"
        echo "env.CI_SHA = $CI_SHA"

    # Maps the runner group name to the short label used in report and artifact names.
    - name: Set `machine_type` for report and artifact names
      working-directory: /transformers
      shell: bash
      run: |
        echo "${{ matrix.machine_type }}"
        if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
          machine_type=single-gpu
        elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
          machine_type=multi-gpu
        else
          machine_type="${{ matrix.machine_type }}"
        fi
        echo "$machine_type"
        echo "machine_type=$machine_type" >> "$GITHUB_ENV"

    - name: Update clone using environment variables
      working-directory: /transformers
      run: |
        echo "original branch = $(git branch --show-current)"
        git fetch && git checkout "$CI_BRANCH"
        echo "updated branch = $(git branch --show-current)"
        git checkout "$CI_SHA"
        echo "log = $(git log -n 1)"

    - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
      working-directory: /transformers
      run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

    - name: Echo folder ${{ matrix.folders }}
      shell: bash
      # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
      # set the artifact folder names (because the character `/` is not allowed).
      run: |
        echo "${{ matrix.folders }}"
        echo "${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}"
        matrix_folders="${{ matrix.folders }}"
        matrix_folders=${matrix_folders/'models/'/'models_'}
        echo "$matrix_folders"
        echo "matrix_folders=$matrix_folders" >> "$GITHUB_ENV"

    - name: NVIDIA-SMI
      run: |
        nvidia-smi

    - name: Environment
      working-directory: /transformers
      run: |
        python3 utils/print_env.py

    - name: Show installed libraries and their versions
      working-directory: /transformers
      run: pip freeze

    - name: Run all non-slow selected tests on GPU
      working-directory: /transformers
      run: |
        python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}

    - name: Failure short reports
      if: ${{ failure() }}
      continue-on-error: true
      run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

    - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
        path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}
# Job `run_tests_multi_gpu` (an entry of the workflow's `jobs:` mapping).
# Same pipeline as the single-GPU model-test job, but on the `aws-g4dn-12xlarge-cache`
# runner group with `--gpus all`, and with `MKL_SERVICE_FORCE_INTEL` set for the pytest step.
run_tests_multi_gpu:
  name: Model tests
  needs: setup
  # `dummy` means there is no test to run
  if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
  strategy:
    fail-fast: false
    matrix:
      folders: ${{ fromJson(needs.setup.outputs.matrix) }}
      machine_type: [aws-g4dn-12xlarge-cache]
  runs-on:
    group: '${{ matrix.machine_type }}'
  container:
    image: huggingface/transformers-all-latest-gpu-push-ci
    options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
  env:
    # For the meaning of these environment variables, see the job `Setup`
    CI_BRANCH_PUSH: ${{ github.event.ref }}
    CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
    CI_SHA_PUSH: ${{ github.event.head_commit.id }}
    CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
  steps:
    # Necessary to get the correct branch name and commit SHA for `workflow_run` event
    # We also take into account the `push` event (we might want to test some changes in a branch)
    - name: Prepare custom environment variables
      shell: bash
      # For the meaning of these environment variables, see the job `Setup`
      run: |
        CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
        echo "$CI_BRANCH_PUSH"
        echo "$CI_BRANCH_WORKFLOW_RUN"
        echo "$CI_SHA_PUSH"
        echo "$CI_SHA_WORKFLOW_RUN"
        # Explicit if/else: the fallback must depend only on the emptiness test.
        if [[ -n "$CI_BRANCH_PUSH" ]]; then
          echo "CI_BRANCH=$CI_BRANCH_PUSH" >> "$GITHUB_ENV"
        else
          echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> "$GITHUB_ENV"
        fi
        if [[ -n "$CI_SHA_PUSH" ]]; then
          echo "CI_SHA=$CI_SHA_PUSH" >> "$GITHUB_ENV"
        else
          echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> "$GITHUB_ENV"
        fi

    # `CI_BRANCH` / `CI_SHA` come from `GITHUB_ENV` (previous step) and are read as shell
    # variables so that event-derived text is never pasted into the script itself.
    - name: print environment variables
      run: |
        echo "env.CI_BRANCH = $CI_BRANCH"
        echo "env.CI_SHA = $CI_SHA"

    # Maps the runner group name to the short label used in report and artifact names.
    - name: Set `machine_type` for report and artifact names
      working-directory: /transformers
      shell: bash
      run: |
        echo "${{ matrix.machine_type }}"
        if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
          machine_type=single-gpu
        elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
          machine_type=multi-gpu
        else
          machine_type="${{ matrix.machine_type }}"
        fi
        echo "$machine_type"
        echo "machine_type=$machine_type" >> "$GITHUB_ENV"

    - name: Update clone using environment variables
      working-directory: /transformers
      run: |
        echo "original branch = $(git branch --show-current)"
        git fetch && git checkout "$CI_BRANCH"
        echo "updated branch = $(git branch --show-current)"
        git checkout "$CI_SHA"
        echo "log = $(git log -n 1)"

    - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
      working-directory: /transformers
      run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

    - name: Echo folder ${{ matrix.folders }}
      shell: bash
      # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
      # set the artifact folder names (because the character `/` is not allowed).
      run: |
        echo "${{ matrix.folders }}"
        echo "${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}"
        matrix_folders="${{ matrix.folders }}"
        matrix_folders=${matrix_folders/'models/'/'models_'}
        echo "$matrix_folders"
        echo "matrix_folders=$matrix_folders" >> "$GITHUB_ENV"

    - name: NVIDIA-SMI
      run: |
        nvidia-smi

    - name: Environment
      working-directory: /transformers
      run: |
        python3 utils/print_env.py

    - name: Show installed libraries and their versions
      working-directory: /transformers
      run: pip freeze

    - name: Run all non-slow selected tests on GPU
      env:
        MKL_SERVICE_FORCE_INTEL: "1"
      working-directory: /transformers
      run: |
        python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}

    - name: Failure short reports
      if: ${{ failure() }}
      continue-on-error: true
      run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

    - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
        path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}
# Job `run_tests_torch_cuda_extensions_single_gpu` (an entry of the workflow's `jobs:` mapping).
# Runs only when the keys published by `setup` contain `deepspeed` or `extended`.
# In the DeepSpeed image it checks out the triggering commit, rebuilds DeepSpeed, runs
# `tests/deepspeed` and `tests/extended`, and uploads the report folder as an artifact.
run_tests_torch_cuda_extensions_single_gpu:
  name: Torch CUDA extension tests
  needs: setup
  if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
  strategy:
    fail-fast: false
    matrix:
      machine_type: [aws-g4dn-2xlarge-cache]
  runs-on:
    group: '${{ matrix.machine_type }}'
  container:
    image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
    options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
  env:
    # For the meaning of these environment variables, see the job `Setup`
    CI_BRANCH_PUSH: ${{ github.event.ref }}
    CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
    CI_SHA_PUSH: ${{ github.event.head_commit.id }}
    CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
  steps:
    # Necessary to get the correct branch name and commit SHA for `workflow_run` event
    # We also take into account the `push` event (we might want to test some changes in a branch)
    - name: Prepare custom environment variables
      shell: bash
      # For the meaning of these environment variables, see the job `Setup`
      run: |
        CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
        echo "$CI_BRANCH_PUSH"
        echo "$CI_BRANCH_WORKFLOW_RUN"
        echo "$CI_SHA_PUSH"
        echo "$CI_SHA_WORKFLOW_RUN"
        # Explicit if/else: the fallback must depend only on the emptiness test.
        if [[ -n "$CI_BRANCH_PUSH" ]]; then
          echo "CI_BRANCH=$CI_BRANCH_PUSH" >> "$GITHUB_ENV"
        else
          echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> "$GITHUB_ENV"
        fi
        if [[ -n "$CI_SHA_PUSH" ]]; then
          echo "CI_SHA=$CI_SHA_PUSH" >> "$GITHUB_ENV"
        else
          echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> "$GITHUB_ENV"
        fi

    # `CI_BRANCH` / `CI_SHA` come from `GITHUB_ENV` (previous step) and are read as shell
    # variables so that event-derived text is never pasted into the script itself.
    - name: print environment variables
      run: |
        echo "env.CI_BRANCH = $CI_BRANCH"
        echo "env.CI_SHA = $CI_SHA"

    # Maps the runner group name to the short label used in report and artifact names.
    - name: Set `machine_type` for report and artifact names
      working-directory: /workspace/transformers
      shell: bash
      run: |
        echo "${{ matrix.machine_type }}"
        if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
          machine_type=single-gpu
        elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
          machine_type=multi-gpu
        else
          machine_type="${{ matrix.machine_type }}"
        fi
        echo "$machine_type"
        echo "machine_type=$machine_type" >> "$GITHUB_ENV"

    - name: Update clone using environment variables
      working-directory: /workspace/transformers
      run: |
        echo "original branch = $(git branch --show-current)"
        git fetch && git checkout "$CI_BRANCH"
        echo "updated branch = $(git branch --show-current)"
        git checkout "$CI_SHA"
        echo "log = $(git log -n 1)"

    - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
      working-directory: /workspace/transformers
      run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

    - name: Remove cached torch extensions
      run: rm -rf /github/home/.cache/torch_extensions/

    # To avoid unknown test failures
    - name: Pre build DeepSpeed *again*
      working-directory: /workspace
      run: |
        python3 -m pip uninstall -y deepspeed
        DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check

    - name: NVIDIA-SMI
      run: |
        nvidia-smi

    - name: Environment
      working-directory: /workspace/transformers
      run: |
        python utils/print_env.py

    - name: Show installed libraries and their versions
      working-directory: /workspace/transformers
      run: pip freeze

    - name: Run all non-slow selected tests on GPU
      working-directory: /workspace/transformers
      # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
      run: |
        python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended

    - name: Failure short reports
      if: ${{ failure() }}
      continue-on-error: true
      run: cat /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt

    - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
        path: /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
# Job `run_tests_torch_cuda_extensions_multi_gpu` (an entry of the workflow's `jobs:` mapping).
# Same pipeline as the single-GPU Torch CUDA extension job, but on the
# `aws-g4dn-12xlarge-cache` runner group with `--gpus all`.
run_tests_torch_cuda_extensions_multi_gpu:
  name: Torch CUDA extension tests
  needs: setup
  if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
  strategy:
    fail-fast: false
    matrix:
      machine_type: [aws-g4dn-12xlarge-cache]
  runs-on:
    group: '${{ matrix.machine_type }}'
  container:
    image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
    options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
  env:
    # For the meaning of these environment variables, see the job `Setup`
    CI_BRANCH_PUSH: ${{ github.event.ref }}
    CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
    CI_SHA_PUSH: ${{ github.event.head_commit.id }}
    CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
  steps:
    # Necessary to get the correct branch name and commit SHA for `workflow_run` event
    # We also take into account the `push` event (we might want to test some changes in a branch)
    - name: Prepare custom environment variables
      shell: bash
      # For the meaning of these environment variables, see the job `Setup`
      run: |
        CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
        echo "$CI_BRANCH_PUSH"
        echo "$CI_BRANCH_WORKFLOW_RUN"
        echo "$CI_SHA_PUSH"
        echo "$CI_SHA_WORKFLOW_RUN"
        # Explicit if/else: the fallback must depend only on the emptiness test.
        if [[ -n "$CI_BRANCH_PUSH" ]]; then
          echo "CI_BRANCH=$CI_BRANCH_PUSH" >> "$GITHUB_ENV"
        else
          echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> "$GITHUB_ENV"
        fi
        if [[ -n "$CI_SHA_PUSH" ]]; then
          echo "CI_SHA=$CI_SHA_PUSH" >> "$GITHUB_ENV"
        else
          echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> "$GITHUB_ENV"
        fi

    # `CI_BRANCH` / `CI_SHA` come from `GITHUB_ENV` (previous step) and are read as shell
    # variables so that event-derived text is never pasted into the script itself.
    - name: print environment variables
      run: |
        echo "env.CI_BRANCH = $CI_BRANCH"
        echo "env.CI_SHA = $CI_SHA"

    # Maps the runner group name to the short label used in report and artifact names.
    - name: Set `machine_type` for report and artifact names
      working-directory: /workspace/transformers
      shell: bash
      run: |
        echo "${{ matrix.machine_type }}"
        if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
          machine_type=single-gpu
        elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
          machine_type=multi-gpu
        else
          machine_type="${{ matrix.machine_type }}"
        fi
        echo "$machine_type"
        echo "machine_type=$machine_type" >> "$GITHUB_ENV"

    - name: Update clone using environment variables
      working-directory: /workspace/transformers
      run: |
        echo "original branch = $(git branch --show-current)"
        git fetch && git checkout "$CI_BRANCH"
        echo "updated branch = $(git branch --show-current)"
        git checkout "$CI_SHA"
        echo "log = $(git log -n 1)"

    - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
      working-directory: /workspace/transformers
      run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

    - name: Remove cached torch extensions
      run: rm -rf /github/home/.cache/torch_extensions/

    # To avoid unknown test failures
    - name: Pre build DeepSpeed *again*
      working-directory: /workspace
      run: |
        python3 -m pip uninstall -y deepspeed
        DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check

    - name: NVIDIA-SMI
      run: |
        nvidia-smi

    - name: Environment
      working-directory: /workspace/transformers
      run: |
        python utils/print_env.py

    - name: Show installed libraries and their versions
      working-directory: /workspace/transformers
      run: pip freeze

    - name: Run all non-slow selected tests on GPU
      working-directory: /workspace/transformers
      # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
      run: |
        python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended

    - name: Failure short reports
      if: ${{ failure() }}
      continue-on-error: true
      run: cat /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt

    - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
        path: /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
# Job `send_results` (an entry of the workflow's `jobs:` mapping).
# Always runs after `setup` and the four test jobs, whatever their result. It checks out
# the triggering commit, downloads the artifacts of this run, and calls
# `utils/notification_service.py` with the `matrix` output of `setup`.
send_results:
  name: Send results to webhook
  runs-on: ubuntu-22.04
  if: always()
  needs:
    - setup
    - run_tests_single_gpu
    - run_tests_multi_gpu
    - run_tests_torch_cuda_extensions_single_gpu
    - run_tests_torch_cuda_extensions_multi_gpu
  env:
    # For the meaning of these environment variables, see the job `Setup`
    CI_BRANCH_PUSH: ${{ github.event.ref }}
    CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
    CI_SHA_PUSH: ${{ github.event.head_commit.id }}
    CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
  steps:
    - name: Preliminary job status
      shell: bash
      # Logs the result of the `setup` job.
      run: |
        echo "Setup status: ${{ needs.setup.result }}"

    # Necessary to get the correct branch name and commit SHA for `workflow_run` event
    # We also take into account the `push` event (we might want to test some changes in a branch)
    - name: Prepare custom environment variables
      shell: bash
      # For the meaning of these environment variables, see the job `Setup`
      run: |
        CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
        echo "$CI_BRANCH_PUSH"
        echo "$CI_BRANCH_WORKFLOW_RUN"
        echo "$CI_SHA_PUSH"
        echo "$CI_SHA_WORKFLOW_RUN"
        # Explicit if/else: the fallback must depend only on the emptiness test.
        if [[ -n "$CI_BRANCH_PUSH" ]]; then
          echo "CI_BRANCH=$CI_BRANCH_PUSH" >> "$GITHUB_ENV"
        else
          echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> "$GITHUB_ENV"
        fi
        if [[ -n "$CI_SHA_PUSH" ]]; then
          echo "CI_SHA=$CI_SHA_PUSH" >> "$GITHUB_ENV"
        else
          echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> "$GITHUB_ENV"
        fi

    # `CI_BRANCH` / `CI_SHA` come from `GITHUB_ENV` (previous step) and are read as shell
    # variables so that event-derived text is never pasted into the script itself.
    - name: print environment variables
      run: |
        echo "env.CI_BRANCH = $CI_BRANCH"
        echo "env.CI_SHA = $CI_SHA"

    - uses: actions/checkout@v4
      # To avoid failure when multiple commits are merged into `main` in a short period of time.
      # Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ...
      # (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit)
      with:
        fetch-depth: 20

    - name: Update clone using environment variables
      run: |
        echo "original branch = $(git branch --show-current)"
        git fetch && git checkout "$CI_BRANCH"
        echo "updated branch = $(git branch --show-current)"
        git checkout "$CI_SHA"
        echo "log = $(git log -n 1)"

    - uses: actions/download-artifact@v4

    - name: Send message to Slack
      env:
        CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
        CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
        CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
        CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
        CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
        ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
        CI_EVENT: push
        CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
        CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
        CI_SHA: ${{ env.CI_SHA }}
        SETUP_STATUS: ${{ needs.setup.result }}
      # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
      # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
      run: |
        pip install huggingface_hub
        pip install slack_sdk
        pip show slack_sdk
        python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"