From ebaca1f01ef5f5614800e38a28b5dd6bf16bd737 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Mon, 22 Apr 2024 15:49:04 +0200 Subject: [PATCH] better names --- .github/workflows/model_jobs.yml | 18 ++--- .github/workflows/self-nightly-scheduled.yml | 16 ++-- .github/workflows/self-past.yml | 16 ++-- .github/workflows/self-push-amd.yml | 14 ++-- .github/workflows/self-push.yml | 20 ++--- .github/workflows/self-scheduled-amd.yml | 68 ++++++++-------- .github/workflows/self-scheduled-caller.yml | 6 +- .github/workflows/self-scheduled.yml | 82 ++++++++++---------- .github/workflows/slack-report.yml | 4 +- utils/notification_service.py | 14 ++-- utils/notification_service_quantization.py | 4 +- 11 files changed, 131 insertions(+), 131 deletions(-) diff --git a/.github/workflows/model_jobs.yml b/.github/workflows/model_jobs.yml index 978e5f617e3ae5..9aca7216aa9e95 100644 --- a/.github/workflows/model_jobs.yml +++ b/.github/workflows/model_jobs.yml @@ -28,7 +28,7 @@ env: CUDA_VISIBLE_DEVICES: 0,1 jobs: - model_job: + run_models_gpu: name: " " strategy: fail-fast: false @@ -80,23 +80,23 @@ jobs: - name: Run all tests on GPU working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }} + run: python3 -m pytest -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt + run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt - name: Run test shell: bash run: | - mkdir -p /transformers/reports/${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }} - echo "hello" > /transformers/reports/${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }}/hello.txt - echo "${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }}" + mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt + echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports" - - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" + - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ inputs.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }} + name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports + path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports diff --git a/.github/workflows/self-nightly-scheduled.yml b/.github/workflows/self-nightly-scheduled.yml index 7906325e83bb9d..875e715b068b6c 100644 --- a/.github/workflows/self-nightly-scheduled.yml +++ b/.github/workflows/self-nightly-scheduled.yml @@ -2,7 +2,7 @@ name: Self-hosted runner (nightly-ci) # Note that each job's dependencies go into a corresponding docker file. 
# -# For example for `run_all_tests_torch_cuda_extensions_gpu` the docker image is +# For example for `run_torch_cuda_extensions_gpu` the docker image is # `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at # `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile` @@ -183,7 +183,7 @@ jobs: name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} - run_all_tests_torch_cuda_extensions_gpu: + run_torch_cuda_extensions_gpu: name: Torch CUDA extension tests strategy: fail-fast: false @@ -231,19 +231,19 @@ jobs: - name: Run all tests on GPU working-directory: /workspace/transformers run: | - python -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended + python -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt + run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_nightly" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_nightly" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_nightly - path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu + name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_nightly + path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports send_results: name: Send results to webhook @@ -253,7 +253,7 @@ jobs: setup, run_tests_single_gpu, run_tests_multi_gpu, - run_all_tests_torch_cuda_extensions_gpu + run_torch_cuda_extensions_gpu ] steps: - name: Preliminary job status diff --git a/.github/workflows/self-past.yml b/.github/workflows/self-past.yml index 7be658c43202ff..ca47c454f6894a 100644 --- a/.github/workflows/self-past.yml +++ b/.github/workflows/self-past.yml @@ -2,7 +2,7 @@ name: Self-hosted runner (past-ci) # Note that each job's dependencies go into a corresponding docker file. 
# -# For example for `run_all_tests_torch_cuda_extensions_gpu` the docker image is +# For example for `run_torch_cuda_extensions_gpu` the docker image is # `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at # `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile` @@ -228,7 +228,7 @@ jobs: name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }} path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} - run_all_tests_torch_cuda_extensions_gpu: + run_torch_cuda_extensions_gpu: name: Torch CUDA extension tests if: inputs.framework == 'pytorch' strategy: @@ -286,19 +286,19 @@ jobs: - name: Run all tests on GPU working-directory: /transformers run: | - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended + python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt + run: cat /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }} - path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu + name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }} + path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports send_results: name: Send results to webhook @@ -308,7 +308,7 @@ jobs: setup, run_tests_single_gpu, run_tests_multi_gpu, - run_all_tests_torch_cuda_extensions_gpu + run_torch_cuda_extensions_gpu ] steps: - name: Preliminary job status diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index b285a5f8fc0ad8..8705f398b2b510 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -145,7 +145,7 @@ jobs: echo "matrix=$keys" >> $GITHUB_OUTPUT echo "test_map=$test_map" >> $GITHUB_OUTPUT - run_tests_amdgpu: + run_models_gpu: name: Model tests needs: setup_gpu # `dummy` means there is no test to run @@ -230,19 +230,19 @@ jobs: - name: Run all non-slow selected tests on GPU working-directory: /transformers run: | - python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} + python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type 
}}_tests_gpu_${{ matrix.folders }}/failures_short.txt + run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} + name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports send_results: name: Send results to webhook @@ -252,7 +252,7 @@ jobs: check_runner_status, check_runners, setup_gpu, - run_tests_amdgpu, + run_models_gpu, # run_tests_torch_cuda_extensions_single_gpu, # run_tests_torch_cuda_extensions_multi_gpu ] diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml index 17dff31fa4e330..1bc02ccd826eb0 100644 --- a/.github/workflows/self-push.yml +++ b/.github/workflows/self-push.yml @@ -385,19 +385,19 @@ jobs: working-directory: /workspace/transformers # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests. run: | - python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended + python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt + run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports - path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu + name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports + path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports run_tests_torch_cuda_extensions_multi_gpu: name: Torch CUDA extension tests @@ -475,19 +475,19 @@ jobs: working-directory: /workspace/transformers # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests. 
run: | - python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended + python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt + run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports - path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu + name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports + path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports send_results: name: Send results to webhook diff --git a/.github/workflows/self-scheduled-amd.yml b/.github/workflows/self-scheduled-amd.yml index 09926071802a7a..d2ab90d1331848 100644 --- a/.github/workflows/self-scheduled-amd.yml +++ b/.github/workflows/self-scheduled-amd.yml @@ -108,7 +108,7 @@ jobs: run: | python3 utils/print_env.py - run_tests_single_gpu: + run_models_gpu_single_gpu: name: Single GPU tests strategy: max-parallel: 1 # For now, not to parallelize. Can change later if it works well. 
@@ -162,21 +162,21 @@ jobs: - name: Run all tests on GPU working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }} + run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt + run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} + name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports - run_tests_multi_gpu: + run_models_gpu_multi_gpu: name: Multi GPU tests strategy: max-parallel: 1 @@ -230,19 +230,19 @@ jobs: - name: Run all tests on GPU working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }} + run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt + run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} + name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports run_examples_gpu: name: Examples tests @@ -287,19 +287,19 @@ jobs: working-directory: /transformers run: | pip install -r examples/pytorch/_tests_requirements.txt - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch + python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt + run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type 
}}_run_examples_gpu" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_examples_gpu - path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu + name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports run_pipelines_torch_gpu: name: PyTorch pipelines tests @@ -343,21 +343,21 @@ jobs: - name: Run all pipeline tests on GPU working-directory: /transformers run: | - python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines + python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt + run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu - path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu + name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports - run_tests_torch_deepspeed_gpu: + run_torch_cuda_extensions_gpu: name: Torch ROCm deepspeed tests strategy: fail-fast: false @@ -400,19 +400,19 @@ jobs: - name: Run all tests on GPU working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_deepspeed_gpu tests/deepspeed tests/extended + run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_deepspeed_gpu/failures_short.txt + run: cat /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_deepspeed_gpu_test_reports" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_tests_torch_deepspeed_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_deepspeed_gpu + name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports run_extract_warnings: name: Extract warnings in CI artifacts @@ -422,11 +422,11 @@ jobs: check_runner_status, check_runners, setup, - run_tests_single_gpu, - run_tests_multi_gpu, + run_models_gpu_single_gpu, + run_models_gpu_multi_gpu, run_examples_gpu, run_pipelines_torch_gpu, - run_tests_torch_deepspeed_gpu + run_torch_cuda_extensions_gpu ] steps: - name: Checkout transformers @@ -471,11 
+471,11 @@ jobs: check_runner_status, check_runners, setup, - run_tests_single_gpu, - run_tests_multi_gpu, + run_models_gpu_single_gpu, + run_models_gpu_multi_gpu, run_examples_gpu, run_pipelines_torch_gpu, - run_tests_torch_deepspeed_gpu, + run_torch_cuda_extensions_gpu, run_extract_warnings ] steps: diff --git a/.github/workflows/self-scheduled-caller.yml b/.github/workflows/self-scheduled-caller.yml index 59b992bcd250e2..40689c629a09bf 100644 --- a/.github/workflows/self-scheduled-caller.yml +++ b/.github/workflows/self-scheduled-caller.yml @@ -14,7 +14,7 @@ jobs: name: Model CI uses: ./.github/workflows/self-scheduled.yml with: - job: run_tests_gpu + job: run_models_gpu slack_report_channel: "#transformers-ci-daily-models" secrets: inherit @@ -46,7 +46,7 @@ jobs: name: DeepSpeed CI uses: ./.github/workflows/self-scheduled.yml with: - job: run_all_tests_torch_cuda_extensions_gpu + job: run_torch_cuda_extensions_gpu slack_report_channel: "#transformers-ci-daily-deepspeed" secrets: inherit @@ -54,6 +54,6 @@ jobs: name: Quantization CI uses: ./.github/workflows/self-scheduled.yml with: - job: run_tests_quantization_torch_gpu + job: run_quantization_torch_gpu slack_report_channel: "#transformers-ci-daily-quantization" secrets: inherit diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index fa41bffc0bc826..0f765d3272001b 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -2,7 +2,7 @@ name: Self-hosted runner (scheduled) # Note that each job's dependencies go into a corresponding docker file. # -# For example for `run_all_tests_torch_cuda_extensions_gpu` the docker image is +# For example for `run_torch_cuda_extensions_gpu` the docker image is # `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at # `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile` @@ -33,7 +33,7 @@ env: jobs: setup: - if: contains(fromJSON('["run_tests_gpu", "run_tests_quantization_torch_gpu"]'), inputs.job) + if: contains(fromJSON('["run_models_gpu", "run_quantization_torch_gpu"]'), inputs.job) name: Setup strategy: matrix: @@ -64,7 +64,7 @@ jobs: run: pip freeze - id: set-matrix - if: ${{ inputs.job == 'run_tests_gpu' }} + if: ${{ inputs.job == 'run_models_gpu' }} name: Identify models to test working-directory: /transformers/tests run: | @@ -72,7 +72,7 @@ jobs: echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT - id: set-matrix-quantization - if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }} + if: ${{ inputs.job == 'run_quantization_torch_gpu' }} name: Identify quantization method to test working-directory: /transformers/tests run: | @@ -82,8 +82,8 @@ jobs: run: | nvidia-smi - run_tests_gpu: - if: ${{ inputs.job == 'run_tests_gpu' }} + run_models_gpu: + if: ${{ inputs.job == 'run_models_gpu' }} name: " " needs: setup strategy: @@ -134,19 +134,19 @@ jobs: - name: Run all pipeline tests on GPU working-directory: /transformers run: | - python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines + python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt + run: cat /transformers/reports/${{ matrix.machine_type 
}}_run_pipelines_torch_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu - path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu + name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports run_pipelines_tf_gpu: if: ${{ inputs.job == 'run_pipelines_tf_gpu' }} @@ -185,19 +185,19 @@ jobs: - name: Run all pipeline tests on GPU working-directory: /transformers run: | - python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines + python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines - name: Failure short reports if: ${{ always() }} run: | - cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt + cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu - path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu + name: ${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports run_examples_gpu: if: ${{ inputs.job == 'run_examples_gpu' }} @@ -236,22 +236,22 @@ jobs: working-directory: /transformers run: | pip install -r examples/pytorch/_tests_requirements.txt - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch + python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt + run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_examples_gpu - path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu + name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports - run_all_tests_torch_cuda_extensions_gpu: - if: ${{ inputs.job == 'run_all_tests_torch_cuda_extensions_gpu' }} + run_torch_cuda_extensions_gpu: + if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }} name: Torch CUDA extension tests strategy: fail-fast: false @@ -296,22 +296,22 @@ jobs: - name: Run all tests on GPU working-directory: /workspace/transformers run: | - python -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu 
tests/deepspeed tests/extended + python -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt + run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports - path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu + name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports + path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports - run_tests_quantization_torch_gpu: - if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }} + run_quantization_torch_gpu: + if: ${{ inputs.job == 'run_quantization_torch_gpu' }} name: " " needs: setup strategy: @@ -357,26 +357,26 @@ jobs: - name: Run quantization tests on GPU working-directory: /transformers run: | - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }} tests/${{ matrix.folders }} + python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }}/failures_short.txt + run: cat /transformers/reports/${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }}" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }} - path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }} + name: ${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports run_extract_warnings: - # Let's only do this for the job `run_tests_gpu` to simplify the (already complex) logic. - if: ${{ always() && inputs.job == 'run_tests_gpu' }} + # Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic. 
+ if: ${{ always() && inputs.job == 'run_models_gpu' }} name: Extract warnings in CI artifacts runs-on: ubuntu-22.04 - needs: [setup, run_tests_gpu] + needs: [setup, run_models_gpu] steps: - name: Checkout transformers uses: actions/checkout@v4 @@ -416,12 +416,12 @@ jobs: name: Slack Report needs: [ setup, - run_tests_gpu, + run_models_gpu, run_pipelines_torch_gpu, run_pipelines_tf_gpu, run_examples_gpu, - run_all_tests_torch_cuda_extensions_gpu, - run_tests_quantization_torch_gpu, + run_torch_cuda_extensions_gpu, + run_quantization_torch_gpu, run_extract_warnings ] if: ${{ always() }} diff --git a/.github/workflows/slack-report.yml b/.github/workflows/slack-report.yml index 88660914bfdc65..72442385e03dbb 100644 --- a/.github/workflows/slack-report.yml +++ b/.github/workflows/slack-report.yml @@ -35,7 +35,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/download-artifact@v4 - name: Send message to Slack - if: ${{ inputs.job != 'run_tests_quantization_torch_gpu' }} + if: ${{ inputs.job != 'run_quantization_torch_gpu' }} env: CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} @@ -70,7 +70,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/download-artifact@v4 - name: Send message to Slack for quantization workflow - if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }} + if: ${{ inputs.job == 'run_quantization_torch_gpu' }} env: CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} diff --git a/utils/notification_service.py b/utils/notification_service.py index 158e01942b81fa..641c80d22d7f41 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -992,13 +992,13 @@ def prepare_reports(title, header, reports, to_truncate=True): "job_link": {}, } for model in models - if f"run_all_tests_gpu_{model}_test_reports" in available_artifacts + if f"run_models_gpu_{model}_test_reports" in available_artifacts } unclassified_model_failures = [] for model in model_results.keys(): - for artifact_path in available_artifacts[f"run_all_tests_gpu_{model}_test_reports"].paths: + for artifact_path in available_artifacts[f"run_models_gpu_{model}_test_reports"].paths: artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"]) if "stats" in artifact: # Link to the GitHub Action job @@ -1052,10 +1052,10 @@ def prepare_reports(title, header, reports, to_truncate=True): # Additional runs additional_files = { - "PyTorch pipelines": "run_tests_torch_pipeline_gpu", - "TensorFlow pipelines": "run_tests_tf_pipeline_gpu", - "Examples directory": "run_examples_gpu", - "Torch CUDA extension tests": "run_tests_torch_cuda_extensions_gpu_test_reports", + "PyTorch pipelines": "run_pipelines_torch_gpu_test_reports", + "TensorFlow pipelines": "run_pipelines_tf_gpu_test_reports", + "Examples directory": "run_examples_gpu_test_reports", + "Torch CUDA extension tests": "run_torch_cuda_extensions_gpu_test_reports", } if ci_event in ["push", "Nightly CI"] or ci_event.startswith("Past CI"): @@ -1075,7 +1075,7 @@ def prepare_reports(title, header, reports, to_truncate=True): "run_pipelines_torch_gpu": "PyTorch pipelines", "run_pipelines_tf_gpu": "TensorFlow pipelines", "run_examples_gpu": "Examples directory", - "run_all_tests_torch_cuda_extensions_gpu": "Torch CUDA extension tests", + "run_torch_cuda_extensions_gpu": "Torch CUDA extension tests", } # Remove some entries in `additional_files` if they are not concerned. 
diff --git a/utils/notification_service_quantization.py b/utils/notification_service_quantization.py index 11bc57e618a7e4..1687eeaa25f32f 100644 --- a/utils/notification_service_quantization.py +++ b/utils/notification_service_quantization.py @@ -200,7 +200,7 @@ def post_reply(self): "job_link": {}, } for quant in quantization_matrix - if f"run_tests_quantization_torch_gpu_{quant}" in available_artifacts + if f"run_quantization_torch_gpu_{quant}_test_reports" in available_artifacts } github_actions_jobs = get_jobs( @@ -217,7 +217,7 @@ def post_reply(self): break for quant in quantization_results.keys(): - for artifact_path in available_artifacts[f"run_tests_quantization_torch_gpu_{quant}"].paths: + for artifact_path in available_artifacts[f"run_quantization_torch_gpu_{quant}_test_reports"].paths: artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"]) if "stats" in artifact: # Link to the GitHub Action job