From 176e055e8252c088707123aeea51e9de59163fd2 Mon Sep 17 00:00:00 2001 From: Marc Sun Date: Tue, 9 Apr 2024 12:29:37 +0200 Subject: [PATCH] fix quantization ci --- .github/workflows/build-docker-images.yml | 3 +- .github/workflows/self-scheduled-caller.yml | 81 +++++++++++---------- .github/workflows/self-scheduled.yml | 5 +- .github/workflows/slack-report.yml | 23 +++++- utils/notification_service.py | 3 +- utils/notification_service_quantization.py | 11 +-- 6 files changed, 71 insertions(+), 55 deletions(-) diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml index 5f8b2d848aa51f..ec01b84bb3288f 100644 --- a/.github/workflows/build-docker-images.yml +++ b/.github/workflows/build-docker-images.yml @@ -3,8 +3,7 @@ name: Build docker images (scheduled) on: push: branches: - # - build_ci_docker_image* - - fix-quantization-tests + - build_ci_docker_image* repository_dispatch: workflow_call: inputs: diff --git a/.github/workflows/self-scheduled-caller.yml b/.github/workflows/self-scheduled-caller.yml index 59b992bcd250e2..a247380bae57c5 100644 --- a/.github/workflows/self-scheduled-caller.yml +++ b/.github/workflows/self-scheduled-caller.yml @@ -7,48 +7,49 @@ on: - cron: "17 2 * * *" push: branches: - - run_scheduled_ci* + # - run_scheduled_ci* + - fix-quantization-tests jobs: - model-ci: - name: Model CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_tests_gpu - slack_report_channel: "#transformers-ci-daily-models" - secrets: inherit - - torch-pipeline: - name: Torch pipeline CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_pipelines_torch_gpu - slack_report_channel: "#transformers-ci-daily-pipeline-torch" - secrets: inherit - - tf-pipeline: - name: TF pipeline CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_pipelines_tf_gpu - slack_report_channel: "#transformers-ci-daily-pipeline-tf" - secrets: inherit - - example-ci: - name: Example CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_examples_gpu - slack_report_channel: "#transformers-ci-daily-examples" - secrets: inherit - - deepspeed-ci: - name: DeepSpeed CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_all_tests_torch_cuda_extensions_gpu - slack_report_channel: "#transformers-ci-daily-deepspeed" - secrets: inherit + # model-ci: + # name: Model CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_tests_gpu + # slack_report_channel: "#transformers-ci-daily-models" + # secrets: inherit + + # torch-pipeline: + # name: Torch pipeline CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_pipelines_torch_gpu + # slack_report_channel: "#transformers-ci-daily-pipeline-torch" + # secrets: inherit + + # tf-pipeline: + # name: TF pipeline CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_pipelines_tf_gpu + # slack_report_channel: "#transformers-ci-daily-pipeline-tf" + # secrets: inherit + + # example-ci: + # name: Example CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_examples_gpu + # slack_report_channel: "#transformers-ci-daily-examples" + # secrets: inherit + + # deepspeed-ci: + # name: DeepSpeed CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_all_tests_torch_cuda_extensions_gpu + # slack_report_channel: "#transformers-ci-daily-deepspeed" + # secrets: inherit quantization-ci: name: Quantization CI diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 67235eb115cea3..81620b740ba81d 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -33,7 +33,6 @@ env: jobs: setup: - if: ${{ inputs.job == 'run_tests_gpu' }} name: Setup strategy: matrix: @@ -64,6 +63,7 @@ jobs: run: pip freeze - id: set-matrix + if: ${{ inputs.job == 'run_tests_gpu' }} name: Identify models to test working-directory: /transformers/tests run: | @@ -71,6 +71,7 @@ jobs: echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT - id: set-matrix-quantization + if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }} name: Identify quantization method to test working-directory: /transformers/tests run: | @@ -430,4 +431,6 @@ jobs: slack_report_channel: ${{ inputs.slack_report_channel }} # This would be an empty string if `setup` is skipped. folder_slices: ${{ needs.setup.outputs.folder_slices }} + quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }} + secrets: inherit diff --git a/.github/workflows/slack-report.yml b/.github/workflows/slack-report.yml index 0e964e8596a0f5..840f9e7300ff5a 100644 --- a/.github/workflows/slack-report.yml +++ b/.github/workflows/slack-report.yml @@ -15,6 +15,9 @@ on: folder_slices: required: true type: string + quantization_matrix: + required: true + type: string jobs: @@ -53,7 +56,25 @@ jobs: pip install slack_sdk pip show slack_sdk python utils/notification_service.py "${{ inputs.folder_slices }}" - + + - uses: actions/checkout@v3 + - uses: actions/download-artifact@v3 + - name: Send message to Slack for quantization workflow + if: ${{ inputs.slack_report_channel == 'run_tests_quantization_torch_gpu' }} + env: + CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} + ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} + CI_EVENT: scheduled + CI_SHA: ${{ github.sha }} + SETUP_STATUS: ${{ inputs.setup_status }} + # We pass `needs.setup.outputs.quantization_matrix` as the argument. A processing in `notification_service_quantization.py` to change + # `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`. + run: | + sudo apt-get install -y curl + pip install slack_sdk + pip show slack_sdk + python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}" + # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack. - name: Failure table artifacts # Only the model testing job is concerned for this step diff --git a/utils/notification_service.py b/utils/notification_service.py index e9255dd076377a..2d501aa5a1c125 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -1075,8 +1075,7 @@ def prepare_reports(title, header, reports, to_truncate=True): "run_pipelines_torch_gpu": "PyTorch pipelines", "run_pipelines_tf_gpu": "TensorFlow pipelines", "run_examples_gpu": "Examples directory", - "run_all_tests_torch_cuda_extensions_gpu": "Torch CUDA extension tests", - "run_tests_quantization_torch_gpu": "Quantization tests", + "run_all_tests_torch_cuda_extensions_gpu": "Torch CUDA extension tests" } # Remove some entries in `additional_files` if they are not concerned. diff --git a/utils/notification_service_quantization.py b/utils/notification_service_quantization.py index 0bfb4f31ab8352..11bc57e618a7e4 100644 --- a/utils/notification_service_quantization.py +++ b/utils/notification_service_quantization.py @@ -132,7 +132,7 @@ def post(self): text = f"{self.n_failures} failures out of {self.n_tests} tests," if self.n_failures else "All tests passed." self.thread_ts = client.chat_postMessage( - channel="#transformers-ci-daily-quantization", + channel=SLACK_REPORT_CHANNEL_ID, blocks=payload, text=text, ) @@ -166,12 +166,9 @@ def post_reply(self): if __name__ == "__main__": setup_status = os.environ.get("SETUP_STATUS") + SLACK_REPORT_CHANNEL_ID = os.environ["SLACK_REPORT_CHANNEL"] setup_failed = True if setup_status is not None and setup_status != "success" else False - org = "huggingface" - repo = "transformers" - repository_full_name = f"{org}/{repo}" - # This env. variable is set in workflow file (under the job `send_results`). ci_event = os.environ["CI_EVENT"] @@ -244,11 +241,7 @@ def post_reply(self): quantization_results[quant]["failures"][artifact_path["gpu"]].append( {"line": line, "trace": stacktraces.pop(0)} ) - if not os.path.isdir(os.path.join(os.getcwd(), "prev_ci_results")): - os.makedirs(os.path.join(os.getcwd(), "prev_ci_results")) - with open("prev_ci_results/quantization_results.json", "w", encoding="UTF-8") as fp: - json.dump(quantization_results, fp, indent=4, ensure_ascii=False) message = QuantizationMessage( title, results=quantization_results,