Skip to content

Commit

Permalink
fix quantization ci
Browse files Browse the repository at this point in the history
  • Loading branch information
SunMarc committed Apr 9, 2024
1 parent ff0d5a8 commit 176e055
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 55 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/build-docker-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ name: Build docker images (scheduled)
on:
push:
branches:
# - build_ci_docker_image*
- fix-quantization-tests
- build_ci_docker_image*
repository_dispatch:
workflow_call:
inputs:
Expand Down
81 changes: 41 additions & 40 deletions .github/workflows/self-scheduled-caller.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,48 +7,49 @@ on:
- cron: "17 2 * * *"
push:
branches:
- run_scheduled_ci*
# - run_scheduled_ci*
- fix-quantization-tests

jobs:
model-ci:
name: Model CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_tests_gpu
slack_report_channel: "#transformers-ci-daily-models"
secrets: inherit

torch-pipeline:
name: Torch pipeline CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#transformers-ci-daily-pipeline-torch"
secrets: inherit

tf-pipeline:
name: TF pipeline CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_pipelines_tf_gpu
slack_report_channel: "#transformers-ci-daily-pipeline-tf"
secrets: inherit

example-ci:
name: Example CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_examples_gpu
slack_report_channel: "#transformers-ci-daily-examples"
secrets: inherit

deepspeed-ci:
name: DeepSpeed CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_all_tests_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-daily-deepspeed"
secrets: inherit
# model-ci:
# name: Model CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_tests_gpu
# slack_report_channel: "#transformers-ci-daily-models"
# secrets: inherit

# torch-pipeline:
# name: Torch pipeline CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_pipelines_torch_gpu
# slack_report_channel: "#transformers-ci-daily-pipeline-torch"
# secrets: inherit

# tf-pipeline:
# name: TF pipeline CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_pipelines_tf_gpu
# slack_report_channel: "#transformers-ci-daily-pipeline-tf"
# secrets: inherit

# example-ci:
# name: Example CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_examples_gpu
# slack_report_channel: "#transformers-ci-daily-examples"
# secrets: inherit

# deepspeed-ci:
# name: DeepSpeed CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_all_tests_torch_cuda_extensions_gpu
# slack_report_channel: "#transformers-ci-daily-deepspeed"
# secrets: inherit

quantization-ci:
name: Quantization CI
Expand Down
5 changes: 4 additions & 1 deletion .github/workflows/self-scheduled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ env:

jobs:
setup:
if: ${{ inputs.job == 'run_tests_gpu' }}
name: Setup
strategy:
matrix:
Expand Down Expand Up @@ -64,13 +63,15 @@ jobs:
run: pip freeze

- id: set-matrix
if: ${{ inputs.job == 'run_tests_gpu' }}
name: Identify models to test
working-directory: /transformers/tests
run: |
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
- id: set-matrix-quantization
if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
name: Identify quantization method to test
working-directory: /transformers/tests
run: |
Expand Down Expand Up @@ -430,4 +431,6 @@ jobs:
slack_report_channel: ${{ inputs.slack_report_channel }}
# This would be an empty string if `setup` is skipped.
folder_slices: ${{ needs.setup.outputs.folder_slices }}
quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}

secrets: inherit
23 changes: 22 additions & 1 deletion .github/workflows/slack-report.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ on:
folder_slices:
required: true
type: string
quantization_matrix:
required: true
type: string


jobs:
Expand Down Expand Up @@ -53,7 +56,25 @@ jobs:
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py "${{ inputs.folder_slices }}"
- uses: actions/checkout@v3
- uses: actions/download-artifact@v3
# Post the quantization CI report to Slack.
# The step is gated on the job name rather than the Slack channel: channel values look like
# "#transformers-ci-daily-..." and can never equal 'run_tests_quantization_torch_gpu', so a
# channel-based condition would silently skip this step on every run.
# NOTE(review): assumes this reusable workflow declares a `job` input (as the calling workflow's
# `inputs.job` checks suggest) - confirm against the `workflow_call.inputs` section.
- name: Send message to Slack for quantization workflow
  if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
  env:
    CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
    ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
    CI_EVENT: scheduled
    CI_SHA: ${{ github.sha }}
    SETUP_STATUS: ${{ inputs.setup_status }}
  # The caller's `needs.setup.outputs.quantization_matrix` is passed as the script argument.
  # `notification_service_quantization.py` must convert `quantization/bnb` to `quantization_bnb`,
  # because the artifact names use `_` instead of `/`.
  run: |
    sudo apt-get install -y curl
    pip install slack_sdk
    pip show slack_sdk
    python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
# This step applies only to the model testing job
Expand Down
3 changes: 1 addition & 2 deletions utils/notification_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -1075,8 +1075,7 @@ def prepare_reports(title, header, reports, to_truncate=True):
"run_pipelines_torch_gpu": "PyTorch pipelines",
"run_pipelines_tf_gpu": "TensorFlow pipelines",
"run_examples_gpu": "Examples directory",
"run_all_tests_torch_cuda_extensions_gpu": "Torch CUDA extension tests",
"run_tests_quantization_torch_gpu": "Quantization tests",
"run_all_tests_torch_cuda_extensions_gpu": "Torch CUDA extension tests"
}

# Remove entries from `additional_files` that are not relevant.
Expand Down
11 changes: 2 additions & 9 deletions utils/notification_service_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def post(self):
text = f"{self.n_failures} failures out of {self.n_tests} tests," if self.n_failures else "All tests passed."

self.thread_ts = client.chat_postMessage(
channel="#transformers-ci-daily-quantization",
channel=SLACK_REPORT_CHANNEL_ID,
blocks=payload,
text=text,
)
Expand Down Expand Up @@ -166,12 +166,9 @@ def post_reply(self):

if __name__ == "__main__":
setup_status = os.environ.get("SETUP_STATUS")
SLACK_REPORT_CHANNEL_ID = os.environ["SLACK_REPORT_CHANNEL"]
setup_failed = True if setup_status is not None and setup_status != "success" else False

org = "huggingface"
repo = "transformers"
repository_full_name = f"{org}/{repo}"

# This env. variable is set in workflow file (under the job `send_results`).
ci_event = os.environ["CI_EVENT"]

Expand Down Expand Up @@ -244,11 +241,7 @@ def post_reply(self):
quantization_results[quant]["failures"][artifact_path["gpu"]].append(
{"line": line, "trace": stacktraces.pop(0)}
)
if not os.path.isdir(os.path.join(os.getcwd(), "prev_ci_results")):
os.makedirs(os.path.join(os.getcwd(), "prev_ci_results"))

with open("prev_ci_results/quantization_results.json", "w", encoding="UTF-8") as fp:
json.dump(quantization_results, fp, indent=4, ensure_ascii=False)
message = QuantizationMessage(
title,
results=quantization_results,
Expand Down

0 comments on commit 176e055

Please sign in to comment.