From 2fff50d2a368dfebedf16fb5179bc8647bc81ff2 Mon Sep 17 00:00:00 2001
From: Marc Sun
Date: Tue, 9 Apr 2024 15:42:02 +0200
Subject: [PATCH] better quantization_matrix

Re-enable (un-comment) every job in build-docker-images.yml, and add a
trailing comma to the last entry of the job-name mapping in
utils/notification_service.py.

Replace the inline Python one-liner that computed `quantization_matrix`
in self-scheduled.yml with `find | sort`. `find` prints one directory
per line, but a `name=value` entry appended to $GITHUB_OUTPUT must fit
on a single line, so the sorted list is serialized as a one-line JSON
array before being written. `LC_ALL=C` keeps the ordering independent
of the runner's locale.
NOTE(review): the consumer of `quantization_matrix` is not part of this
patch; presumably it is parsed with `fromJson` for a job matrix --
confirm the JSON array form is what it expects.

---
 .github/workflows/build-docker-images.yml | 556 +++++++++++-----------
 .github/workflows/self-scheduled.yml      |   2 +-
 utils/notification_service.py             |   2 +-
 3 files changed, 280 insertions(+), 280 deletions(-)

diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
index ec01b84bb3288f..23424ffb83ac63 100644
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -18,291 +18,291 @@ concurrency:
   cancel-in-progress: false
 
 jobs:
-  # latest-docker:
-  #   name: "Latest PyTorch + TensorFlow [dev]"
-  #   runs-on: [intel-cpu, 8-cpu, ci]
-  #   steps:
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-all-latest-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
-  #     # Push CI images still need to be re-built daily
-  #     -
-  #       name: Build and push (for Push CI) in a daily basis
-  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-  #       if: inputs.image_postfix != '-push-ci'
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-all-latest-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-all-latest-gpu-push-ci
+  latest-docker:
+    name: "Latest PyTorch + TensorFlow [dev]"
+    runs-on: [intel-cpu, 8-cpu, ci]
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-all-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
+      # Push CI images still need to be re-built daily
+      -
+        name: Build and push (for Push CI) in a daily basis
+        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+        if: inputs.image_postfix != '-push-ci'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-all-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-all-latest-gpu-push-ci
 
-  # latest-torch-deepspeed-docker:
-  #   name: "Latest PyTorch + DeepSpeed"
-  #   runs-on: [intel-cpu, 8-cpu, ci]
-  #   steps:
-  #     - name: Cleanup disk
-  #       run: |
-  #         sudo ls -l /usr/local/lib/
-  #         sudo ls -l /usr/share/
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #         sudo rm -rf /usr/local/lib/android
-  #         sudo rm -rf /usr/share/dotnet
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-deepspeed-latest-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
+  latest-torch-deepspeed-docker:
+    name: "Latest PyTorch + DeepSpeed"
+    runs-on: [intel-cpu, 8-cpu, ci]
+    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
 
-  # # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
-  # latest-torch-deepspeed-docker-for-push-ci-daily-build:
-  #   name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
-  #   runs-on: [intel-cpu, 8-cpu, ci]
-  #   steps:
-  #     - name: Cleanup disk
-  #       run: |
-  #         sudo ls -l /usr/local/lib/
-  #         sudo ls -l /usr/share/
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #         sudo rm -rf /usr/local/lib/android
-  #         sudo rm -rf /usr/share/dotnet
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     # Push CI images still need to be re-built daily
-  #     -
-  #       name: Build and push (for Push CI) in a daily basis
-  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-  #       if: inputs.image_postfix != '-push-ci'
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-deepspeed-latest-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
+  # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
+  latest-torch-deepspeed-docker-for-push-ci-daily-build:
+    name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
+    runs-on: [intel-cpu, 8-cpu, ci]
+    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      # Push CI images still need to be re-built daily
+      -
+        name: Build and push (for Push CI) in a daily basis
+        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+        if: inputs.image_postfix != '-push-ci'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
 
-  # doc-builder:
-  #   name: "Doc builder"
-  #   # Push CI doesn't need this image
-  #   if: inputs.image_postfix != '-push-ci'
-  #   runs-on: [intel-cpu, 8-cpu, ci]
-  #   steps:
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-doc-builder
-  #         push: true
-  #         tags: huggingface/transformers-doc-builder
+  doc-builder:
+    name: "Doc builder"
+    # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
+    runs-on: [intel-cpu, 8-cpu, ci]
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-doc-builder
+          push: true
+          tags: huggingface/transformers-doc-builder
 
-  # latest-pytorch:
-  #   name: "Latest PyTorch [dev]"
-  #   # Push CI doesn't need this image
-  #   if: inputs.image_postfix != '-push-ci'
-  #   runs-on: [intel-cpu, 8-cpu, ci]
-  #   steps:
-  #     - name: Cleanup disk
-  #       run: |
-  #         sudo ls -l /usr/local/lib/
-  #         sudo ls -l /usr/share/
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #         sudo rm -rf /usr/local/lib/android
-  #         sudo rm -rf /usr/share/dotnet
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-gpu
+  latest-pytorch:
+    name: "Latest PyTorch [dev]"
+    # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
+    runs-on: [intel-cpu, 8-cpu, ci]
+    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-gpu
 
-  # latest-pytorch-amd:
-  #   name: "Latest PyTorch (AMD) [dev]"
-  #   runs-on: [intel-cpu, 8-cpu, ci]
-  #   steps:
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-amd-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }}
-  #     # Push CI images still need to be re-built daily
-  #     -
-  #       name: Build and push (for Push CI) in a daily basis
-  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-  #       if: inputs.image_postfix != '-push-ci'
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-amd-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-amd-gpu-push-ci
+  latest-pytorch-amd:
+    name: "Latest PyTorch (AMD) [dev]"
+    runs-on: [intel-cpu, 8-cpu, ci]
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-amd-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }}
+      # Push CI images still need to be re-built daily
+      -
+        name: Build and push (for Push CI) in a daily basis
+        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+        if: inputs.image_postfix != '-push-ci'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-amd-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-amd-gpu-push-ci
 
-  # latest-tensorflow:
-  #   name: "Latest TensorFlow [dev]"
-  #   # Push CI doesn't need this image
-  #   if: inputs.image_postfix != '-push-ci'
-  #   runs-on: [intel-cpu, 8-cpu, ci]
-  #   steps:
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-tensorflow-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-tensorflow-gpu
+  latest-tensorflow:
+    name: "Latest TensorFlow [dev]"
+    # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
+    runs-on: [intel-cpu, 8-cpu, ci]
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-tensorflow-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-tensorflow-gpu
 
-  # latest-pytorch-deepspeed-amd:
-  #   name: "PyTorch + DeepSpeed (AMD) [dev]"
-  #   runs-on: [intel-cpu, 8-cpu, ci]
-  #   steps:
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-deepspeed-amd-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
-  #     # Push CI images still need to be re-built daily
-  #     -
-  #       name: Build and push (for Push CI) in a daily basis
-  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-  #       if: inputs.image_postfix != '-push-ci'
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-deepspeed-amd-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
+  latest-pytorch-deepspeed-amd:
+    name: "PyTorch + DeepSpeed (AMD) [dev]"
+    runs-on: [intel-cpu, 8-cpu, ci]
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
+      # Push CI images still need to be re-built daily
+      -
+        name: Build and push (for Push CI) in a daily basis
+        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+        if: inputs.image_postfix != '-push-ci'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
 
   latest-quantization-torch-docker:
     name: "Latest Pytorch + Quantization [dev]"
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 8277d51b5b14f8..631c10a29df6d2 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -75,7 +75,7 @@ jobs:
         name: Identify quantization method to test
         working-directory: /transformers/tests
         run: |
-          echo "quantization_matrix=$(python3 -c 'import os; tests = os.getcwd(); quantization_tests = os.listdir(os.path.join(tests, "quantization")); d = sorted(list(filter(os.path.isdir, [f"quantization/{x}" for x in quantization_tests]))) ; print(d)')" >> $GITHUB_OUTPUT
+          echo "quantization_matrix=$(find quantization -mindepth 1 -maxdepth 1 -type d | LC_ALL=C sort | python3 -c 'import json, sys; print(json.dumps(sys.stdin.read().split()))')" >> "$GITHUB_OUTPUT"
 
       - name: NVIDIA-SMI
         run: |
diff --git a/utils/notification_service.py b/utils/notification_service.py
index 2d501aa5a1c125..158e01942b81fa 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -1075,7 +1075,7 @@ def prepare_reports(title, header, reports, to_truncate=True):
         "run_pipelines_torch_gpu": "PyTorch pipelines",
         "run_pipelines_tf_gpu": "TensorFlow pipelines",
         "run_examples_gpu": "Examples directory",
-        "run_all_tests_torch_cuda_extensions_gpu": "Torch CUDA extension tests"
+        "run_all_tests_torch_cuda_extensions_gpu": "Torch CUDA extension tests",
     }
 
     # Remove some entries in `additional_files` if they are not concerned.