From 4f9a39a862b9f5d91c54e531ecec38681a0f6818 Mon Sep 17 00:00:00 2001
From: ydshieh <ydshieh@users.noreply.github.com>
Date: Tue, 2 Apr 2024 15:55:55 +0200
Subject: [PATCH] Enable PyTorch/TensorFlow pipeline test jobs in the scheduled CI

---
 .github/workflows/self-scheduled-caller.yml |  18 ++
 .github/workflows/self-scheduled.yml        | 206 ++++++++++----------
 utils/notification_service.py               |   6 +-
 3 files changed, 125 insertions(+), 105 deletions(-)

diff --git a/.github/workflows/self-scheduled-caller.yml b/.github/workflows/self-scheduled-caller.yml
index 90e2b889dabe3b..2eabb622752c2b 100644
--- a/.github/workflows/self-scheduled-caller.yml
+++ b/.github/workflows/self-scheduled-caller.yml
@@ -25,6 +25,24 @@ jobs:
       env_name_for_slack_report_channel: CI_SLACK_CHANNEL_DUMMY_TESTS
     secrets: inherit
 
+  torch-pipeline:
+    name: Torch pipeline CI
+    uses: ./.github/workflows/self-scheduled.yml
+    with:
+      job: run_pipelines_torch_gpu  # matched by the `if: inputs.job == ...` guard on that job in self-scheduled.yml
+      # See the comment for `ENV_NAME_FOR_CI_SLACK_REPORT_CHANNEL_ID` in `.github/workflows/slack-report.yml`.
+      env_name_for_slack_report_channel: CI_SLACK_CHANNEL_DUMMY_TESTS  # NOTE(review): dummy-tests channel - confirm this is the intended destination
+    secrets: inherit
+
+  tf-pipeline:
+    name: TF pipeline CI
+    uses: ./.github/workflows/self-scheduled.yml
+    with:
+      job: run_pipelines_tf_gpu  # matched by the `if: inputs.job == ...` guard on that job in self-scheduled.yml
+      # See the comment for `ENV_NAME_FOR_CI_SLACK_REPORT_CHANNEL_ID` in `.github/workflows/slack-report.yml`.
+      env_name_for_slack_report_channel: CI_SLACK_CHANNEL_DUMMY_TESTS  # NOTE(review): dummy-tests channel - confirm this is the intended destination
+    secrets: inherit
+
   example-ci:
     name: Example CI
     uses: ./.github/workflows/self-scheduled.yml
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 71dde7d67a1684..1fd80d44244282 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -89,6 +89,109 @@ jobs:
       slice_id: ${{ matrix.slice_id }}
     secrets: inherit
 
+  run_pipelines_torch_gpu:
+    if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}  # run only when the caller selects this job via `inputs.job`
+    name: PyTorch pipelines
+    strategy:
+      fail-fast: false  # a failure on one machine type must not cancel the other
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]  # runner labels; the first one comes from the matrix
+    container:
+      image: huggingface/transformers-pytorch-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}  # move the clone at /transformers to the triggering commit
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run all pipeline tests on GPU
+        working-directory: /transformers
+        run: |
+          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines
+
+      - name: Failure short reports
+        if: ${{ failure() }}  # only print the summary when an earlier step failed
+        continue-on-error: true  # best effort: an error in this step does not fail the job
+        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
+        if: ${{ always() }}  # upload the reports whether or not the tests passed
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu  # suffix matches the `additional_files` entry in utils/notification_service.py
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
+
+  run_pipelines_tf_gpu:
+    if: ${{ inputs.job == 'run_pipelines_tf_gpu' }}  # run only when the caller selects this job via `inputs.job`
+    name: TensorFlow pipelines
+    strategy:
+      fail-fast: false  # a failure on one machine type must not cancel the other
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]  # runner labels; the first one comes from the matrix
+    container:
+      image: huggingface/transformers-tensorflow-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: |
+          git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run all pipeline tests on GPU
+        working-directory: /transformers
+        run: |
+          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines
+
+      - name: Failure short reports
+        if: ${{ failure() }}  # only print the summary when an earlier step failed (same as the PyTorch pipelines job)
+        continue-on-error: true  # best effort: an error in this step does not fail the job
+        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
+        if: ${{ always() }}  # upload the reports whether or not the tests passed
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu  # suffix matches the `additional_files` entry in utils/notification_service.py
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
+
   run_examples_gpu:
     if: ${{ inputs.job == 'run_examples_gpu' }}
     name: Examples directory
@@ -140,107 +243,6 @@ jobs:
           name: ${{ matrix.machine_type }}_run_examples_gpu
           path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
 
-#  run_pipelines_torch_gpu:
-#    name: PyTorch pipelines
-#    strategy:
-#      fail-fast: false
-#      matrix:
-#        machine_type: [single-gpu, multi-gpu]
-#    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
-#    container:
-#      image: huggingface/transformers-pytorch-gpu
-#      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-#    needs: setup
-#    steps:
-#      - name: Update clone
-#        working-directory: /transformers
-#        run: git fetch && git checkout ${{ github.sha }}
-#
-#      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-#        working-directory: /transformers
-#        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-#
-#      - name: NVIDIA-SMI
-#        run: |
-#          nvidia-smi
-#
-#      - name: Environment
-#        working-directory: /transformers
-#        run: |
-#          python3 utils/print_env.py
-#
-#      - name: Show installed libraries and their versions
-#        working-directory: /transformers
-#        run: pip freeze
-#
-#      - name: Run all pipeline tests on GPU
-#        working-directory: /transformers
-#        run: |
-#          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines
-#
-#      - name: Failure short reports
-#        if: ${{ failure() }}
-#        continue-on-error: true
-#        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
-#
-#      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
-#        if: ${{ always() }}
-#        uses: actions/upload-artifact@v3
-#        with:
-#          name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
-#          path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
-#
-#  run_pipelines_tf_gpu:
-#    name: TensorFlow pipelines
-#    strategy:
-#      fail-fast: false
-#      matrix:
-#        machine_type: [single-gpu, multi-gpu]
-#    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
-#    container:
-#      image: huggingface/transformers-tensorflow-gpu
-#      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-#    needs: setup
-#    steps:
-#      - name: Update clone
-#        working-directory: /transformers
-#        run: |
-#          git fetch && git checkout ${{ github.sha }}
-#
-#      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-#        working-directory: /transformers
-#        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-#
-#      - name: NVIDIA-SMI
-#        run: |
-#          nvidia-smi
-#
-#      - name: Environment
-#        working-directory: /transformers
-#        run: |
-#          python3 utils/print_env.py
-#
-#      - name: Show installed libraries and their versions
-#        working-directory: /transformers
-#        run: pip freeze
-#
-#      - name: Run all pipeline tests on GPU
-#        working-directory: /transformers
-#        run: |
-#          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines
-#
-#      - name: Failure short reports
-#        if: ${{ always() }}
-#        run: |
-#          cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
-#
-#      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
-#        if: ${{ always() }}
-#        uses: actions/upload-artifact@v3
-#        with:
-#          name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
-#          path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
-#
   run_all_tests_torch_cuda_extensions_gpu:
     if: ${{ inputs.job == 'run_all_tests_torch_cuda_extensions_gpu' }}
     name: Torch CUDA extension tests
@@ -394,7 +396,7 @@ jobs:
 
   send_results:
     name: Slack Report
-    needs: [setup, run_tests_gpu, run_examples_gpu, run_all_tests_torch_cuda_extensions_gpu, run_tests_quantization_torch_gpu, run_extract_warnings]
+    needs: [setup, run_tests_gpu, run_pipelines_torch_gpu, run_pipelines_tf_gpu, run_examples_gpu, run_all_tests_torch_cuda_extensions_gpu, run_tests_quantization_torch_gpu, run_extract_warnings]
     if: ${{ always() }}
     uses: ./.github/workflows/slack-report.yml
     with:
diff --git a/utils/notification_service.py b/utils/notification_service.py
index e20d8c632fc5ea..b6b200d9562251 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -1058,9 +1058,9 @@ def prepare_reports(title, header, reports, to_truncate=True):
 
     # Additional runs
     additional_files = {
-        "Examples directory": "run_examples_gpu",
         "PyTorch pipelines": "run_tests_torch_pipeline_gpu",
         "TensorFlow pipelines": "run_tests_tf_pipeline_gpu",
+        "Examples directory": "run_examples_gpu",
         "Torch CUDA extension tests": "run_tests_torch_cuda_extensions_gpu_test_reports",
         "Quantization tests": "run_tests_quantization_torch_gpu",
     }
@@ -1079,9 +1079,9 @@ def prepare_reports(title, header, reports, to_truncate=True):
     # `additional_files`. This is used to remove some entries in `additional_files` that are not concerned by a
     # specific job. See below.
     job_to_test_map = {
+        "run_pipelines_torch_gpu": "PyTorch pipelines",
+        "run_pipelines_tf_gpu": "TensorFlow pipelines",
         "run_examples_gpu": "Examples directory",
-        # "": "PyTorch pipelines",
-        # "": "TensorFlow pipelines",
         "run_all_tests_torch_cuda_extensions_gpu": "Torch CUDA extension tests",
         "run_tests_quantization_torch_gpu": "Quantization tests",
     }