.github/workflows/self-pr-slow-ci.yml

# Display name of the workflow.
name: PR slow CI

# Run on pull requests that touch a model's modeling file or any test module.
on:
  pull_request:
    paths:
      - 'src/transformers/models/*/modeling_*.py'
      - 'tests/**/test_*.py'

# Runs are grouped by workflow name and PR head branch; starting a new run cancels the one
# still in progress for the same group. `github.run_id` is the fallback key when
# `github.head_ref` is empty.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

# Environment variables shared by every job of this workflow.
# Values are quoted so they are unambiguously strings, which is what the jobs receive anyway.
env:
  HF_HOME: '/mnt/cache'
  TRANSFORMERS_IS_CI: 'yes'
  OMP_NUM_THREADS: '8'
  MKL_NUM_THREADS: '8'
  RUN_SLOW: 'yes'
  # Gated repositories still require agreeing to share information on the Hub repository page before access
  # is granted. This token was created under the bot account `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: 'true'
  RUN_PT_TF_CROSS_TESTS: '1'
  CUDA_VISIBLE_DEVICES: '0,1'

jobs:
  # Works out which model test folders the slow CI should run for this pull request and exposes
  # them as the job output `models`.
  find_models_to_run:
      runs-on: ubuntu-22.04
      name: Find models to run slow tests
      # Triggered only if the required label `run-slow` is added
      if: ${{ contains(github.event.pull_request.labels.*.name, 'run-slow') }}
      outputs:
        models: ${{ steps.models_to_run.outputs.models }}
      steps:
        # Check out the PR head commit with full history.
        - uses: actions/checkout@v4
          with:
            fetch-depth: "0"
            ref: ${{ github.event.pull_request.head.sha }}

        # Store the subject line of the checked-out commit in the job environment as `commit_message`.
        - name: Get commit message
          run: |
            echo "commit_message=$(git show -s --format=%s)" >> "$GITHUB_ENV"

        # SECURITY: the commit message is controlled by the PR author. It is read through the shell variable
        # `$commit_message` (set via `$GITHUB_ENV` in the previous step) rather than interpolated with
        # `${{ env.commit_message }}`, because the latter is pasted into the script text before the shell
        # parses it and would let a crafted message run arbitrary commands.
        # The last line printed by `utils/pr_slow_ci_models.py` is kept as `models`.
        - name: Get models to run slow tests
          run: |
            echo "$commit_message"
            python -m pip install GitPython
            python utils/pr_slow_ci_models.py --commit_message "$commit_message" | tee output.txt
            echo "models=$(tail -n 1 output.txt)" >> "$GITHUB_ENV"

        # Publish `models` as a step output so the job-level `outputs.models` can expose it.
        # `$models` is likewise read as a shell variable instead of being interpolated into the script.
        - name: Models to run slow tests
          id: models_to_run
          run: |
            echo "$models"
            echo "models=$models" >> "$GITHUB_OUTPUT"

  # Runs the test folder of every selected model on both a single-GPU and a multi-GPU runner group
  # and uploads the resulting test reports as artifacts.
  run_models_gpu:
      name: Run all tests for the model
      # Triggered only if `find_models_to_run` is triggered (label `run-slow` is added) and it gives a non-empty
      # list of models to run (either a new model PR or via a commit message)
      if: ${{ needs.find_models_to_run.outputs.models != '[]' }}
      needs: find_models_to_run
      strategy:
        fail-fast: false
        # One job per (model folder, runner group) pair.
        matrix:
          folders: ${{ fromJson(needs.find_models_to_run.outputs.models) }}
          machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
      runs-on:
        group: '${{ matrix.machine_type }}'
      container:
        image: huggingface/transformers-all-latest-gpu
        options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
      steps:
      - name: Echo input and matrix info
        shell: bash
        run: |
          echo "${{ matrix.folders }}"

      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
        # set the artifact folder names (because the character `/` is not allowed).
        # The assignment is quoted so the value is taken as a single word.
        run: |
          echo "${{ matrix.folders }}"
          matrix_folders="${{ matrix.folders }}"
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> "$GITHUB_ENV"

      # Fetch the pull request head into a local branch and check it out in the image's `/transformers` clone.
      - name: Update clone
        working-directory: /transformers
        run: git fetch && git fetch origin pull/${{ github.event.pull_request.number }}/head:pull/${{ github.event.pull_request.number }}/merge && git checkout pull/${{ github.event.pull_request.number }}/merge

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . && python3 -m pip install --upgrade torch torchaudio torchvision

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      # Map the runner group name to the short label used in report and artifact names:
      # `aws-g4dn-2xlarge-cache` -> `single-gpu`, `aws-g4dn-12xlarge-cache` -> `multi-gpu`,
      # anything else is kept unchanged.
      - name: Set `machine_type` for report and artifact names
        working-directory: /transformers
        shell: bash
        run: |
          echo "${{ matrix.machine_type }}"
          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
          fi
          echo "$machine_type"
          echo "machine_type=$machine_type" >> "$GITHUB_ENV"

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      # `CUDA_VISIBLE_DEVICES` is overridden here with the value printed by `utils/set_cuda_devices_for_ci.py`
      # for this test folder before pytest is launched.
      - name: Run all tests on GPU
        working-directory: /transformers
        run: |
          export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
          echo $CUDA_VISIBLE_DEVICES
          python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

      # Must run even after a failure: without `always()` this step is skipped whenever an earlier step
      # failed, which is the case where the report directory may be missing for the upload step below.
      - name: Make sure report directory exists
        if: ${{ always() }}
        shell: bash
        run: |
          mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
          echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"

      # The artifact name uses `matrix_folders` (with `models/` replaced by `models_`), while the path uses
      # the original folder value.
      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports