# integration_execute.yml
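#
# Manually dispatched workflow that runs the djl-serving integration tests on a
# freshly provisioned self-hosted runner (g6, Graviton, or inf2), optionally
# against a freshly built temporary image (djl-version == 'temp').
#
# Example dispatch (a sketch, assuming the GitHub CLI is installed and
# authenticated against this repository; input values are illustrative):
#   gh workflow run integration_execute.yml \
#     -f instance=action_g6 \
#     -f djl-version=temp \
#     -f test="TestClass and test_fun_name"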
name: Integration Tests Executor
on:
workflow_dispatch:
inputs:
test:
description: 'Which test to run as a pytest keyword expression. Can be a class in tests.py or "TestClass and test_fun_name"'
required: false
default: ""
mark:
description: 'Which tests to run as a pytest marker expression. Should be a mark from pytest.ini'
required: false
default: ""
instance:
description: 'Instance used for testing'
required: true
default: 'action_g6'
type: choice
options:
- action_g6
- action_graviton
- action_inf2
djl-version:
description: 'The released version of DJL. Can be "nightly", "temp", or a DJL release version like "0.28.0"'
required: false
default: 'temp'
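# id-token: write is required so aws-actions/configure-aws-credentials can obtain
# an OIDC token and assume the IAM role used by the test job.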
permissions:
id-token: write
contents: read
jobs:
build-temp:
if: ${{ inputs.djl-version == 'temp' }}
uses: ./.github/workflows/docker-nightly-publish.yml
secrets: inherit
with:
mode: temp
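  # Provisions a new instance of the requested type via start_instance.sh and
  # registers it as a self-hosted runner; runs whether build-temp succeeded or was skipped.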
create-runners:
runs-on: [self-hosted, scheduler]
if: |
always() && (needs.build-temp.result == 'success' || needs.build-temp.result == 'skipped')
needs: [build-temp]
steps:
- name: Create new instance
id: create_instance
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh ${{ inputs.instance }} $token djl-serving
outputs:
instance_id: ${{ steps.create_instance.outputs.instance_id }}
label: ${{ steps.create_instance.outputs.label }}
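  # Runs the integration suite on the runner created above, filtered by the
  # 'test' (-k) and 'mark' (-m) inputs.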
test:
runs-on: [ self-hosted, "${{ needs.create-runners.outputs.label }}"]
timeout-minutes: 60
needs: [create-runners, build-temp]
if: |
always() && needs.create-runners.result == 'success' &&
(needs.build-temp.result == 'success' || needs.build-temp.result == 'skipped')
steps:
- uses: actions/checkout@v4
- name: Clean env
run: |
yes | docker system prune -a --volumes
sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
echo "wait dpkg lock..."
while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
- name: Set up Python3
if: ${{ needs.create-runners.outputs.label != 'aarch64' }}
uses: actions/setup-python@v5
with:
python-version: '3.10.x'
- name: Set up Python3 (aarch64)
if: ${{ needs.create-runners.outputs.label == 'aarch64' }}
run: |
          # Use the system Python packages here because actions/setup-python does not
          # support the aarch64 + ubuntu-20.04 combination used on this runner
sudo apt-get install python3 python-is-python3 python3-pip -y
- name: Install pip dependencies
run: pip3 install pytest requests "numpy<2" pillow huggingface_hub
- name: Install torch
        # torch is used to query the CUDA capability of the current device so that
        # tests can be selected accordingly; the exact torch version does not matter much
run: |
pip3 install torch==2.3.0
      - name: Install awscli
run: |
sudo apt-get update
sudo apt-get install awscli -y
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
aws-region: us-east-1
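      # The temp image lives in the private djl-ci-temp ECR repository, so an ECR
      # login is only needed when djl-version is 'temp'.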
- name: Login for temp
if: ${{ inputs.djl-version == 'temp' }}
run: |
aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp
- name: Install awscurl
working-directory: tests/integration
run: |
wget https://publish.djl.ai/awscurl/awscurl
chmod +x awscurl
mkdir outputs
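      # TEST_DJL_VERSION passes the selected DJL version to the test suite; empty
      # -k / -m expressions apply no filter, so all tests run by default.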
- name: Test
working-directory: tests/integration
env:
TEST_DJL_VERSION: ${{ inputs.djl-version }}
run: |
python -m pytest tests.py -k "${{ inputs.test }}" -m "${{ inputs.mark }}"
- name: Cleanup
working-directory: tests/integration
run: |
rm -rf outputs
rm awscurl
- name: On Failure
if: ${{ failure() }}
working-directory: tests/integration
run: |
for file in outputs/*; do if [ -f "$file" ]; then echo "Contents of $file:"; cat "$file"; echo; fi; done
sudo rm -rf outputs && sudo rm -rf models
rm awscurl
docker rm -f $(docker ps -aq) || true
- name: Upload test logs
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: test-${{ inputs.test }}-logs
path: tests/integration/all_logs/
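  # Always terminates the instance created by create-runners, even if the build
  # or tests failed.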
stop-runners:
if: always()
runs-on: [ self-hosted, scheduler ]
needs: [ build-temp, create-runners, test]
steps:
- name: Stop all instances
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
instance_id=${{ needs.create-runners.outputs.instance_id }}
./stop_instance.sh $instance_id