huggingface · Narsil · Jun 4, 2024 · Jun 4, 2024 · Jun 4, 2024 · Jun 4, 2024
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
@@ -182,9 +182,9 @@ jobs:
           export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
           pytest -s -vv integration-tests
 
-  build-and-push-image-rocm:
+  build-and-push-image-intel:
     concurrency:
-      group: ${{ github.workflow }}-build-and-push-image-rocm-${{ github.head_ref || github.run_id }}
+      group: ${{ github.workflow }}-build-and-push-image-intel-${{ github.head_ref || github.run_id }}
       cancel-in-progress: true
     needs:
       - start-runner
@@ -198,6 +198,9 @@ jobs:
       # with sigstore/fulcio when running outside of PRs.
       id-token: write
       security-events: write
+    outputs:
+      # env is not available in the later `container:`, but previous job outputs are.
+      short_sha: ${{ env.GITHUB_SHA_SHORT }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@v3
@@ -240,7 +243,7 @@ jobs:
           images: |
             registry.internal.huggingface.tech/api-inference/community/text-generation-inference
           tags: |
-            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-rocm
+            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-intel
       # If main, release or tag
       - name: Extract metadata (tags, labels) for Docker
         if: ${{ github.event_name != 'pull_request' }}
@@ -254,29 +257,55 @@ jobs:
             ghcr.io/huggingface/text-generation-inference
             db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
           tags: |
-            type=semver,pattern={{version}}-rocm
-            type=semver,pattern={{major}}.{{minor}}-rocm
-            type=raw,value=latest-rocm,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
-            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-rocm
+            type=semver,pattern={{version}}-intel
+            type=semver,pattern={{major}}.{{minor}}-intel
+            type=raw,value=latest-intel,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
+            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-intel
       - name: Build and push Docker image
         id: build-and-push
         uses: docker/build-push-action@v4
         with:
           context: .
-          file: Dockerfile_amd
+          file: Dockerfile_intel
           push: true
           platforms: 'linux/amd64'
           build-args: |
             GIT_SHA=${{ env.GITHUB_SHA }}
-            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}-rocm
+            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}-intel
           tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
-          cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min
-          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min
+          cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-intel,mode=min
+          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-intel,mode=min
 
-  build-and-push-image-intel:
+  stop-runner:
+    name: Stop self-hosted EC2 runner
+    needs:
+      - start-runner
+      - build-and-push-image
+      - build-and-push-image-intel
+      - build-and-push-image-rocm  # depends on start-runner too; must finish before the runner is stopped
+      - integration-tests
+    runs-on: ubuntu-latest
+    env: {AWS_REGION: us-east-1}
+    if: ${{ always() }}  # required to stop the runner even if an error happened in the previous jobs
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+      - name: Stop EC2 runner
+        uses: philschmid/philschmid-ec2-github-runner@main
+        with:
+          mode: stop
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          label: ${{ needs.start-runner.outputs.label }}
+          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}
+
+  build-and-push-image-rocm:
     concurrency:
-      group: ${{ github.workflow }}-build-and-push-image-intel-${{ github.head_ref || github.run_id }}
+      group: ${{ github.workflow }}-build-and-push-image-rocm-${{ github.head_ref || github.run_id }}
       cancel-in-progress: true
     needs:
       - start-runner
@@ -290,9 +319,9 @@ jobs:
       # with sigstore/fulcio when running outside of PRs.
       id-token: write
       security-events: write
     outputs:
-      # env is not available in the later `container:`, but previous job outputs are.
+      # env is not available in later jobs, but previous job outputs are.
       short_sha: ${{ env.GITHUB_SHA_SHORT }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@v3
@@ -335,7 +364,7 @@ jobs:
           images: |
             registry.internal.huggingface.tech/api-inference/community/text-generation-inference
           tags: |
-            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-intel
+            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-rocm
       # If main, release or tag
       - name: Extract metadata (tags, labels) for Docker
         if: ${{ github.event_name != 'pull_request' }}
@@ -349,85 +378,55 @@ jobs:
             ghcr.io/huggingface/text-generation-inference
             db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
           tags: |
-            type=semver,pattern={{version}}-intel
-            type=semver,pattern={{major}}.{{minor}}-intel
-            type=raw,value=latest-intel,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
-            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-intel
+            type=semver,pattern={{version}}-rocm
+            type=semver,pattern={{major}}.{{minor}}-rocm
+            type=raw,value=latest-rocm,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
+            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-rocm
       - name: Build and push Docker image
         id: build-and-push
         uses: docker/build-push-action@v4
         with:
           context: .
-          file: Dockerfile_intel
+          file: Dockerfile_amd
           push: true
           platforms: 'linux/amd64'
           build-args: |
             GIT_SHA=${{ env.GITHUB_SHA }}
-            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}-intel
+            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}-rocm
           tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
-          cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-intel,mode=min
-          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-intel,mode=min
-
-  stop-runner:
-    name: Stop self-hosted EC2 runner
+          cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min
+          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min
+
+  integration-tests-rocm:
+    concurrency:
+      group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
+      cancel-in-progress: true
+    runs-on: [amd-gpu-tgi, mi250]
     needs:
-      - start-runner
-      - build-and-push-image
       - build-and-push-image-rocm
-      - build-and-push-image-intel
-      - integration-tests
-    runs-on: ubuntu-latest
-    env:
-      AWS_REGION: us-east-1
-    if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
     steps:
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v1
+      - uses: actions/setup-python@v5
         with:
-          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          aws-region: ${{ env.AWS_REGION }}
-      - name: Stop EC2 runner
-        uses: philschmid/philschmid-ec2-github-runner@main
-        with:
-          mode: stop
-          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
-          label: ${{ needs.start-runner.outputs.label }}
-          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}
-
-  # TODO: Move this to `build_amd.yml` (and `build_nvidia.yml`)
-
-  # integration-tests-rocm:
-  #   concurrency:
-  #     group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
-  #     cancel-in-progress: true
-  #   needs:
-  #     - start-runner
-  #     - build-and-push-image
-  #     - integration-tests
-  #     - build-and-push-image-rocm
-  #     - stop-runner
-  #   runs-on: [self-hosted, amd-gpu, multi-gpu, mi300]
-  #   container:
-  #     image: registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ needs.build-and-push-image-rocm.outputs.short_sha }}-rocm
-  #     options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/cache
-  #   env:
-  #     DOCKER_VOLUME: /cache
-  #   steps:
-  #     - name: ROCM-SMI
-  #       run: |
-  #         rocm-smi
-  #     - name: ROCM-INFO
-  #       run: |
-  #         rocminfo  | grep "Agent" -A 14
-  #     - name: Show ROCR environment
-  #       run: |
-  #         echo "ROCR: $ROCR_VISIBLE_DEVICES"
-  #     - name: Install
-  #       run: |
-  #         make install-integration-tests
-  #     - name: Run tests
-  #       run: |
-  #         export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-  #         pytest -s -vv integration-tests
+          python-version: '3.10'
+      - uses: actions/checkout@v4
+      - name: install deps
+        run: |
+          make install-integration-tests
+      - name: ROCM-SMI
+        run: |
+          rocm-smi
+      - name: ROCM-INFO
+        run: |
+          rocminfo  | grep "Agent" -A 14
+      - name: Show ROCR environment
+        run: |
+          echo "ROCR: $ROCR_VISIBLE_DEVICES"
+      # The image tag must match the `sha-<short_sha>-rocm` tag pushed by build-and-push-image-rocm.
+      # The short SHA is read from that job's outputs because no step in this job sets GITHUB_SHA_SHORT.
+      - name: Run tests
+        run: |
+          export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ needs.build-and-push-image-rocm.outputs.short_sha }}-rocm
+          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          export DOCKER_DEVICES=/dev/kfd,/dev/dri
+          python -m pytest -s -vv integration-tests/models/test_flash_gpt2.py
diff --git a/Dockerfile b/Dockerfile
@@ -1,4 +1,5 @@
 # Rust builder
+# Dummy.
 FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
 WORKDIR /usr/src
 

diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
@@ -34,6 +34,7 @@
 DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", None)
 HUGGING_FACE_HUB_TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN", None)
 DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", "/data")
+DOCKER_DEVICES = os.getenv("DOCKER_DEVICES")
 
 
 class ResponseComparator(JSONSnapshotExtension):
@@ -435,6 +436,7 @@ def docker_launcher(
             container = client.containers.get(container_name)
             container.stop()
             container.wait()
+            client.containers.prune()
         except NotFound:
             pass
 
@@ -453,16 +455,27 @@ def docker_launcher(
         if DOCKER_VOLUME:
             volumes = [f"{DOCKER_VOLUME}:/data"]
 
+        if DOCKER_DEVICES:
+            devices = DOCKER_DEVICES.split(",")
+            visible = os.getenv("ROCR_VISIBLE_DEVICES")
+            if visible:
+                env["ROCR_VISIBLE_DEVICES"] = visible
+            device_requests = []
+        else:
+            devices = []
+            device_requests = [
+                docker.types.DeviceRequest(count=gpu_count, capabilities=[["gpu"]])
+            ]
+
         container = client.containers.run(
             DOCKER_IMAGE,
             command=args,
             name=container_name,
             environment=env,
             auto_remove=False,
+            devices=devices,
+            device_requests=device_requests,
             detach=True,
-            device_requests=[
-                docker.types.DeviceRequest(count=gpu_count, capabilities=[["gpu"]])
-            ],
             volumes=volumes,
             ports={"80/tcp": port},
             shm_size="1G",

diff --git a/integration-tests/models/test_flash_gpt2.py b/integration-tests/models/test_flash_gpt2.py
@@ -3,7 +3,7 @@
 
 @pytest.fixture(scope="module")
 def flash_gpt2_handle(launcher):
-    with launcher("openai-community/gpt2", num_shard=2) as handle:
+    with launcher("openai-community/gpt2", num_shard=1) as handle:
         yield handle