# Page header captured with this file (not part of the workflow definition):
# Add AWQ quantization inference support (#1019) #1301
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Starts an on-demand self-hosted EC2 runner, builds and pushes the Docker
# image on it, runs the integration tests against the pushed image, and then
# stops the runner again (even when an earlier job failed).
name: Build and push docker image to internal registry

on:
  workflow_dispatch:
  push:
    branches:
      - 'main'
    tags:
      - 'v*'
  pull_request:
    # Only PRs against main that touch build-relevant paths trigger a run.
    paths:
      - ".github/workflows/build.yaml"
      - "integration-tests/**"
      - "server/**"
      - "proto/**"
      - "router/**"
      - "launcher/**"
      - "Cargo.lock"
      - "rust-toolchain.toml"
      - "Dockerfile"
    branches:
      - 'main'

jobs:
  # Launches the EC2 instance and exposes its runner label and instance id as
  # job outputs for the jobs below.
  start-runner:
    name: Start self-hosted EC2 runner
    runs-on: ubuntu-latest
    env:
      AWS_REGION: us-east-1
      EC2_AMI_ID: ami-03cfed9ea28f4b002
      EC2_INSTANCE_TYPE: g5.12xlarge
      EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc
      EC2_SECURITY_GROUP: sg-030175c435ac141d6
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
      - name: Start EC2 runner
        id: start-ec2-runner
        # NOTE(review): tracks the mutable `main` branch while receiving a PAT;
        # consider pinning to a commit SHA.
        uses: philschmid/philschmid-ec2-github-runner@main
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ${{ env.EC2_AMI_ID }}
          ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }}
          subnet-id: ${{ env.EC2_SUBNET_ID }}
          security-group-id: ${{ env.EC2_SECURITY_GROUP }}
          aws-resource-tags: > # optional, requires additional permissions
            [
              {"Key": "Name", "Value": "ec2-tgi-github-runner"},
              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"}
            ]

  # Builds the image on the EC2 runner and pushes it. Pull requests only get a
  # `sha-<short sha>` tag on the internal registry; other events additionally
  # push to ghcr.io and Azure, sign the image and run the Trivy scans.
  build-and-push-image:
    concurrency:
      group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    needs: start-runner # required to start the main job when the runner is ready
    runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
    permissions:
      contents: write
      packages: write
      # This is used to complete the identity challenge
      # with sigstore/fulcio when running outside of PRs.
      id-token: write
      security-events: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      # NOTE(review): the version tags on setup-buildx, github-slug, login and
      # metadata actions in this file were restored from obfuscated
      # references; confirm them against the repository history.
      - name: Initialize Docker Buildx
        uses: docker/setup-buildx-action@v2.0.0
        with:
          install: true
      # Provides GITHUB_SHA_SHORT, which the tag definitions below rely on.
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4.4.1
      - name: Install cosign
        if: github.event_name != 'pull_request'
        uses: sigstore/cosign-installer@f3c664df7af409cb4873aa5068053ba9d61a57b6 #v2.6.0
        with:
          cosign-release: 'v1.13.1'
      - name: Tailscale
        uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
        with:
          authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
      - name: Login to GitHub Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Login to internal Container Registry
        uses: docker/login-action@v2.1.0
        with:
          username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
          password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
          registry: registry.internal.huggingface.tech
      - name: Login to Azure Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2.1.0
        with:
          username: ${{ secrets.AZURE_DOCKER_USERNAME }}
          password: ${{ secrets.AZURE_DOCKER_PASSWORD }}
          registry: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io
      # If pull request
      - name: Extract metadata (tags, labels) for Docker
        if: ${{ github.event_name == 'pull_request' }}
        id: meta-pr
        uses: docker/metadata-action@v4.3.0
        with:
          images: |
            registry.internal.huggingface.tech/api-inference/community/text-generation-inference
          tags: |
            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
      # If main, release or tag
      - name: Extract metadata (tags, labels) for Docker
        if: ${{ github.event_name != 'pull_request' }}
        id: meta
        uses: docker/metadata-action@v4.3.0
        with:
          flavor: |
            latest=auto
          images: |
            registry.internal.huggingface.tech/api-inference/community/text-generation-inference
            ghcr.io/huggingface/text-generation-inference
            db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
          tags: |
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v4
        with:
          context: .
          file: Dockerfile
          push: true
          platforms: 'linux/amd64'
          # github.sha is the documented context for the commit SHA; the `env`
          # context is only documented to hold workflow-defined variables.
          build-args: |
            GIT_SHA=${{ github.sha }}
            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
          # Exactly one of the two metadata steps runs, so fall back from the
          # non-PR outputs to the PR outputs.
          tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
          cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
      # Sign the resulting Docker image digest except on PRs.
      # This will only write to the public Rekor transparency log when the Docker
      # repository is public to avoid leaking data.
      - name: Sign the published Docker image
        if: ${{ github.event_name != 'pull_request' }}
        env:
          COSIGN_EXPERIMENTAL: "true"
          # Passed through the environment rather than expanded into the
          # script text, so the values are never parsed as shell code.
          TAGS: ${{ steps.meta.outputs.tags }}
          DIGEST: ${{ steps.build-and-push.outputs.digest }}
        # This step uses the identity token to provision an ephemeral certificate
        # against the sigstore community Fulcio instance.
        run: echo "${TAGS}" | xargs -I {} cosign sign {}@${DIGEST}
      - name: Run Trivy in GitHub SBOM mode and submit results to Dependency Graph
        # NOTE(review): tracks the mutable `master` branch; consider pinning.
        uses: aquasecurity/trivy-action@master
        if: ${{ github.event_name != 'pull_request' }}
        with:
          image-ref: 'ghcr.io/huggingface/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}'
          format: 'github'
          output: 'dependency-results.sbom.json'
          github-pat: ${{ secrets.GITHUB_TOKEN }}
          scanners: 'vuln'
      - name: Run Trivy vulnerability scanner
        # NOTE(review): tracks the mutable `master` branch; consider pinning.
        uses: aquasecurity/trivy-action@master
        if: ${{ github.event_name != 'pull_request' }}
        with:
          image-ref: 'ghcr.io/huggingface/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}'
          format: 'sarif'
          output: 'trivy-results.sarif'
          severity: 'CRITICAL'
          scanners: 'vuln'
      - name: Upload Trivy scan results to GitHub Security tab
        uses: github/codeql-action/upload-sarif@v2
        if: ${{ github.event_name != 'pull_request' }}
        with:
          sarif_file: 'trivy-results.sarif'

  # Runs the pytest integration suite on the EC2 runner against the
  # `sha-<short sha>` image on the internal registry.
  integration-tests:
    concurrency:
      group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    needs:
      - start-runner
      - build-and-push-image # Wait for the docker image to be built
    runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
    env:
      DOCKER_VOLUME: /cache
    steps:
      # Same checkout major version as the build job.
      - uses: actions/checkout@v3
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4.4.1
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: "3.9"
      - name: Tailscale
        uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
        with:
          authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
      # Formats /dev/nvme1n1 as ext4 and mounts it at DOCKER_VOLUME.
      - name: Prepare disks
        run: |
          sudo mkfs -t ext4 /dev/nvme1n1
          sudo mkdir ${{ env.DOCKER_VOLUME }}
          sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }}
      - name: Install
        run: |
          make install-integration-tests
      - name: Run tests
        # The image reference and the Hub token reach pytest through the step
        # environment instead of being expanded into the script text.
        env:
          DOCKER_IMAGE: registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
        run: |
          pytest -s -vv integration-tests

  # Stops the EC2 runner identified by the start-runner outputs.
  stop-runner:
    name: Stop self-hosted EC2 runner
    needs:
      - start-runner
      - build-and-push-image
      - integration-tests
    runs-on: ubuntu-latest
    env:
      AWS_REGION: us-east-1
    if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
      - name: Stop EC2 runner
        # NOTE(review): tracks the mutable `main` branch while receiving a PAT;
        # consider pinning to a commit SHA.
        uses: philschmid/philschmid-ec2-github-runner@main
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.start-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}