Build and push HuggingFace TGI docker image

Build and push HuggingFace TGI docker image #48

Workflow file for this run

.github/workflows/build-huggingface.yml at 9089d54

	# Manually dispatched pipeline with four jobs:
	#   1. create-runner        - runs start_instance.sh on the scheduler runner
	#   2. build-and-push-image - builds the TGI image and pushes it to ECR
	#   3. run-tests            - pulls the pushed image and smoke-tests three models
	#   4. stop-runner          - runs stop_instance.sh, regardless of earlier results
	name: Build and push HuggingFace TGI docker image

	on:
	  workflow_dispatch:
	    inputs:
	      # All four inputs are combined into the image tag; tgi-version and
	      # cuda-version also select the Dockerfile path, and tgi-version selects
	      # the upstream git ref (v<tgi-version>).
	      tgi-version:
	        description: 'tgi version'
	        required: true
	        default: '1.1.0'
	      pytorch-version:
	        description: 'pytorch version'
	        required: true
	        default: '2.0.1'
	      cuda-version:
	        description: 'cuda version'
	        required: true
	        default: '118'
	      ubuntu-version:
	        description: 'ubuntu version'
	        required: true
	        default: '20.04'

	jobs:
	  create-runner:
	    runs-on: [ self-hosted, scheduler ]
	    steps:
	      - name: Create new G5 instance
	        id: create_gpu
	        env:
	          # Handed to the script through the environment instead of being
	          # interpolated into the script text.
	          RUNNER_PAT: ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}
	        run: |
	          # The default `run` shell is `bash -e` without pipefail, so a failed
	          # curl would otherwise be masked by the last command of the pipeline.
	          set -o pipefail
	          cd /home/ubuntu/djl_benchmark_script/scripts
	          # --exit-status makes jq fail when `.token` is null/missing;
	          # --raw-output prints the string without surrounding quotes.
	          token=$( curl -X POST -H "Authorization: token ${RUNNER_PAT}" \
	            https://api.github.com/repos/awslabs/llm-hosting-container/actions/runners/registration-token \
	            --fail \
	            | jq --exit-status --raw-output '.token' )
	          # NOTE(review): start_instance.sh lives outside this repository view;
	          # presumably it launches the instance, registers it as a runner and
	          # sets the `action_g5_instance_id` step output - confirm.
	          ./start_instance.sh action_g5 "$token" awslabs/llm-hosting-container
	    outputs:
	      gpu_instance_id: ${{ steps.create_gpu.outputs.action_g5_instance_id }}

	  build-and-push-image:
	    runs-on: [ self-hosted, g5 ]
	    timeout-minutes: 150
	    needs: create-runner
	    env:
	      TGI_VERSION: ${{github.event.inputs.tgi-version}}
	      PYTORCH_VERSION: ${{github.event.inputs.pytorch-version}}
	      CUDA_VERSION: ${{github.event.inputs.cuda-version}}
	      UBUNTU_VERSION: ${{github.event.inputs.ubuntu-version}}
	    steps:
	      # Upstream TGI sources go to the workspace root (the docker build context).
	      - uses: actions/checkout@v3
	        with:
	          repository: huggingface/text-generation-inference
	          ref: v${{ env.TGI_VERSION }}
	      # This repository is checked out into a subdirectory; it provides the
	      # Dockerfile referenced by the build step below.
	      - uses: actions/checkout@v3
	        with:
	          path: llm-hosting-container
	      - name: Setup Docker buildx
	        uses: docker/setup-buildx-action@v2
	        with:
	          install: true
	      - name: Inject slug/short variables
	        uses: rlespinasse/[email protected]
	      - name: Configure AWS Credentials
	        uses: aws-actions/configure-aws-credentials@v2
	        with:
	          aws-region: us-east-1
	      - name: Login to Amazon ECR
	        id: login-ecr
	        uses: aws-actions/amazon-ecr-login@v1
	        with:
	          registries: "125045733377"
	      - name: Clean docker env
	        run: |
	          yes | docker system prune -a --volumes
	      - name: Build and push docker image
	        uses: docker/build-push-action@v4
	        env:
	          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
	          REPOSITORY: djl-serving
	        with:
	          context: .
	          file: llm-hosting-container/huggingface/pytorch/tgi/docker/${{ env.TGI_VERSION }}/py3/cu${{ env.CUDA_VERSION }}/Dockerfile.gpu
	          push: true
	          target: sagemaker
	          platforms: 'linux/amd64'
	          provenance: false
	          # Must stay in sync with the TAG variable of the run-tests job.
	          tags: ${{ env.REGISTRY }}/${{ env.REPOSITORY }}:${{ env.PYTORCH_VERSION }}-tgi${{ env.TGI_VERSION }}-gpu-py39-cu${{ env.CUDA_VERSION }}-ubuntu${{ env.UBUNTU_VERSION }}
	          cache-from: type=gha
	          cache-to: type=gha,mode=max

	  run-tests:
	    runs-on: [ self-hosted, g5 ]
	    timeout-minutes: 30
	    needs: [build-and-push-image, create-runner]
	    env:
	      TGI_VERSION: ${{github.event.inputs.tgi-version}}
	      REPOSITORY: djl-serving
	      # Same tag format as the `tags` input of the build step.
	      TAG: ${{github.event.inputs.pytorch-version}}-tgi${{github.event.inputs.tgi-version}}-gpu-py39-cu${{github.event.inputs.cuda-version}}-ubuntu${{github.event.inputs.ubuntu-version}}
	    steps:
	      - uses: actions/checkout@v3
	      - name: Clean env
	        run: |
	          yes | docker system prune -a --volumes
	          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
	          echo "wait dpkg lock..."
	          # Poll every 5 seconds until no process holds any dpkg/apt lock file.
	          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
	      - name: Configure AWS Credentials
	        uses: aws-actions/configure-aws-credentials@v2
	        with:
	          aws-region: us-east-1
	      - name: Login to Amazon ECR
	        id: login-ecr
	        uses: aws-actions/amazon-ecr-login@v1
	        with:
	          registries: "125045733377"
	      - name: Pull docker
	        env:
	          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
	        run: |
	          docker pull ${REGISTRY}/${REPOSITORY}:${TAG}
	      # Each test step starts the container detached, waits a fixed time (there
	      # is no readiness polling), posts one prompt to /invocations and fails
	      # the step when the response does not start with the expected prefix.
	      - name: Test bloom-560m
	        env:
	          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
	        run: |
	          set -ex
	          HF_MODEL_ID=bigscience/bloom-560m && \
	          SM_NUM_GPUS=4 && \
	          TGI_VERSION=$TGI_VERSION && \
	          docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
	            -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID \
	            ${REGISTRY}/${REPOSITORY}:${TAG}
	          sleep 30
	          ret=$(curl http://localhost:8080/invocations -X POST \
	            -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
	            -H 'Content-Type: application/json')
	          [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]] && exit 1
	          docker rm -f $(docker ps -aq)
	      - name: Test gpt-neox-20b
	        env:
	          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
	        run: |
	          set -ex
	          HF_MODEL_ID=EleutherAI/gpt-neox-20b && \
	          SM_NUM_GPUS=4 && \
	          TGI_VERSION=$TGI_VERSION && \
	          docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
	            -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID \
	            ${REGISTRY}/${REPOSITORY}:${TAG}
	          sleep 400
	          ret=$(curl http://localhost:8080/invocations -X POST \
	            -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
	            -H 'Content-Type: application/json')
	          [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]] && exit 1
	          docker rm -f $(docker ps -aq)
	      - name: Test flan-t5-xxl
	        env:
	          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
	        run: |
	          set -ex
	          HF_MODEL_ID=google/flan-t5-xxl && \
	          SM_NUM_GPUS=4 && \
	          TGI_VERSION=$TGI_VERSION && \
	          docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
	            -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID \
	            ${REGISTRY}/${REPOSITORY}:${TAG}
	          sleep 400
	          ret=$(curl http://localhost:8080/invocations -X POST \
	            -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
	            -H 'Content-Type: application/json')
	          # Unlike the two steps above, only the JSON prefix is checked here,
	          # not an echo of the prompt.
	          [[ $ret != "[{\"generated_text\""* ]] && exit 1
	          docker rm -f $(docker ps -aq)
	      # A failing test step exits before its own `docker rm`, so leftover
	      # containers are removed here; `|| true` keeps this cleanup best-effort.
	      - name: On fail step
	        if: ${{ failure() }}
	        run: |
	          docker rm -f $(docker ps -aq) || true

	  stop-runner:
	    # always(): this job runs even when an earlier job failed or was cancelled.
	    if: always()
	    runs-on: [ self-hosted, scheduler ]
	    needs: [run-tests, build-and-push-image, create-runner]
	    steps:
	      - name: Stop all instances
	        run: |
	          cd /home/ubuntu/djl_benchmark_script/scripts
	          # Empty when create-runner produced no output; the argument is left
	          # unquoted so stop_instance.sh is then called without arguments, as
	          # before. NOTE(review): confirm how stop_instance.sh handles that.
	          instance_id=${{ needs.create-runner.outputs.gpu_instance_id }}
	          ./stop_instance.sh $instance_id

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Build and push HuggingFace TGI docker image #48

Workflow file

Build and push HuggingFace TGI docker image #48

Jobs

Run details

Workflow file for this run