# LMI No-Code tests
#
# Workflow file for run #321 of "LMI No-Code tests".

# "No-code" LMI test workflow: runs on manual dispatch and on a daily cron.
name: LMI No-Code tests
on:
  # Manual trigger; djl-version is optional and defaults to an empty string.
  workflow_dispatch:
    inputs:
      djl-version:
        description: "The released version of DJL"
        required: false
        default: ""
  # Scheduled trigger: every day at 08:00 (GitHub evaluates cron in UTC).
  schedule:
    - cron: "0 8 * * *"
jobs:
# Launches the G-series runner instances (two G5.12xl, two G6.12xl) from the
# scheduler host and publishes their instance ids as job outputs so that the
# matching stop job can shut them down afterwards.
create-runners:
  runs-on: [self-hosted, scheduler]
  steps:
    - name: Create new G5.12xl instance (1 of 2)
      id: create_gpu_g512_1
      run: |
        # The default run shell is `bash -e` without pipefail, so a failing
        # curl would be masked by the last command of the pipeline and an
        # empty token would be handed to start_instance.sh.
        set -euo pipefail
        cd /home/ubuntu/djl_benchmark_script/scripts
        # Request a runner registration token; jq -e fails on a null/missing
        # .token and -r prints it without the surrounding quotes.
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq -er '.token' )
        ./start_instance.sh action_g5 "$token" djl-serving
    - name: Create new G5.12xl instance (2 of 2)
      id: create_gpu_g512_2
      run: |
        # See the first step: make curl/jq failures abort the step.
        set -euo pipefail
        cd /home/ubuntu/djl_benchmark_script/scripts
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq -er '.token' )
        ./start_instance.sh action_g5 "$token" djl-serving
    - name: Create new G6.12xl instance (1 of 2)
      id: create_gpu_g612_1
      run: |
        # See the first step: make curl/jq failures abort the step.
        set -euo pipefail
        cd /home/ubuntu/djl_benchmark_script/scripts
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq -er '.token' )
        ./start_instance.sh action_g6 "$token" djl-serving
    - name: Create new G6.12xl instance (2 of 2)
      id: create_gpu_g612_2
      run: |
        # See the first step: make curl/jq failures abort the step.
        set -euo pipefail
        cd /home/ubuntu/djl_benchmark_script/scripts
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq -er '.token' )
        ./start_instance.sh action_g6 "$token" djl-serving
  # NOTE(review): the *_instance_id step outputs are presumably written by
  # start_instance.sh; confirm against that script.
  outputs:
    g512_instance_id_1: ${{ steps.create_gpu_g512_1.outputs.action_g5_instance_id }}
    g512_instance_id_2: ${{ steps.create_gpu_g512_2.outputs.action_g5_instance_id }}
    g612_instance_id_1: ${{ steps.create_gpu_g612_1.outputs.action_g6_instance_id }}
    g612_instance_id_2: ${{ steps.create_gpu_g612_2.outputs.action_g6_instance_id }}
# Launches the P4d runner instance from the scheduler host and publishes its
# instance id as a job output for the matching stop job.
create-runners-p4d:
  runs-on: [self-hosted, scheduler]
  steps:
    - name: Create new P4d instance
      id: create_gpu_p4d
      run: |
        # The default run shell is `bash -e` without pipefail, so a failing
        # curl would be masked by the last command of the pipeline and an
        # empty token would be handed to start_instance.sh.
        set -euo pipefail
        cd /home/ubuntu/djl_benchmark_script/scripts
        # Request a runner registration token; jq -e fails on a null/missing
        # .token and -r prints it without the surrounding quotes.
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq -er '.token' )
        ./start_instance.sh action_lmic_p4d "$token" djl-serving
  # NOTE(review): the step output is presumably written by start_instance.sh;
  # confirm against that script.
  outputs:
    p4d_instance_id: ${{ steps.create_gpu_p4d.outputs.action_lmic_p4d_instance_id }}
# Launches the Inf2.24x runner instance from the scheduler host and publishes
# its instance id as a job output for the matching stop job.
create-runners-inf:
  runs-on: [self-hosted, scheduler]
  steps:
    - name: Create new Inf2.24x instance
      id: create_inf2_24x
      run: |
        # The default run shell is `bash -e` without pipefail, so a failing
        # curl would be masked by the last command of the pipeline and an
        # empty token would be handed to start_instance.sh.
        set -euo pipefail
        cd /home/ubuntu/djl_benchmark_script/scripts
        # Request a runner registration token; jq -e fails on a null/missing
        # .token and -r prints it without the surrounding quotes.
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq -er '.token' )
        ./start_instance.sh action_inf2 "$token" djl-serving
  # NOTE(review): the step output is presumably written by start_instance.sh;
  # confirm against that script.
  outputs:
    inf2_24x_instance_id: ${{ steps.create_inf2_24x.outputs.action_inf2_instance_id }}
# Runs the no-code model tests on the P4d runner, once per container flavour.
# Each test step writes a docker_env file, launches the serving container,
# runs the Python client against it and removes the container again.
p4d-no-code-tests:
  runs-on: [self-hosted, p4d]
  timeout-minutes: 240
  needs: create-runners-p4d
  strategy:
    # Limit to 1 so we don't steal a p4d from another test that may be running
    max-parallel: 1
    fail-fast: false
    matrix:
      container: [tensorrt-llm, lmi]
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests "numpy<2" pillow
    - name: Install s5cmd
      working-directory: serving/docker
      run: sudo scripts/install_s5cmd.sh x64
    # NOTE(review): DJLSERVING_DOCKER_TAG used below is presumably exported by
    # docker_name_builder.sh; confirm against that script.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh ${{ matrix.container }} ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: serving/docker
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Llama3-70b lmi container
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=s3://djl-llm/llama-3-70b-hf/" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
        serve
        python3 llm/client.py no_code llama-70b
        ./remove_container.sh
    # Only exercised with the lmi container.
    - name: CodeLlama lmi container
      working-directory: tests/integration
      if: ${{ matrix.container == 'lmi' }}
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=s3://djl-llm/CodeLlama-34b-Instruct-hf/" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
        serve
        python3 llm/client.py no_code codellama
        ./remove_container.sh
    - name: Mixtral-8x7b
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=s3://djl-llm/mixtral-8x7b/" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
        serve
        python3 llm/client.py no_code mixtral-8x7b
        ./remove_container.sh
    # Only exercised with the lmi container.
    - name: DBRX lmi container
      working-directory: tests/integration
      if: ${{ matrix.container == 'lmi' }}
      run: |
        rm -rf models
        echo -e "HF_MODEL_ID=s3://djl-llm/dbrx-instruct/" > docker_env
        echo -e "HF_MODEL_TRUST_REMOTE_CODE=true" >> docker_env
        echo -e "MODEL_LOADING_TIMEOUT=3600" >> docker_env
        echo -e "OPTION_GPU_MEMORY_UTILIZATION=0.95" >> docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
        serve
        python3 llm/client.py no_code dbrx
        ./remove_container.sh
    # On failure: best-effort container removal, then dump the serving log.
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        ./remove_container.sh || true
        cat logs/serving.log
    # Steps are skipped after a failure unless they opt in; always() makes
    # sure the logs are uploaded for failed runs as well.
    - name: Upload test logs
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: no-code-p4d-${{ matrix.container }}-logs
        path: tests/integration/logs/
# Runs the no-code model tests on the G5 and G6 runners for every
# container/machine combination in the matrix. Each test step writes a
# docker_env file, launches the serving container, runs the Python client
# against it and removes the container again.
g-series-no-code-tests:
  runs-on: [self-hosted, "${{ matrix.machine }}"]
  timeout-minutes: 240
  needs: create-runners
  strategy:
    fail-fast: false
    matrix:
      container: [tensorrt-llm, lmi]
      machine: [g5, g6]
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests "numpy<2" huggingface_hub pillow
    - name: Install s5cmd
      working-directory: serving/docker
      run: sudo scripts/install_s5cmd.sh x64
    # NOTE(review): DJLSERVING_DOCKER_TAG used below is presumably exported by
    # docker_name_builder.sh; confirm against that script.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh ${{ matrix.container }} ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: serving/docker
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Llama2-7b with tgi compat
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=s3://djl-llm/llama-2-7b-hf/\nOPTION_TGI_COMPAT=true" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
        serve
        python3 llm/tgi_client.py
        ./remove_container.sh
    - name: Llama3-8b lmi container
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=s3://djl-llm/llama-3-8b-hf/" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
        serve
        python3 llm/client.py no_code llama-7b
        ./remove_container.sh
    - name: Llama2-13b lmi container
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=s3://djl-llm/llama-2-13b-hf/" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
        serve
        python3 llm/client.py no_code llama-13b
        ./remove_container.sh
    - name: Gemma-2b lmi container
      working-directory: tests/integration
      run: |
        rm -rf models
        # TODO: Fix tp back to gemma 7b once TRTLLM problem
        # https://github.com/NVIDIA/TensorRT-LLM/issues/2058 fixed
        echo -en "HF_MODEL_ID=s3://djl-llm/gemma-2b/\nTENSOR_PARALLEL_DEGREE=1" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
        serve
        python3 llm/client.py no_code gemma-7b
        ./remove_container.sh
    - name: Mistral-7b lmi container
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=s3://djl-llm/mistral-7b/" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
        serve
        python3 llm/client.py no_code mistral-7b
        ./remove_container.sh
    - name: GPTNeox lmi container
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=s3://djl-llm/gpt-neox-20b/" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
        serve
        python3 llm/client.py no_code gpt-neox
        ./remove_container.sh
    - name: Phi2 lmi container
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=s3://djl-llm/phi-2/\nTENSOR_PARALLEL_DEGREE=1" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
        serve
        python3 llm/client.py no_code phi-2
        ./remove_container.sh
    - name: Baichuan lmi container
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=s3://djl-llm/baichuan2-13b/\nHF_MODEL_TRUST_REMOTE_CODE=true" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
        serve
        python3 llm/client.py no_code baichuan-13b
        ./remove_container.sh
    # Only exercised with the lmi container.
    - name: Qwen-1.5 lmi container
      working-directory: tests/integration
      if: ${{ matrix.container == 'lmi' }}
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=Qwen/Qwen1.5-14B" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
        serve
        python3 llm/client.py no_code qwen-1.5-14b
        ./remove_container.sh
    # Only exercised with the lmi container.
    - name: Starcoder2 lmi container
      working-directory: tests/integration
      if: ${{ matrix.container == 'lmi' }}
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=s3://djl-llm/bigcode-starcoder2/" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
        serve
        python3 llm/client.py no_code starcoder
        ./remove_container.sh
    # On failure: best-effort container removal (the failed step never reached
    # its own remove_container.sh call), then dump the serving log.
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        ./remove_container.sh || true
        cat logs/serving.log
    # Steps are skipped after a failure unless they opt in; always() makes
    # sure the logs are uploaded for failed runs as well.
    - name: Upload test logs
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: no-code-${{ matrix.machine }}-${{ matrix.container }}-logs
        path: tests/integration/logs/
# Runs the no-code model tests on the Inf2 runner with the pytorch-inf2
# container. Each test step writes a docker_env file, launches the serving
# container, runs the Python client against it and removes the container.
inf2-series-no-code-tests:
  runs-on: [self-hosted, "${{ matrix.machine }}"]
  timeout-minutes: 240
  # The Inf2 runner is launched by create-runners-inf, not by the G-series
  # create-runners job, so that is the job these tests must wait for.
  needs: create-runners-inf
  strategy:
    fail-fast: false
    matrix:
      container: [pytorch-inf2]
      machine: [inf2]
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests "numpy<2" huggingface_hub pillow
    - name: Install s5cmd
      working-directory: serving/docker
      run: sudo scripts/install_s5cmd.sh x64
    # NOTE(review): DJLSERVING_DOCKER_TAG used below is presumably exported by
    # docker_name_builder.sh; confirm against that script.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh ${{ matrix.container }} ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: serving/docker
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Llama2-7b with tgi compat
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=s3://djl-llm/llama-2-7b-hf/\nOPTION_TGI_COMPAT=true" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }}-2 \
        serve
        python3 llm/tgi_client.py
        ./remove_container.sh
    - name: Llama3-8b lmi container
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=s3://djl-llm/llama-3-8b-hf/" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }}-2 \
        serve
        python3 llm/client.py no_code llama-7b
        ./remove_container.sh
    - name: Mistral-7b lmi container
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "HF_MODEL_ID=s3://djl-llm/mistral-7b/" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }}-2 \
        serve
        python3 llm/client.py no_code mistral-7b
        ./remove_container.sh
    # On failure: best-effort container removal (the failed step never reached
    # its own remove_container.sh call), then dump the serving log.
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        ./remove_container.sh || true
        cat logs/serving.log
    # Steps are skipped after a failure unless they opt in; always() makes
    # sure the logs are uploaded for failed runs as well.
    - name: Upload test logs
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: no-code-${{ matrix.machine }}-${{ matrix.container }}-logs
        path: tests/integration/logs/
# Stops the four G-series instances once the G-series tests have finished.
# `if: always()` makes the job run even when the tests failed or were
# cancelled.
stop-runners-gseries:
  if: always()
  runs-on: [self-hosted, scheduler]
  needs: [create-runners, g-series-no-code-tests]
  steps:
    - name: Stop all instances
      run: |
        cd /home/ubuntu/djl_benchmark_script/scripts
        # The run shell uses `bash -e`, so stopping the instances one after
        # another would abort on the first failure and leave the remaining
        # instances running. Attempt every instance and fail at the end.
        status=0
        for instance_id in \
          "${{ needs.create-runners.outputs.g512_instance_id_1 }}" \
          "${{ needs.create-runners.outputs.g512_instance_id_2 }}" \
          "${{ needs.create-runners.outputs.g612_instance_id_1 }}" \
          "${{ needs.create-runners.outputs.g612_instance_id_2 }}"; do
          # An output is empty when the matching create step never produced
          # an id. NOTE(review): assumes stop_instance.sh needs an id; confirm.
          if [ -z "$instance_id" ]; then
            echo "::warning::Skipping stop for an instance with no recorded id"
            continue
          fi
          ./stop_instance.sh "$instance_id" || status=1
        done
        exit "$status"
# Stops the P4d instance once the P4d tests have finished; `if: always()`
# makes the job run regardless of the outcome of the jobs it needs.
stop-runners-p4d:
  if: always()
  needs:
    - create-runners-p4d
    - p4d-no-code-tests
  runs-on:
    - self-hosted
    - scheduler
  steps:
    - name: Stop all instances
      run: |
        cd /home/ubuntu/djl_benchmark_script/scripts
        # Instance id published by the create-runners-p4d job.
        p4d_id=${{ needs.create-runners-p4d.outputs.p4d_instance_id }}
        ./stop_instance.sh $p4d_id
# Stops the Inf2 instance once the Inf2 tests have finished; `if: always()`
# makes the job run regardless of the outcome of the jobs it needs.
stop-runners-inf2:
  if: always()
  needs:
    - create-runners-inf
    - inf2-series-no-code-tests
  runs-on:
    - self-hosted
    - scheduler
  steps:
    - name: Stop all instances
      run: |
        cd /home/ubuntu/djl_benchmark_script/scripts
        # Instance id published by the create-runners-inf job.
        inf2_id=${{ needs.create-runners-inf.outputs.inf2_24x_instance_id }}
        ./stop_instance.sh $inf2_id