Large model integration tests with P4D and compiler optimizations #408
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: display name, triggers, and the opening of the job map.
name: Large model integration tests with P4D and compiler optimizations

on:
  # Manual trigger; both inputs are optional and default to the empty string.
  workflow_dispatch:
    inputs:
      # Interpolated into the docker_name_builder.sh call of each test job.
      djl-version:
        description: "The released version of DJL"
        required: false
        default: ""
      # Compared against an allow-list in the lmi-dist-test job's `if`.
      run_test:
        description: "Run only the tests you need [aiccl]"
        required: false
        default: ""
  # Daily at 15:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: "0 15 * * *"

jobs:
# Requests a runner registration token from the GitHub API and hands it to
# start_instance.sh on the scheduler host; exposes the step's instance-id
# output to downstream jobs as `p4d_instance_id`.
create-runners-p4d:
  runs-on: [self-hosted, scheduler]
  steps:
    - name: Create new P4d.24xl instance
      id: create_gpu_p4d
      run: |
        # The default run shell is `bash -e` without pipefail, so a failed
        # `curl --fail` would be hidden by the rest of the pipeline and the
        # instance would be started with an empty token. Fail fast instead.
        set -o pipefail
        cd /home/ubuntu/djl_benchmark_script/scripts
        # jq -r emits the raw string (no surrounding quotes to strip);
        # jq -e exits non-zero when `.token` is null or missing.
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq -e -r '.token' )
        ./start_instance.sh action_lmic_p4d "$token" djl-serving
  outputs:
    # NOTE(review): presumably start_instance.sh writes
    # `action_lmic_p4d_instance_id` as a step output — confirm in the script.
    p4d_instance_id: ${{ steps.create_gpu_p4d.outputs.action_lmic_p4d_instance_id }}
# For each AICCL model: prepare the model dir, launch the lmi container,
# run the client checks, then remove the container.
lmi-dist-test:
  # Selected when the run_test input is empty or "aiccl".
  if: contains(fromJson('["", "aiccl"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, p4d ]
  timeout-minutes: 120
  needs: create-runners-p4d
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install pytest requests "numpy<2" pillow huggingface_hub tqdm
    # NOTE(review): presumably this script exports DJLSERVING_DOCKER_TAG for
    # the following steps — confirm in docker_name_builder.sh.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh lmi ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test Mixtral-8x7B
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist_aiccl mixtral-8x7b-aiccl
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
        serve
        python3 llm/client.py lmi_dist_aiccl mixtral-8x7b-aiccl
        ./remove_container.sh
    - name: Test Llama-2-70B
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist_aiccl llama-2-70b-aiccl
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
        serve
        python3 llm/client.py lmi_dist_aiccl llama-2-70b-aiccl
        ./remove_container.sh
    - name: Test codellama/CodeLlama-34b-hf
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist_aiccl codellama-34b-aiccl
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
        serve
        python3 llm/client.py lmi_dist_aiccl codellama-34b-aiccl
        ./remove_container.sh
    - name: Test tiiuae/falcon-40b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist_aiccl falcon-40b-aiccl
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
        serve
        python3 llm/client.py lmi_dist_aiccl falcon-40b-aiccl
        ./remove_container.sh
    - name: Remove models dir
      working-directory: tests/integration
      run: |
        sudo rm -rf models
    # Cleanup that only runs after an earlier step failed: the failing test
    # step never reached its own remove_container.sh call.
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        sudo rm -rf models
        ./remove_container.sh || true
        cat logs/serving.log
    # Steps default to `if: success()`, which would skip the upload exactly
    # when a test failed and the logs are needed; always() keeps it running.
    - name: Upload test logs
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: lmi-dist-aiccl-logs
        path: tests/integration/logs/
# For each TensorRT-LLM model: prepare the model dir, launch the
# tensorrt-llm container, run the client checks, then remove the container.
trtllm-test:
  # Honour the run_test input the same way lmi-dist-test does, so selecting a
  # single suite does not also run this one. Selected when run_test is empty
  # or "trtllm".
  if: contains(fromJson('["", "trtllm"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, p4d ]
  timeout-minutes: 120
  needs: create-runners-p4d
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests "numpy<2"
    # NOTE(review): presumably this script exports DJLSERVING_DOCKER_TAG for
    # the following steps — confirm in docker_name_builder.sh.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh tensorrt-llm ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test llama-2-70B with TP8
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py trtllm llama2-70b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
        serve
        python3 llm/client.py trtllm llama2-70b
        ./remove_container.sh
    - name: Test mixtral-8x7b with TP8
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py trtllm mixtral-8x7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
        serve
        python3 llm/client.py trtllm mixtral-8x7b
        ./remove_container.sh
    - name: Remove models dir
      working-directory: tests/integration
      run: |
        sudo rm -rf models
    # Cleanup that only runs after an earlier step failed: the failing test
    # step never reached its own remove_container.sh call.
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        sudo rm -rf models
        ./remove_container.sh || true
        cat logs/serving.log
    # Steps default to `if: success()`, which would skip the upload exactly
    # when a test failed and the logs are needed; always() keeps it running.
    - name: Upload test logs
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: trtllm-logs
        path: tests/integration/logs/
# For each vLLM model: prepare the model dir, launch the lmi container,
# run the client checks, then remove the container.
vllm-test:
  # Honour the run_test input the same way lmi-dist-test does, so selecting a
  # single suite does not also run this one. Selected when run_test is empty
  # or "vllm".
  if: contains(fromJson('["", "vllm"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, p4d ]
  timeout-minutes: 120
  needs: create-runners-p4d
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests "numpy<2"
    # NOTE(review): presumably this script exports DJLSERVING_DOCKER_TAG for
    # the following steps — confirm in docker_name_builder.sh.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh lmi ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test llama-2-70B with TP8
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py vllm llama2-70b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
        serve
        python3 llm/client.py vllm llama2-70b
        ./remove_container.sh
    - name: Test mixtral-8x7b with TP8
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py vllm mixtral-8x7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
        serve
        python3 llm/client.py vllm mixtral-8x7b
        ./remove_container.sh
    - name: Remove models dir
      working-directory: tests/integration
      run: |
        sudo rm -rf models
    # Cleanup that only runs after an earlier step failed: the failing test
    # step never reached its own remove_container.sh call.
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        sudo rm -rf models
        ./remove_container.sh || true
        cat logs/serving.log
    # Steps default to `if: success()`, which would skip the upload exactly
    # when a test failed and the logs are needed; always() keeps it running.
    - name: Upload test logs
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: vllm-logs
        path: tests/integration/logs/
# Teardown job: waits for the create job and all three test jobs, then passes
# the instance id published by create-runners-p4d to stop_instance.sh.
stop-runners-p4d:
  # always() keeps the teardown running when upstream jobs fail or are skipped.
  if: ${{ always() }}
  runs-on:
    - self-hosted
    - scheduler
  needs:
    - create-runners-p4d
    - lmi-dist-test
    - trtllm-test
    - vllm-test
  steps:
    - name: Stop all instances
      run: |
        cd /home/ubuntu/djl_benchmark_script/scripts
        instance_id=${{ needs.create-runners-p4d.outputs.p4d_instance_id }}
        ./stop_instance.sh $instance_id