From 47bb4ac535cfc2920c21731047fe5b9af6866d14 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Wed, 6 Nov 2024 10:23:42 +0800 Subject: [PATCH 1/3] Update HPU Dockerfile to latest version and fix CI Signed-off-by: Sun, Xuehao --- .github/workflows/docker/hpu.dockerfile | 2 +- .github/workflows/model_test_cpu.yml | 1 + .github/workflows/model_test_hpu.yml | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docker/hpu.dockerfile b/.github/workflows/docker/hpu.dockerfile index ed40e737..6aa695ea 100644 --- a/.github/workflows/docker/hpu.dockerfile +++ b/.github/workflows/docker/hpu.dockerfile @@ -1,4 +1,4 @@ -FROM vault.habana.ai/gaudi-docker/1.13.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.0:latest as hpu +FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest as hpu ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/root:/usr/lib/habanalabs/ diff --git a/.github/workflows/model_test_cpu.yml b/.github/workflows/model_test_cpu.yml index e2673c5a..370316d1 100644 --- a/.github/workflows/model_test_cpu.yml +++ b/.github/workflows/model_test_cpu.yml @@ -13,6 +13,7 @@ on: - evals/evaluation/** - evals/metrics/** - setup.py + - "!**.md" workflow_dispatch: # If there is a new commit, the previous jobs will be canceled diff --git a/.github/workflows/model_test_hpu.yml b/.github/workflows/model_test_hpu.yml index 70be76bd..5a1c86ef 100644 --- a/.github/workflows/model_test_hpu.yml +++ b/.github/workflows/model_test_hpu.yml @@ -13,6 +13,7 @@ on: - evals/evaluation/** - evals/metrics/** - setup.py + - "!**.md" workflow_dispatch: # If there is a new commit, the previous jobs will be canceled From 1232d68d291440c8a3eb34280cf7bae9d12a4940 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Wed, 6 Nov 2024 10:31:17 +0800 Subject: [PATCH 2/3] update dockerfile Signed-off-by: Sun, Xuehao --- docker/hpu.dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/hpu.dockerfile b/docker/hpu.dockerfile index 0e5ce6d1..a550c2b3 100644 --- a/docker/hpu.dockerfile +++ b/docker/hpu.dockerfile @@ -1,4 +1,4 @@ -FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu +FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest as hpu ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/root:/usr/lib/habanalabs/ From 2888203ac31e58721be55c7f084c53a267396f26 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Wed, 6 Nov 2024 11:44:28 +0800 Subject: [PATCH 3/3] fix ci Signed-off-by: Sun, Xuehao --- .github/workflows/model_test_hpu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/model_test_hpu.yml b/.github/workflows/model_test_hpu.yml index 5a1c86ef..dbb0bb3b 100644 --- a/.github/workflows/model_test_hpu.yml +++ b/.github/workflows/model_test_hpu.yml @@ -61,7 +61,7 @@ jobs: docker stop ${{ env.CONTAINER_NAME }} docker rm -vf ${{ env.CONTAINER_NAME }} || true fi - docker run -tid --runtime=habana --name=${{ env.CONTAINER_NAME }} -v ${{ github.workspace }}:/GenAIEval -v /dev/shm:/dev/shm ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} + docker run -tid --runtime=habana -e HABANA_VISIBLE_DEVICES=all --name=${{ env.CONTAINER_NAME }} -v ${{ github.workspace }}:/GenAIEval -v /dev/shm:/dev/shm ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} - name: Evaluation run: |