From 924d47dc6ab6b04357d94ad50dd0934d7384cabe Mon Sep 17 00:00:00 2001
From: VincyZhang <wenxin.zhang@intel.com>
Date: Sun, 19 May 2024 18:36:01 -0700
Subject: [PATCH] Add modeltest (#13)

* add model test and trellix

Signed-off-by: Wenxin Zhang <wenxin.zhang@intel.com>

---------

Signed-off-by: Wenxin Zhang <wenxin.zhang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .github/workflows/docker/hpu.dockerfile       |  25 ++
 .github/workflows/model_test.yml              | 116 --------
 .github/workflows/model_test_cpu.yml          | 172 +++++++++++
 .github/workflows/model_test_hpu.yml          | 160 +++++++++++
 .../workflows/scripts/models/collect_log.sh   |   9 +-
 .../scripts/models/generate_report.sh         | 268 ++++++++++++++++++
 .../workflows/scripts/models/model_test.sh    |  37 ++-
 Docker/hpu.dockerfile                         |  25 ++
 8 files changed, 678 insertions(+), 134 deletions(-)
 create mode 100644 .github/workflows/docker/hpu.dockerfile
 delete mode 100644 .github/workflows/model_test.yml
 create mode 100644 .github/workflows/model_test_cpu.yml
 create mode 100644 .github/workflows/model_test_hpu.yml
 create mode 100644 .github/workflows/scripts/models/generate_report.sh
 create mode 100644 Docker/hpu.dockerfile

diff --git a/.github/workflows/docker/hpu.dockerfile b/.github/workflows/docker/hpu.dockerfile
new file mode 100644
index 00000000..e6a35d54
--- /dev/null
+++ b/.github/workflows/docker/hpu.dockerfile
@@ -0,0 +1,25 @@
+FROM vault.habana.ai/gaudi-docker/1.13.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.0:latest AS hpu
+
+ENV LANG=en_US.UTF-8
+ENV PYTHONPATH=/root:/usr/lib/habanalabs/
+ARG REPO=https://github.com/intel/genaieval.git
+ARG REPO_PATH=""
+ARG BRANCH=main
+
+RUN apt-get update && \
+    apt-get install -y git-lfs && \
+    git-lfs install
+
+# Get the code: copy REPO_PATH from the build context, or clone BRANCH of REPO when REPO_PATH is empty
+SHELL ["/bin/bash", "--login", "-c"]
+RUN mkdir -p /genaieval
+COPY ${REPO_PATH} /genaieval
+RUN if [ "$REPO_PATH" == "" ]; then rm -rf /genaieval/* && rm -rf /genaieval/.* ; git clone --single-branch --branch=${BRANCH} ${REPO} /genaieval ; fi
+
+# Build from source, then install optimum with the habana extra (quoted so bash cannot glob the brackets)
+RUN cd /genaieval && \
+    python setup.py install && \
+    pip install --upgrade-strategy eager "optimum[habana]" && \
+    pip list
+
+WORKDIR /genaieval/
\ No newline at end of file
diff --git a/.github/workflows/model_test.yml b/.github/workflows/model_test.yml
deleted file mode 100644
index 4f18c630..00000000
--- a/.github/workflows/model_test.yml
+++ /dev/null
@@ -1,116 +0,0 @@
-# Copyright (c) 2024 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-name: Model Test
-
-on:
-  workflow_dispatch:
-
-# If there is a new commit, the previous jobs will be canceled
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
-  cancel-in-progress: true
-permissions: write-all
-env:
-  OUT_SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts/models
-  SCRIPT_PATH: /GenAIEval/.github/workflows/scripts
-  DOCKER_NAME: "genaieval"
-  DOCKER_TAG: "latest"
-  CONTAINER_NAME: "modelTest"
-
-
-jobs:
-  Evaluation-Workflow:
-    runs-on: aise-cluster
-    strategy:
-      matrix:
-        include:
-          - modelName: "facebook/opt-125m"
-            datasets: "piqa"
-            device: "cpu"
-            tasks: "text-generation"
-      fail-fast: true
-
-    steps:
-      - name: Clean Up Working Directory
-        run: sudo rm -rf ${{github.workspace}}/*
-
-      - name: Checkout out Repo
-        uses: actions/checkout@v4
-        with:
-          submodules: "recursive"
-          fetch-tags: true
-    # We need this because GitHub needs to clone the branch to pipeline
-      - name: Docker Build
-        run: |
-          docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
-
-      - name: Docker Run
-        run: |
-          if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}'$) ]]; then
-            docker stop ${{ env.CONTAINER_NAME }}
-            docker rm -vf ${{ env.CONTAINER_NAME }} || true
-          fi
-          docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \
-          -v ${{ github.workspace }}:/GenAIEval \
-          ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
-
-      - name: Binary build
-        run: |
-            docker exec ${{ env.CONTAINER_NAME }} \
-            bash -c "cd /GenAIEval && pip install -r requirements.txt && python setup.py install"
-
-      #- name: Download Reference Artifact
-      #  id: download-artifact
-      #  uses: dawidd6/action-download-artifact@v3.1.2
-      #  with:
-      #    workflow: model_test.yml
-      #    name: ${{ matrix.device }}-${{ matrix.modelName }}
-      #    run_id: ${{ vars.ModelTest_REF_ID }}
-      #    path: ${{ github.workspace }}/${{ matrix.device }}_${{ matrix.modelName }}_refer_log
-      #    name_is_regexp: true
-      #    repo: ${{ github.repository }}
-      #    check_artifacts: false
-      #    search_artifacts: false
-      #    skip_unpack: false
-      #    if_no_artifact_found: warn
-
-      #- name: Display structure of downloaded files
-      #  run: ls -R
-
-      - name: Evaluation
-        run: |
-            docker exec ${{ env.CONTAINER_NAME }} \
-            bash -c "cd /GenAIEval/.github/workflows/scripts/models \
-            && bash model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
-
-      - name: Collect Log
-        run: |
-            docker exec ${{ env.CONTAINER_NAME }} \
-            bash -c "cd /GenAIEval/.github/workflows/scripts/models \
-            && bash -x collect_log.sh --model=${{ matrix.modelName }} \
-             --device=${{ matrix.device }} \
-             --datasets=${{ matrix.datasets }} \
-             --tasks=${{ matrix.tasks }}
-
-      - name: Publish pipeline artifact
-        uses: actions/upload-artifact@v4
-        if: ${{ !cancelled() }}
-        with:
-          name: ${{ matrix.device }}-${{ matrix.modelName }}
-          path: |
-            ${{ github.workspace }}/${{ matrix.device }}/${{ matrix.modelName }}
-            ${{ github.workspace }}/.summary.log
-          if-no-files-found: ignore # 'warn' or 'ignore' are also available, defaults to `warn`
-          retention-days: 60 # 1 <= retention-days <= 90
diff --git a/.github/workflows/model_test_cpu.yml b/.github/workflows/model_test_cpu.yml
new file mode 100644
index 00000000..a687d7ff
--- /dev/null
+++ b/.github/workflows/model_test_cpu.yml
@@ -0,0 +1,172 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Model Test on CPU
+
+on:
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
+    paths:
+      - .github/workflows/model_test_cpu.yml
+      - GenAIEval/**
+      - setup.py
+  workflow_dispatch:
+
+# If there is a new commit, the previous jobs will be canceled
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+permissions: write-all
+env:
+  OUT_SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts/models
+  SCRIPT_PATH: /GenAIEval/.github/workflows/scripts
+  DOCKER_NAME: "genaieval"
+  DOCKER_TAG: "latest"
+  CONTAINER_NAME: "modelTest"
+
+
+jobs:
+  Evaluation-Workflow: # Builds the test image, runs model_test.sh and collect_log.sh in a container, uploads the logs.
+    runs-on: aise-cluster-cpu
+    strategy:
+      matrix:
+        include:
+          - modelName: "opt-125m" # each matrix value is forwarded to the scripts as --model/--datasets/--device/--tasks
+            datasets: "piqa"
+            device: "cpu"
+            tasks: "text-generation"
+      fail-fast: true
+
+    steps:
+      - name: Clean Up Working Directory
+        run: sudo rm -rf ${{github.workspace}}/*
+
+      - name: Load environment variables # appends the runner's .env to GITHUB_ENV; presumably the source of the *_PROXY_IMAGE_BUILD values used below — confirm
+        run:
+          cat ~/actions-runner4/.env >> $GITHUB_ENV
+
+      - name: Checkout out Repo
+        uses: actions/checkout@v4
+        with:
+          submodules: "recursive"
+          fetch-tags: true
+    # We need this because GitHub needs to clone the branch to pipeline
+      - name: Docker Build
+        run: |
+          docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
+
+      - name: Docker Run # removes a leftover container of the same name, then starts a new one with the workspace mounted at /GenAIEval
+        run: |
+          if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}'$) ]]; then
+            docker stop ${{ env.CONTAINER_NAME }}
+            docker rm -vf ${{ env.CONTAINER_NAME }} || true
+          fi
+          docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \
+          -v ${{ github.workspace }}:/GenAIEval \
+          -e http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" -e https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" \
+          ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
+
+      - name: Binary build # installs the checked-out sources inside the running container
+        run: |
+            docker exec ${{ env.CONTAINER_NAME }} \
+            bash -c "cd /GenAIEval && pip install -r requirements.txt && python setup.py install"
+
+      - name: Evaluation
+        run: |
+            docker exec ${{ env.CONTAINER_NAME }} \
+            bash -c "cd /GenAIEval/.github/workflows/scripts/models \
+            && bash -x model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
+
+      - name: Collect Log # collect_log.sh appends one line per run to /GenAIEval/summary.log, i.e. into the mounted workspace
+        run: |
+            docker exec ${{ env.CONTAINER_NAME }} \
+            bash -c "cd /GenAIEval/.github/workflows/scripts/models \
+            && bash -x collect_log.sh --model=${{ matrix.modelName }} \
+             --device=${{ matrix.device }} \
+             --datasets=${{ matrix.datasets }} \
+             --tasks=${{ matrix.tasks }}"
+
+      - name: Publish pipeline artifact # runs unless the job was cancelled, so logs of a failed evaluation are still uploaded
+        uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: ${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}
+          path: |
+            ${{ github.workspace }}/${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}.log
+            ${{ github.workspace }}/summary.log
+          if-no-files-found: ignore # 'warn' or 'ignore' are also available, defaults to `warn`
+          retention-days: 60 # 1 <= retention-days <= 90
+
+  Genreate-Report: # Merges the uploaded summary logs into report.html and fails the run when a regression is flagged.
+    runs-on: ubuntu-latest
+    needs: [Evaluation-Workflow]
+    steps:
+      - name: Checkout out Repo
+        uses: actions/checkout@v4
+
+      - name: Download Summary Log # no artifact name given: the artifacts of this run land under log/, which generate_report.sh scans
+        uses: actions/download-artifact@v4
+        with:
+          path: ${{ env.OUT_SCRIPT_PATH }}/log
+      - name: Display structure of downloaded files
+        run: ls -R
+      - name: Analysis Summary
+        run: |
+            cd ${{ env.OUT_SCRIPT_PATH }}
+            ls -R
+
+      - name: Download Reference Artifact
+        id: download-artifact
+        uses: dawidd6/action-download-artifact@v3.1.2
+        with:
+          workflow: model_test_cpu.yml # this workflow's own file; it is the one that publishes FinalReport
+          name: FinalReport
+          run_id: ${{ vars.ModelTest_CPU_REF_ID }}
+          path: ${{ env.OUT_SCRIPT_PATH }}
+          name_is_regexp: true
+          repo: ${{ github.repository }}
+          check_artifacts: false
+          search_artifacts: false
+          skip_unpack: false
+          if_no_artifact_found: warn
+
+      - name: Display structure of downloaded files
+        run: cd ${{ env.OUT_SCRIPT_PATH }}/log && ls -R
+
+      - name: Generate report
+        run: |
+          echo "------ Generating final report.html ------"
+          cd ${{ env.OUT_SCRIPT_PATH }}
+          mkdir -p generated
+          /usr/bin/bash -x generate_report.sh
+        env:
+          RUN_DISPLAY_URL: https://github.com/opea-project/GenAIEval/actions/runs/${{ github.run_id }}
+          BUILD_NUMBER: ${{ github.run_id }}
+          JOB_STATUS: succeed
+
+      - name: Publish Report
+        uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: FinalReport
+          path: ${{ env.OUT_SCRIPT_PATH }}/generated
+
+      - name: Specify performance regression # is_perf_reg is written to GITHUB_ENV by generate_report.sh
+        if: ${{ !cancelled() }}
+        run: |
+          if [ "${{ env.is_perf_reg }}" == 'true' ]; then
+            echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports."
+            exit 1
+          fi
diff --git a/.github/workflows/model_test_hpu.yml b/.github/workflows/model_test_hpu.yml
new file mode 100644
index 00000000..82a10f79
--- /dev/null
+++ b/.github/workflows/model_test_hpu.yml
@@ -0,0 +1,160 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Model Test on HPU
+
+on:
+  pull_request:
+      branches: [main]
+      types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
+      paths:
+        - .github/workflows/model_test_hpu.yml
+        - GenAIEval/**
+        - setup.py
+  workflow_dispatch:
+
+# If there is a new commit, the previous jobs will be canceled
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+permissions: write-all
+env:
+  OUT_SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts/models
+  SCRIPT_PATH: /GenAIEval/.github/workflows/scripts
+  DOCKER_NAME: "genaieval"
+  DOCKER_TAG: "latest"
+  CONTAINER_NAME: "modelTest"
+
+
+jobs:
+  Evaluation-Workflow: # Builds the HPU image, runs model_test.sh and collect_log.sh in a container, uploads the logs.
+    runs-on: aise-cluster-hpu
+    strategy:
+      matrix:
+        include:
+          - modelName: "opt-125m" # each matrix value is forwarded to the scripts as --model/--datasets/--device/--tasks
+            datasets: "piqa"
+            device: "hpu"
+            tasks: "text-generation"
+      fail-fast: true
+
+    steps:
+      - name: Clean Up Working Directory
+        run: sudo rm -rf ${{github.workspace}}/*
+
+      - name: Checkout out Repo
+        uses: actions/checkout@v4
+        with:
+          submodules: "recursive"
+          fetch-tags: true
+    # We need this because GitHub needs to clone the branch to pipeline
+      - name: Docker Build # REPO_PATH="." makes the Dockerfile copy the checked-out workspace instead of cloning
+        run: |
+          docker build --target hpu --build-arg REPO_PATH="." -f ${{ github.workspace }}/Docker/hpu.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
+
+      - name: Docker Run # removes a leftover container of the same name, then starts a new one with the workspace mounted at /GenAIEval
+        run: |
+          if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}'$) ]]; then
+            docker stop ${{ env.CONTAINER_NAME }}
+            docker rm -vf ${{ env.CONTAINER_NAME }} || true
+          fi
+          docker run -tid --runtime=habana --name=${{ env.CONTAINER_NAME }} -v ${{ github.workspace }}:/GenAIEval -v /dev/shm:/dev/shm ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
+
+      - name: Evaluation
+        run: |
+            docker exec ${{ env.CONTAINER_NAME }} \
+            bash -c "cd /GenAIEval/.github/workflows/scripts/models \
+            && bash -x model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
+
+      - name: Collect Log # collect_log.sh appends one line per run to /GenAIEval/summary.log, i.e. into the mounted workspace
+        run: |
+            docker exec ${{ env.CONTAINER_NAME }} \
+            bash -c "cd /GenAIEval/.github/workflows/scripts/models \
+            && bash -x collect_log.sh --model=${{ matrix.modelName }} \
+             --device=${{ matrix.device }} \
+             --datasets=${{ matrix.datasets }} \
+             --tasks=${{ matrix.tasks }}"
+
+      - name: Publish pipeline artifact # runs unless the job was cancelled, so logs of a failed evaluation are still uploaded
+        uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: ${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}
+          path: |
+            ${{ github.workspace }}/${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}.log
+            ${{ github.workspace }}/summary.log
+          if-no-files-found: ignore # 'warn' or 'ignore' are also available, defaults to `warn`
+          retention-days: 60 # 1 <= retention-days <= 90
+
+  Genreate-Report: # Merges the uploaded summary logs into report.html and fails the run when a regression is flagged.
+    runs-on: ubuntu-latest
+    needs: [Evaluation-Workflow]
+    steps:
+      - name: Checkout out Repo
+        uses: actions/checkout@v4
+
+      - name: Download Summary Log # no artifact name given: the artifacts of this run land under log/, which generate_report.sh scans
+        uses: actions/download-artifact@v4
+        with:
+          path: ${{ env.OUT_SCRIPT_PATH }}/log
+      - name: Display structure of downloaded files
+        run: ls -R
+      - name: Analysis Summary
+        run: |
+            cd ${{ env.OUT_SCRIPT_PATH }}
+            ls -R
+
+      - name: Download Reference Artifact
+        id: download-artifact
+        uses: dawidd6/action-download-artifact@v3.1.2
+        with:
+          workflow: model_test_hpu.yml # this workflow's own file; it is the one that publishes FinalReport
+          name: FinalReport
+          run_id: ${{ vars.ModelTest_HPU_REF_ID }}
+          path: ${{ env.OUT_SCRIPT_PATH }}
+          name_is_regexp: true
+          repo: ${{ github.repository }}
+          check_artifacts: false
+          search_artifacts: false
+          skip_unpack: false
+          if_no_artifact_found: warn
+
+      - name: Display structure of downloaded files
+        run: cd ${{ env.OUT_SCRIPT_PATH }}/log && ls -R
+
+      - name: Generate report
+        run: |
+          echo "------ Generating final report.html ------"
+          cd ${{ env.OUT_SCRIPT_PATH }}
+          mkdir -p generated
+          /usr/bin/bash -x generate_report.sh
+        env:
+          RUN_DISPLAY_URL: https://github.com/opea-project/GenAIEval/actions/runs/${{ github.run_id }}
+          BUILD_NUMBER: ${{ github.run_id }}
+          JOB_STATUS: succeed
+
+      - name: Publish Report
+        uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: FinalReport
+          path: ${{ env.OUT_SCRIPT_PATH }}/generated
+
+      - name: Specify performance regression # is_perf_reg is written to GITHUB_ENV by generate_report.sh
+        if: ${{ !cancelled() }}
+        run: |
+          if [ "${{ env.is_perf_reg }}" == 'true' ]; then
+            echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports."
+            exit 1
+          fi
diff --git a/.github/workflows/scripts/models/collect_log.sh b/.github/workflows/scripts/models/collect_log.sh
index a197393a..9c22d78f 100644
--- a/.github/workflows/scripts/models/collect_log.sh
+++ b/.github/workflows/scripts/models/collect_log.sh
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 set -eo pipefail
+set -x
 source /GenAIEval/.github/workflows/scripts/change_color
 WORKSPACE="/GenAIEval"
 # get parameters
@@ -34,14 +35,14 @@ for i in "$@"; do
     esac
 done
 
-log_file="/GenAIEval/${device}/${model}/${device}-${model}-${tasks}-${datasets}.log"
+log_file="/log/${device}/${model}/${device}-${tasks}-${model}-${datasets}.log"
 $BOLD_YELLOW && echo "-------- Collect logs --------" && $RESET
 
 echo "working in"
 pwd
 if [[ ! -f ${log_file} ]]; then
-    echo "${device};${model};${tasks};${datasets};;${logfile}" >> ${WORKSPACE}/summary.log
+    echo "${device};${model};${tasks};${datasets};;" >> ${WORKSPACE}/summary.log
 else
-    acc=$(grep -Po "Accuracy .* is:\\s+(\\d+(\\.\\d+)?)" ${log_file} | head -n 1 | sed 's/.*://;s/[^0-9.]//g')
-    echo "${device};${model};${tasks};${datasets};${acc};${logfile}" >> ${WORKSPACE}/summary.log
+    acc=$(grep -Po "acc .*(\d+(\.\d+)?)" ${log_file} | awk -F "|" '{print $2}' | head -n 1 | sed 's/.*://;s/[^0-9.]//g') # 2nd |-separated field of the first "acc " match, reduced to digits and dots. NOTE(review): with "set -eo pipefail" a log without such a line ends the script here, before summary.log is written — confirm that is intended
+    echo "${device};${model};${tasks};${datasets};${acc};" >> ${WORKSPACE}/summary.log # fields: device;model;tasks;datasets;accuracy;
 fi
diff --git a/.github/workflows/scripts/models/generate_report.sh b/.github/workflows/scripts/models/generate_report.sh
new file mode 100644
index 00000000..4db273f5
--- /dev/null
+++ b/.github/workflows/scripts/models/generate_report.sh
@@ -0,0 +1,268 @@
+#!/bin/bash
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x # trace every command in the job log
+WORKSPACE=generated # output directory (relative to the current directory) for the merged summary.log and report.html
+last_log_path=FinalReport # directory expected to hold the reference run's summary.log
+summaryLog=${WORKSPACE}/summary.log
+summaryLogLast=${last_log_path}/summary.log
+PATTERN='[-a-zA-Z0-9_]*=' # NOTE(review): not referenced anywhere in this script — confirm it can be dropped
+
+function main { # Entry point: merge the per-artifact summary logs, then write report.html section by section.
+    echo "summaryLog: ${summaryLog}"
+    echo "summaryLogLast: ${summaryLogLast}"
+    echo "is_perf_reg=false" >> "$GITHUB_ENV" # default value; generate_core appends is_perf_reg=true when a row fails
+    preprocessing
+    generate_html_head # writes report.html with ">", so it has to run before the appending sections below
+    generate_html_overview
+    generate_results
+    generate_html_footer
+}
+
+function preprocessing { # Append every log/<entry>/summary.log to ${summaryLog}.
+    for file_path in log/*
+    do
+        if [[ -d ${file_path} ]] && [[ -f ${file_path}/summary.log ]]; then # only directories that actually contain a summary.log
+            cat ${file_path}/summary.log >> ${summaryLog}
+        fi
+    done
+}
+
+function generate_html_overview { # Append the page heading and the branch/commit summary table to report.html.
+    Test_Info_Title="<th colspan=\"4\">Test Branch</th> <th colspan=\"4\">Commit ID</th> " # inner quotes escaped so they reach the HTML
+    Test_Info="<th colspan=\"4\">${MR_source_branch:-${GITHUB_HEAD_REF:-${GITHUB_REF_NAME}}}</th> <th colspan=\"4\">${ghprbActualCommit:-${GITHUB_SHA}}</th> " # fall back to the GitHub Actions defaults when the CI-specific variables are unset
+
+    cat >>${WORKSPACE}/report.html <<eof
+
+<body>
+    <div id="main">
+        <h1 align="center">GenAIEval Model Tests
+        [ <a href="${RUN_DISPLAY_URL}">Job-${BUILD_NUMBER}</a> ]</h1>
+      <h1 align="center">Test Status: ${JOB_STATUS}</h1>
+        <h2>Summary</h2>
+        <table class="features-table">
+            <tr>
+              <th>Repo</th>
+              ${Test_Info_Title}
+              </tr>
+              <tr>
+                    <td><a href="https://github.com/opea-project/GenAIEval">GenAIEval</a></td>
+              ${Test_Info}
+                </tr>
+        </table>
+eof
+}
+
+function generate_results { # Append the results table: one generate_core call per device/model/task/dataset combination in ${summaryLog}.
+    cat >>${WORKSPACE}/report.html <<eof
+    <h2>Performance</h2>
+      <table class="features-table">
+        <tr>
+          <th>Device</th>
+          <th>Tasks</th>
+          <th>Model</th>
+          <th>Datasets</th>
+          <th>VS</th>
+          <th>Accuracy</th>
+        </tr>
+eof
+
+    devices=$(cat ${summaryLog} | cut -d';' -f1 | awk '!a[$0]++') # field 1; the awk filter keeps the first occurrence of each value
+    for device in ${devices[@]}; do
+        models=$(cat ${summaryLog} | grep "${device};" | cut -d';' -f2 | awk '!a[$0]++')
+        for model in ${models[@]}; do
+            tasks=$(cat ${summaryLog} | grep "${device};${model};" | cut -d';' -f3 | awk '!a[$0]++')
+            for task in ${tasks[@]}; do
+                datasets=$(cat ${summaryLog} | grep "${device};${model};${task};" | cut -d';' -f4 | awk '!a[$0]++')
+                for dataset in ${datasets[@]}; do
+                    benchmark_pattern="${device};${model};${task};${dataset};" # the key part of a summary.log line
+                    acc=$(cat ${summaryLog} | grep "${benchmark_pattern}" | cut -d';' -f5 | awk '!a[$0]++')
+                    acc_last=nan # placeholder used when the reference log has no matching line
+                    if [ $(cat ${summaryLogLast} | grep -c "${benchmark_pattern}") != 0 ]; then
+                        acc_last=$(cat ${summaryLogLast} | grep "${benchmark_pattern}" | cut -d';' -f5 | awk '!a[$0]++')
+                    fi
+                    generate_core # reads device, model, task, dataset, acc and acc_last as globals
+                done
+            done
+        done
+    done
+    cat >>${WORKSPACE}/report.html <<eof
+    </table>
+eof
+}
+
+function generate_core { # Append the New / Last / New-vs-Last rows for the current device, task, model and dataset to report.html.
+    echo "<tr><td rowspan=3>${device}</td><td rowspan=3>${task}</td><td rowspan=3>${model}</td><td rowspan=3>${dataset}</td><td>New</td>" >>${WORKSPACE}/report.html
+    echo | awk -v acc="${acc}" -v acc_l="${acc_last}" '
+        function show_benchmark(a) {
+            if(a ~/[1-9]/) {
+                printf("<td>%.2f</td>\n",a);
+            }else {
+                printf("<td></td>\n");
+            }
+        }
+        function compare_new_last(a,b){
+            if(a ~/[1-9]/ && b ~/[1-9]/) {
+                target = a / b;  # New/Last as the row label says; a ratio below 0.945 is an accuracy drop
+                if(target >= 0.945) {
+                    status_png = "background-color:#90EE90";
+                }else {
+                    status_png = "background-color:#FFD2D2";
+                    job_status = "fail"
+                }
+                printf("<td style=\"%s\">%.2f</td>", status_png, target);
+            }else{
+                if(a == ""){
+                    job_status = "fail"
+                    status_png = "background-color:#FFD2D2";
+                    printf("<td style=\"%s\"></td>", status_png);
+                }else{
+                    printf("<td class=\"col-cell col-cell3\"></td>");
+                }
+            }
+        }
+        BEGIN {
+            job_status = "pass"
+        }{
+            # current
+            show_benchmark(acc)
+            # Last
+            printf("</tr>\n<tr><td>Last</td>")
+            show_benchmark(acc_l)
+            # current vs last
+            printf("</tr>\n<tr><td>New/Last</td>");
+            compare_new_last(acc,acc_l)
+            printf("</tr>\n");
+        } END{
+          printf("\n%s", job_status);
+        }
+    ' >>${WORKSPACE}/report.html
+    job_state=$(tail -1 ${WORKSPACE}/report.html) # awk prints pass/fail as the last line of the file
+    sed -i '$s/.*//' ${WORKSPACE}/report.html # blank that status line again so it does not show up in the HTML
+    if [ "${job_state}" == 'fail' ]; then
+        echo "is_perf_reg=true" >> "$GITHUB_ENV"
+    fi
+}
+
+function generate_html_head { # Create (or overwrite) report.html with the document head and stylesheet.
+    cat >${WORKSPACE}/report.html <<eof
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>GenAIEval Model Tests</title>
+    <style>
+        body {
+            margin: 0;
+            padding: 0;
+            background: white no-repeat left top;
+        }
+
+        #main {
+            /* width: 100%; */
+            margin: 20px auto 10px auto;
+            background: white;
+            -moz-border-radius: 8px;
+            -webkit-border-radius: 8px;
+            padding: 0 30px 30px 30px;
+            border: 1px solid #adaa9f;
+            -moz-box-shadow: 0 2px 2px #9c9c9c;
+            -webkit-box-shadow: 0 2px 2px #9c9c9c;
+        }
+
+        .features-table {
+            width: 100%;
+            margin: 0 auto;
+            border-collapse: separate;
+            border-spacing: 0;
+            text-shadow: 0 1px 0 #fff;
+            color: #2a2a2a;
+            background: #fafafa;
+            background-image: -moz-linear-gradient(top, #fff, #eaeaea, #fff);
+            /* Firefox 3.6 */
+            background-image: -webkit-gradient(linear, center bottom, center top, from(#fff), color-stop(0.5, #eaeaea), to(#fff));
+            font-family: Verdana, Arial, Helvetica
+        }
+
+        .features-table th,
+        td {
+            text-align: center;
+            height: 25px;
+            line-height: 25px;
+            padding: 0 8px;
+            border: 1px solid #cdcdcd;
+            box-shadow: 0 1px 0 white;
+            -moz-box-shadow: 0 1px 0 white;
+            -webkit-box-shadow: 0 1px 0 white;
+            white-space: nowrap;
+        }
+
+        .no-border th {
+            box-shadow: none;
+            -moz-box-shadow: none;
+            -webkit-box-shadow: none;
+        }
+
+        .col-cell {
+            text-align: center;
+            width: 150px;
+            font: normal 1em Verdana, Arial, Helvetica;
+        }
+
+        .col-cell3 {
+            background: #efefef;
+            background: rgba(144, 144, 144, 0.15);
+        }
+
+        .col-cell1,
+        .col-cell2 {
+            background: #B0C4DE;
+            background: rgba(176, 196, 222, 0.3);
+        }
+
+        .col-cellh {
+            font: bold 1.3em 'trebuchet MS', 'Lucida Sans', Arial;
+            -moz-border-radius-topright: 10px;
+            -moz-border-radius-topleft: 10px;
+            border-top-right-radius: 10px;
+            border-top-left-radius: 10px;
+            border-top: 1px solid #eaeaea !important;
+        }
+
+        .col-cellf {
+            font: bold 1.4em Georgia;
+            -moz-border-radius-bottomright: 10px;
+            -moz-border-radius-bottomleft: 10px;
+            border-bottom-right-radius: 10px;
+            border-bottom-left-radius: 10px;
+            border-bottom: 1px solid #dadada !important;
+        }
+    </style>
+</head>
+eof
+}
+
+function generate_html_footer { # Append the tags that close the #main div, the body and the document.
+    cat >>${WORKSPACE}/report.html <<eof
+    </div>
+</body>
+</html>
+eof
+}
+
+main
diff --git a/.github/workflows/scripts/models/model_test.sh b/.github/workflows/scripts/models/model_test.sh
index fb420086..7d460ac2 100644
--- a/.github/workflows/scripts/models/model_test.sh
+++ b/.github/workflows/scripts/models/model_test.sh
@@ -13,9 +13,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-set -eo pipefail
+set -o pipefail
+set -x
 source /GenAIEval/.github/workflows/scripts/change_color
-
+git config --global --add safe.directory /GenAIEval
 # get parameters
 PATTERN='[-a-zA-Z0-9_]*='
 PERF_STABLE_CHECK=true
@@ -34,11 +35,7 @@ for i in "$@"; do
     esac
 done
 
-log_dir="/GenAIEval/${device}/${model}"
-mkdir -p ${log_dir}
 working_dir=""
-$BOLD_YELLOW && echo "-------- evaluation start --------" && $RESET
-
 main() {
     case ${tasks} in
         "text-generation")
@@ -48,7 +45,21 @@ main() {
         *)
             echo "Not suppotted task"; exit 1;;
     esac
+    if [[ ${model} == *"opt"* ]]; then # model names containing "opt" get the facebook/ prefix
+        pretrained="facebook/${model}"
+    else
+        pretrained="${model}"
+    fi
+    if [[ ${device} == "hpu" ]]; then
+        model_sourze="gaudi-hf"
+    else # cpu and any other device use "hf", as before, instead of leaving --model empty
+        model_sourze="hf"
+    fi
+    log_dir="/log/${device}/${model}"
+    mkdir -p ${log_dir}
+    $BOLD_YELLOW && echo "-------- evaluation start --------" && $RESET
+    run_benchmark
+    cp ${log_dir}/${device}-${tasks}-${model}-${datasets}.log /GenAIEval/ # the workflows mount the workspace at /GenAIEval and upload the log from there
 }
 
 function prepare() {
@@ -62,22 +73,20 @@ function prepare() {
     else
         echo "Not found requirements.txt file."
     fi
-    if [[ ${device} == "hpu" ]]; then
-        pip install --upgrade-strategy eager optimum[habana]
-    fi
 }
 
 function run_benchmark() {
     cd ${working_dir}
-    overall_log="${log_dir}/${device}-${model}-${tasks}-${datasets}.log"
+    overall_log="${log_dir}/${device}-${tasks}-${model}-${datasets}.log"
     python main.py \
-        --model hf \
-        --model_args pretrained=${model} \
+        --model ${model_sourze} \
+        --model_args pretrained=${pretrained} \
         --tasks ${datasets} \
         --device ${device} \
-        --batch_size 112
-        2>&1 | tee ${overall_log}
+        --batch_size 112  2>&1 | tee ${overall_log}
 
+    status=$? # exit code of the python | tee pipeline (pipefail is on); read it before echo/cat overwrite $?
+    echo "print log content:"
+    cat ${overall_log}
-    status=$?
     if [ ${status} != 0 ]; then
         echo "Evaluation process returned non-zero exit code."
diff --git a/Docker/hpu.dockerfile b/Docker/hpu.dockerfile
new file mode 100644
index 00000000..58c4ce1b
--- /dev/null
+++ b/Docker/hpu.dockerfile
@@ -0,0 +1,25 @@
+FROM vault.habana.ai/gaudi-docker/1.13.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.0:latest AS hpu
+
+ENV LANG=en_US.UTF-8
+ENV PYTHONPATH=/root:/usr/lib/habanalabs/
+ARG REPO=https://github.com/opea-project/GenAIEval.git
+ARG REPO_PATH=""
+ARG BRANCH=main
+
+RUN apt-get update && \
+    apt-get install -y git-lfs && \
+    git-lfs install
+
+# Get the code: copy REPO_PATH from the build context, or clone BRANCH of REPO when REPO_PATH is empty
+SHELL ["/bin/bash", "--login", "-c"]
+RUN mkdir -p /GenAIEval
+COPY ${REPO_PATH} /GenAIEval
+RUN if [ "$REPO_PATH" == "" ]; then rm -rf /GenAIEval/* && rm -rf /GenAIEval/.* ; git clone --single-branch --branch=${BRANCH} ${REPO} /GenAIEval ; fi
+
+# Build from source, then install optimum with the habana extra (quoted so bash cannot glob the brackets)
+RUN cd /GenAIEval && \
+    python setup.py install && \
+    pip install --upgrade-strategy eager "optimum[habana]" && \
+    pip list
+
+WORKDIR /GenAIEval/
\ No newline at end of file