From 6adbcd93e97d969a84dfce403f33260157b6b161 Mon Sep 17 00:00:00 2001
From: Wenxin Zhang <wenxin.zhang@intel.com>
Date: Tue, 14 May 2024 09:41:25 +0800
Subject: [PATCH] update

Signed-off-by: Wenxin Zhang <wenxin.zhang@intel.com>
---
 .github/workflows/model_test_cpu.yml          | 165 +++++++++++
 .../{model_test.yml => model_test_hpu.yml}    |  99 +++++--
 .../workflows/scripts/models/collect_log.sh   |   9 +-
 .../scripts/models/generate_report.sh         | 257 ++++++++++++++++++
 .../workflows/scripts/models/model_test.sh    |  29 +-
 5 files changed, 517 insertions(+), 42 deletions(-)
 create mode 100644 .github/workflows/model_test_cpu.yml
 rename .github/workflows/{model_test.yml => model_test_hpu.yml} (54%)
 create mode 100644 .github/workflows/scripts/models/generate_report.sh

diff --git a/.github/workflows/model_test_cpu.yml b/.github/workflows/model_test_cpu.yml
new file mode 100644
index 00000000..e8c97243
--- /dev/null
+++ b/.github/workflows/model_test_cpu.yml
@@ -0,0 +1,165 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Model Test on CPU
+
+on:
+  workflow_dispatch:
+
+# If there is a new commit, the previous jobs will be canceled
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+permissions: write-all
+env:
+  OUT_SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts/models
+  SCRIPT_PATH: /GenAIEval/.github/workflows/scripts
+  DOCKER_NAME: "genaieval"
+  DOCKER_TAG: "latest"
+  CONTAINER_NAME: "modelTest"
+
+
+jobs:
+  Evaluation-Workflow:
+    runs-on: aise-cluster-cpu
+    strategy:
+      matrix:
+        include:
+          - modelName: "opt-125m"
+            datasets: "piqa"
+            device: "cpu"
+            tasks: "text-generation"
+      fail-fast: true
+
+    steps:
+      - name: Clean Up Working Directory
+        run: sudo rm -rf ${{github.workspace}}/*
+      
+      - name: Load environment variables
+        run:
+          cat ~/actions-runner4/.env >> $GITHUB_ENV
+
+      - name: Checkout out Repo
+        uses: actions/checkout@v4
+        with:
+          submodules: "recursive"
+          fetch-tags: true
+    # We need this because GitHub needs to clone the branch to pipeline
+      - name: Docker Build
+        run: |
+          docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
+
+      - name: Docker Run
+        run: |
+          if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}'$) ]]; then
+            docker stop ${{ env.CONTAINER_NAME }}
+            docker rm -vf ${{ env.CONTAINER_NAME }} || true
+          fi
+          docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \
+          -v ${{ github.workspace }}:/GenAIEval \
+          -e http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" -e https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" \
+          ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
+
+      - name: Binary build
+        run: |
+            docker exec ${{ env.CONTAINER_NAME }} \
+            bash -c "cd /GenAIEval && pip install -r requirements.txt && python setup.py install"
+
+      - name: Evaluation
+        run: |
+            docker exec ${{ env.CONTAINER_NAME }} \
+            bash -c "cd /GenAIEval/.github/workflows/scripts/models \
+            && bash model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
+
+      - name: Collect Log
+        run: |
+            docker exec ${{ env.CONTAINER_NAME }} \
+            bash -c "cd /GenAIEval/.github/workflows/scripts/models \
+            && bash -x collect_log.sh --model=${{ matrix.modelName }} \
+             --device=${{ matrix.device }} \
+             --datasets=${{ matrix.datasets }} \
+             --tasks=${{ matrix.tasks }}"
+
+      - name: Publish pipeline artifact
+        uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: ${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}
+          path: |
+            ${{ github.workspace }}/${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}.log
+            ${{ github.workspace }}/summary.log
+          if-no-files-found: ignore # 'warn' or 'error' are also available, defaults to `warn`
+          retention-days: 60 # 1 <= retention-days <= 90
+  
+  Genreate-Report:
+    runs-on: ubuntu-latest
+    needs: [Evaluation-Workflow]
+    steps:
+      - name: Checkout out Repo
+        uses: actions/checkout@v4
+
+      - name: Download Summary Log
+        uses: actions/download-artifact@v4
+        with:
+          path: ${{ env.OUT_SCRIPT_PATH }}/log
+      - name: Display structure of downloaded files
+        run: ls -R
+      - name: Analysis Summary
+        run: |
+            cd ${{ env.OUT_SCRIPT_PATH }}
+            ls -R
+
+      - name: Download Reference Artifact
+        id: download-artifact
+        uses: dawidd6/action-download-artifact@v3.1.2
+        with:
+          workflow: model_test_cpu.yml # file name of this workflow, which uploads the FinalReport artifact
+          name: FinalReport
+          run_id: ${{ vars.ModelTest_CPU_REF_ID }}
+          path: ${{ env.OUT_SCRIPT_PATH }}
+          name_is_regexp: true
+          repo: ${{ github.repository }}
+          check_artifacts: false
+          search_artifacts: false
+          skip_unpack: false
+          if_no_artifact_found: warn
+
+      - name: Display structure of downloaded files
+        run: cd ${{ env.OUT_SCRIPT_PATH }}/log && ls -R
+
+      - name: Generate report
+        run: |
+          echo "------ Generating final report.html ------"
+          cd ${{ env.OUT_SCRIPT_PATH }}
+          mkdir -p generated
+          /usr/bin/bash generate_report.sh
+        env:
+          RUN_DISPLAY_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+          BUILD_NUMBER: ${{ github.run_id }}
+          JOB_STATUS: succeed
+
+      - name: Publish Report
+        uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: FinalReport
+          path: ${{ env.OUT_SCRIPT_PATH }}/generated
+
+      - name: Specify performance regression
+        if: ${{ !cancelled() }}
+        run: |
+          if [ "${{ env.is_perf_reg }}" == 'true' ]; then # quoted: the value is empty when the report step did not run
+            echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports."
+            exit 1
+          fi
diff --git a/.github/workflows/model_test.yml b/.github/workflows/model_test_hpu.yml
similarity index 54%
rename from .github/workflows/model_test.yml
rename to .github/workflows/model_test_hpu.yml
index 4f18c630..db0ee9b8 100644
--- a/.github/workflows/model_test.yml
+++ b/.github/workflows/model_test_hpu.yml
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-name: Model Test
+name: Model Test on HPU
 
 on:
   workflow_dispatch:
@@ -32,13 +32,13 @@ env:
 
 jobs:
   Evaluation-Workflow:
-    runs-on: aise-cluster
+    runs-on: aise-cluster-hpu
     strategy:
       matrix:
         include:
-          - modelName: "facebook/opt-125m"
+          - modelName: "opt-125m"
             datasets: "piqa"
-            device: "cpu"
+            device: "hpu"
             tasks: "text-generation"
       fail-fast: true
 
@@ -54,7 +54,7 @@ jobs:
     # We need this because GitHub needs to clone the branch to pipeline
       - name: Docker Build
         run: |
-          docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
+          docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
 
       - name: Docker Run
         run: |
@@ -64,6 +64,7 @@ jobs:
           fi
           docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \
           -v ${{ github.workspace }}:/GenAIEval \
+          -e http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" -e https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" \
           ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
 
       - name: Binary build
@@ -71,24 +72,6 @@ jobs:
             docker exec ${{ env.CONTAINER_NAME }} \
             bash -c "cd /GenAIEval && pip install -r requirements.txt && python setup.py install"
 
-      #- name: Download Reference Artifact
-      #  id: download-artifact
-      #  uses: dawidd6/action-download-artifact@v3.1.2
-      #  with:
-      #    workflow: model_test.yml
-      #    name: ${{ matrix.device }}-${{ matrix.modelName }}
-      #    run_id: ${{ vars.ModelTest_REF_ID }}
-      #    path: ${{ github.workspace }}/${{ matrix.device }}_${{ matrix.modelName }}_refer_log
-      #    name_is_regexp: true
-      #    repo: ${{ github.repository }}
-      #    check_artifacts: false
-      #    search_artifacts: false
-      #    skip_unpack: false
-      #    if_no_artifact_found: warn
-
-      #- name: Display structure of downloaded files
-      #  run: ls -R
-
       - name: Evaluation
         run: |
             docker exec ${{ env.CONTAINER_NAME }} \
@@ -102,15 +85,77 @@ jobs:
             && bash -x collect_log.sh --model=${{ matrix.modelName }} \
              --device=${{ matrix.device }} \
              --datasets=${{ matrix.datasets }} \
-             --tasks=${{ matrix.tasks }}
+             --tasks=${{ matrix.tasks }}"
 
       - name: Publish pipeline artifact
         uses: actions/upload-artifact@v4
         if: ${{ !cancelled() }}
         with:
-          name: ${{ matrix.device }}-${{ matrix.modelName }}
+          name: ${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}
           path: |
-            ${{ github.workspace }}/${{ matrix.device }}/${{ matrix.modelName }}
-            ${{ github.workspace }}/.summary.log
+            ${{ github.workspace }}/${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}.log
+            ${{ github.workspace }}/summary.log
           if-no-files-found: ignore # 'warn' or 'ignore' are also available, defaults to `warn`
           retention-days: 60 # 1 <= retention-days <= 90
+  
+  Genreate-Report:
+    runs-on: ubuntu-latest
+    needs: [Evaluation-Workflow]
+    steps:
+      - name: Checkout out Repo
+        uses: actions/checkout@v4
+
+      - name: Download Summary Log
+        uses: actions/download-artifact@v4
+        with:
+          path: ${{ env.OUT_SCRIPT_PATH }}/log
+      - name: Display structure of downloaded files
+        run: ls -R
+      - name: Analysis Summary
+        run: |
+            cd ${{ env.OUT_SCRIPT_PATH }}
+            ls -R
+
+      - name: Download Reference Artifact
+        id: download-artifact
+        uses: dawidd6/action-download-artifact@v3.1.2
+        with:
+          workflow: model_test_hpu.yml # file name of this workflow, which uploads the FinalReport artifact
+          name: FinalReport
+          run_id: ${{ vars.ModelTest_HPU_REF_ID }}
+          path: ${{ env.OUT_SCRIPT_PATH }}
+          name_is_regexp: true
+          repo: ${{ github.repository }}
+          check_artifacts: false
+          search_artifacts: false
+          skip_unpack: false
+          if_no_artifact_found: warn
+
+      - name: Display structure of downloaded files
+        run: cd ${{ env.OUT_SCRIPT_PATH }}/log && ls -R
+
+      - name: Generate report
+        run: |
+          echo "------ Generating final report.html ------"
+          cd ${{ env.OUT_SCRIPT_PATH }}
+          mkdir -p generated
+          /usr/bin/bash generate_report.sh
+        env:
+          RUN_DISPLAY_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+          BUILD_NUMBER: ${{ github.run_id }}
+          JOB_STATUS: succeed
+
+      - name: Publish Report
+        uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: FinalReport
+          path: ${{ env.OUT_SCRIPT_PATH }}/generated
+
+      - name: Specify performance regression
+        if: ${{ !cancelled() }}
+        run: |
+          if [ "${{ env.is_perf_reg }}" == 'true' ]; then # quoted: the value is empty when the report step did not run
+            echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports."
+            exit 1
+          fi
diff --git a/.github/workflows/scripts/models/collect_log.sh b/.github/workflows/scripts/models/collect_log.sh
index a197393a..9c22d78f 100644
--- a/.github/workflows/scripts/models/collect_log.sh
+++ b/.github/workflows/scripts/models/collect_log.sh
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 set -eo pipefail
+set -x
 source /GenAIEval/.github/workflows/scripts/change_color
 WORKSPACE="/GenAIEval"
 # get parameters
@@ -34,14 +35,14 @@ for i in "$@"; do
     esac
 done
 
-log_file="/GenAIEval/${device}/${model}/${device}-${model}-${tasks}-${datasets}.log"
+log_file="/log/${device}/${model}/${device}-${tasks}-${model}-${datasets}.log"
 $BOLD_YELLOW && echo "-------- Collect logs --------" && $RESET
 
 echo "working in"
 pwd
 if [[ ! -f ${log_file} ]]; then
-    echo "${device};${model};${tasks};${datasets};;${logfile}" >> ${WORKSPACE}/summary.log
+    echo "${device};${model};${tasks};${datasets};;" >> ${WORKSPACE}/summary.log
 else
-    acc=$(grep -Po "Accuracy .* is:\\s+(\\d+(\\.\\d+)?)" ${log_file} | head -n 1 | sed 's/.*://;s/[^0-9.]//g')
-    echo "${device};${model};${tasks};${datasets};${acc};${logfile}" >> ${WORKSPACE}/summary.log
+    acc=$(grep -Po "acc .*(\d+(\.\d+)?)" ${log_file} | awk -F "|" '{print $2}' | head -n 1 | sed 's/.*://;s/[^0-9.]//g')
+    echo "${device};${model};${tasks};${datasets};${acc};" >> ${WORKSPACE}/summary.log
 fi
diff --git a/.github/workflows/scripts/models/generate_report.sh b/.github/workflows/scripts/models/generate_report.sh
new file mode 100644
index 00000000..4732865d
--- /dev/null
+++ b/.github/workflows/scripts/models/generate_report.sh
@@ -0,0 +1,257 @@
+#!/bin/bash
+set -x
+WORKSPACE=generated  # output directory for report.html and the merged summary.log
+last_log_path=FinalReport  # presumably where the reference FinalReport artifact is unpacked - confirm against the workflow
+summaryLog=${WORKSPACE}/summary.log
+summaryLogLast=${last_log_path}/summary.log
+PATTERN='[-a-zA-Z0-9_]*='  # not referenced anywhere else in this script
+
+function main {
+    echo "summaryLog: ${summaryLog}"
+    echo "summaryLogLast: ${summaryLogLast}"
+    echo "is_perf_reg=false" >> "$GITHUB_ENV"  # default; generate_core appends is_perf_reg=true when a row fails
+    preprocessing
+    generate_html_head  # writes report.html with '>' (truncate); every later generator appends with '>>'
+    generate_html_overview
+    generate_results
+    generate_html_footer
+}
+
+function preprocessing {
+    # Merge the summary.log of every artifact unpacked under ./log/<artifact-name>/ into ${summaryLog}.
+    for file_path in ./log/*; do
+        if [[ -d ${file_path} && -f ${file_path}/summary.log ]]; then
+            cat "${file_path}/summary.log" >> "${summaryLog}"
+        fi
+    done
+}
+
+function generate_html_overview {
+    Test_Info_Title='<th colspan="4">Test Branch</th> <th colspan="4">Commit ID</th> '
+    Test_Info="<th colspan=\"4\">${MR_source_branch:-${GITHUB_REF_NAME}}</th> <th colspan=\"4\">${ghprbActualCommit:-${GITHUB_SHA}}</th> "
+    # Appends the report header; branch/commit fall back to the GitHub Actions defaults when the Jenkins-era variables are unset.
+    cat >>${WORKSPACE}/report.html <<eof
+
+<body>
+    <div id="main">
+        <h1 align="center">GenAIEval Tests
+        [ <a href="${RUN_DISPLAY_URL}">Job-${BUILD_NUMBER}</a> ]</h1>
+      <h1 align="center">Test Status: ${JOB_STATUS}</h1>
+        <h2>Summary</h2>
+        <table class="features-table">
+            <tr>
+              <th>Repo</th>
+              ${Test_Info_Title}
+              </tr>
+              <tr>
+                    <td><a href="https://github.com/opea-project/GenAIEval">GenAIEval</a></td>
+              ${Test_Info}
+                </tr>
+        </table>
+eof
+}
+
+function generate_results {
+    cat >>${WORKSPACE}/report.html <<eof
+    <h2>Performance</h2>
+      <table class="features-table">
+        <tr>
+          <th>Device</th>
+          <th>Tasks</th>
+          <th>Model</th>
+          <th>Datasets</th>
+          <th colspan="2">Accuracy</th>
+        </tr>
+eof
+    # summary.log rows are "device;model;tasks;datasets;accuracy;" (written by collect_log.sh); emit one entry per unique combination.
+    devices=$(cut -d';' -f1 "${summaryLog}" | awk '!a[$0]++')
+    for device in ${devices[@]}; do
+        models=$(grep -F "${device};" "${summaryLog}" | cut -d';' -f2 | awk '!a[$0]++')
+        for model in ${models[@]}; do
+            tasks=$(grep -F "${device};${model};" "${summaryLog}" | cut -d';' -f3 | awk '!a[$0]++')
+            for task in ${tasks[@]}; do
+                datasets=$(grep -F "${device};${model};${task};" "${summaryLog}" | cut -d';' -f4 | awk '!a[$0]++')
+                for dataset in ${datasets[@]}; do
+                    benchmark_pattern="${device};${model};${task};${dataset};"
+                    acc=$(grep -F "${benchmark_pattern}" "${summaryLog}" | cut -d';' -f5 | awk '!a[$0]++')
+                    acc_last=nan
+                    if [ -f "${summaryLogLast}" ] && [ $(grep -cF "${benchmark_pattern}" "${summaryLogLast}") != 0 ]; then
+                        acc_last=$(grep -F "${benchmark_pattern}" "${summaryLogLast}" | cut -d';' -f5 | awk '!a[$0]++')
+                    fi
+                    generate_core
+                done
+            done
+        done
+    done
+    cat >>${WORKSPACE}/report.html <<eof
+    </table>
+eof
+}
+
+function generate_core {
+    # Append one three-row entry (New / Last / New-over-Last) built from the device, task, model,
+    # dataset, acc and acc_last variables set by generate_results. awk prints pass/fail as its last
+    # output line; that line is read back, blanked out of the report and mirrored into GITHUB_ENV.
+    echo "<tr><td rowspan=3>${device}</td><td rowspan=3>${task}</td><td rowspan=3>${model}</td><td rowspan=3>${dataset}</td><td>New</td>" >>${WORKSPACE}/report.html
+    echo | awk -v acc="${acc}" -v acc_l="${acc_last}" '
+        function show_benchmark(a) {
+            # Values without a non-zero digit (empty, nan) render as a blank cell.
+            if(a ~/[1-9]/) {
+                printf("<td>%s</td>\n",a);
+            }else {
+                printf("<td></td>\n");
+            }
+        }
+        function compare_new_last(a,b){
+            if(a ~/[1-9]/ && b ~/[1-9]/) {
+                target = a / b;  # new over last: accuracy is higher-is-better, so a drop below 94.5% fails
+                if(target >= 0.945) {
+                    status_png = "background-color:#90EE90";
+                }else {
+                    status_png = "background-color:#FFD2D2";
+                    job_status = "fail"
+                }
+                printf("<td style=\"%s\">%.2f</td>", status_png, target);
+            }else{
+                if(a == ""){
+                    job_status = "fail"
+                    status_png = "background-color:#FFD2D2";
+                    printf("<td style=\"%s\"></td>", status_png);
+                }else{
+                    printf("<td class=\"col-cell col-cell3\"></td>");
+                }
+            }
+        }
+        BEGIN {
+            job_status = "pass"
+        }{
+            # current
+            show_benchmark(acc)
+            # last
+            printf("</tr>\n<tr><td>Last</td>")
+            show_benchmark(acc_l)
+            # current vs last
+            printf("</tr>\n<tr><td>New/Last</td>");
+            compare_new_last(acc,acc_l)
+            printf("</tr>\n");
+        } END{
+          printf("\n%s", job_status);
+        }
+    ' >>${WORKSPACE}/report.html
+    job_state=$(tail -1 ${WORKSPACE}/report.html)
+    sed -i '$s/.*//' ${WORKSPACE}/report.html
+    if [ "${job_state}" == 'fail' ]; then
+        echo "is_perf_reg=true" >> "$GITHUB_ENV"
+    fi
+}
+
+function generate_html_head {  # starts report.html afresh ('>' truncates) with the <head> and inline CSS
+    cat >${WORKSPACE}/report.html <<eof
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>GenAIEval Model Test Report</title>
+    <style>
+        body {
+            margin: 0;
+            padding: 0;
+            background: white no-repeat left top;
+        }
+
+        #main {
+            /* width: 100%; */
+            margin: 20px auto 10px auto;
+            background: white;
+            -moz-border-radius: 8px;
+            -webkit-border-radius: 8px;
+            padding: 0 30px 30px 30px;
+            border: 1px solid #adaa9f;
+            -moz-box-shadow: 0 2px 2px #9c9c9c;
+            -webkit-box-shadow: 0 2px 2px #9c9c9c;
+        }
+
+        .features-table {
+            width: 100%;
+            margin: 0 auto;
+            border-collapse: separate;
+            border-spacing: 0;
+            text-shadow: 0 1px 0 #fff;
+            color: #2a2a2a;
+            background: #fafafa;
+            background-image: -moz-linear-gradient(top, #fff, #eaeaea, #fff);
+            /* Firefox 3.6 */
+            background-image: -webkit-gradient(linear, center bottom, center top, from(#fff), color-stop(0.5, #eaeaea), to(#fff));
+            font-family: Verdana, Arial, Helvetica
+        }
+
+        .features-table th,
+        td {
+            text-align: center;
+            height: 25px;
+            line-height: 25px;
+            padding: 0 8px;
+            border: 1px solid #cdcdcd;
+            box-shadow: 0 1px 0 white;
+            -moz-box-shadow: 0 1px 0 white;
+            -webkit-box-shadow: 0 1px 0 white;
+            white-space: nowrap;
+        }
+
+        .no-border th {
+            box-shadow: none;
+            -moz-box-shadow: none;
+            -webkit-box-shadow: none;
+        }
+
+        .col-cell {
+            text-align: center;
+            width: 150px;
+            font: normal 1em Verdana, Arial, Helvetica;
+        }
+
+        .col-cell3 {
+            background: #efefef;
+            background: rgba(144, 144, 144, 0.15);
+        }
+
+        .col-cell1,
+        .col-cell2 {
+            background: #B0C4DE;
+            background: rgba(176, 196, 222, 0.3);
+        }
+
+        .col-cellh {
+            font: bold 1.3em 'trebuchet MS', 'Lucida Sans', Arial;
+            -moz-border-radius-topright: 10px;
+            -moz-border-radius-topleft: 10px;
+            border-top-right-radius: 10px;
+            border-top-left-radius: 10px;
+            border-top: 1px solid #eaeaea !important;
+        }
+
+        .col-cellf {
+            font: bold 1.4em Georgia;
+            -moz-border-radius-bottomright: 10px;
+            -moz-border-radius-bottomleft: 10px;
+            border-bottom-right-radius: 10px;
+            border-bottom-left-radius: 10px;
+            border-bottom: 1px solid #dadada !important;
+        }
+    </style>
+</head>
+eof
+}
+
+function generate_html_footer {  # appends the closing tags for the #main div, <body> and <html> opened by the earlier generators
+    cat >>${WORKSPACE}/report.html <<eof
+    </div>
+</body>
+</html>
+eof
+}
+
+main
diff --git a/.github/workflows/scripts/models/model_test.sh b/.github/workflows/scripts/models/model_test.sh
index fb420086..b0c2cba4 100644
--- a/.github/workflows/scripts/models/model_test.sh
+++ b/.github/workflows/scripts/models/model_test.sh
@@ -13,9 +13,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-set -eo pipefail
+set -o pipefail
+set -x
 source /GenAIEval/.github/workflows/scripts/change_color
-
+git config --global --add safe.directory /GenAIEval
 # get parameters
 PATTERN='[-a-zA-Z0-9_]*='
 PERF_STABLE_CHECK=true
@@ -34,11 +35,7 @@ for i in "$@"; do
     esac
 done
 
-log_dir="/GenAIEval/${device}/${model}"
-mkdir -p ${log_dir}
 working_dir=""
-$BOLD_YELLOW && echo "-------- evaluation start --------" && $RESET
-
 main() {
     case ${tasks} in
         "text-generation")
@@ -48,7 +45,16 @@ main() {
         *)
             echo "Not suppotted task"; exit 1;;
     esac
+    if [[ ${model} == *"opt"* ]]; then
+        pretrained="facebook/${model}"
+    else
+        pretrained="${model}"
+    fi
+    log_dir="/log/${device}/${model}"
+    mkdir -p ${log_dir}
+    $BOLD_YELLOW && echo "-------- evaluation start --------" && $RESET
     run_benchmark
+    cp ${log_dir}/${device}-${tasks}-${model}-${datasets}.log /GenAIEval/
 }
 
 function prepare() {
@@ -69,15 +75,16 @@ function prepare() {
 
 function run_benchmark() {
     cd ${working_dir}
-    overall_log="${log_dir}/${device}-${model}-${tasks}-${datasets}.log"
+    overall_log="${log_dir}/${device}-${tasks}-${model}-${datasets}.log"
     python main.py \
         --model hf \
-        --model_args pretrained=${model} \
+        --model_args pretrained=${pretrained} \
         --tasks ${datasets} \
         --device ${device} \
-        --batch_size 112
-        2>&1 | tee ${overall_log}
-
+        --batch_size 112  2>&1 | tee ${overall_log}
+    
+    echo "print log content:"
+    cat ${overall_log}
     status=$?
     if [ ${status} != 0 ]; then
         echo "Evaluation process returned non-zero exit code."