opea-project · hshen14 · May 13, 2024 · May 13, 2024 · May 13, 2024 · May 13, 2024
@@ -25,9 +25,8 @@ permissions: write-all
 env:
   OUT_SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts/models
   SCRIPT_PATH: /GenAIEval/.github/workflows/scripts
-  REPO_NAME: "GenAIEval"
+  DOCKER_NAME: "genaieval"
   DOCKER_TAG: "latest"
-  DOCKER_FILE_NAME: "model.dockerfile"
   CONTAINER_NAME: "modelTest"
 
 
@@ -37,9 +36,10 @@ jobs:
     strategy:
       matrix:
         include:
-          - modelName: "EleutherAI/gpt-j-6B"
-            task: "hellaswag"
+          - modelName: "facebook/opt-125m"
+            datasets: "piqa"
             device: "cpu"
+            tasks: "text-generation"
       fail-fast: true
 
     steps:
@@ -54,7 +54,7 @@ jobs:
     # We need this because GitHub needs to clone the branch to pipeline
       - name: Docker Build
         run: |
-          docker build -f ${{ github.workspace }}/.github/workflows/docker/${{ env.DOCKER_FILE_NAME }} -t ${{ env.REPO_NAME }}:${{ env.DOCKER_TAG }} .
+          docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
 
       - name: Docker Run
         run: |
@@ -64,7 +64,7 @@ jobs:
           fi
           docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \
           -v ${{ github.workspace }}:/GenAIEval \
-          ${{ env.REPO_NAME }}:${{ env.DOCKER_TAG }}
+          ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
 
       - name: Binary build
         run: |
@@ -93,15 +93,16 @@ jobs:
         run: |
             docker exec ${{ env.CONTAINER_NAME }} \
             bash -c "cd /GenAIEval/.github/workflows/scripts/models \
-            && bash model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --tasks=${{ matrix.task }}"
+            && bash model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
 
       - name: Collect Log
         run: |
             docker exec ${{ env.CONTAINER_NAME }} \
             bash -c "cd /GenAIEval/.github/workflows/scripts/models \
             && bash -x collect_log.sh --model=${{ matrix.modelName }} \
              --device=${{ matrix.device }} \
-             --task=${{ matrix.task }}
+             --datasets=${{ matrix.datasets }} \
+             --tasks=${{ matrix.tasks }}"
 
       - name: Publish pipeline artifact
         uses: actions/upload-artifact@v4

@@ -0,0 +1,60 @@
+#!/bin/bash
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+source ${workspace}/.github/workflows/scripts/change_color
+log_dir=${workspace}/.github/workflows/scripts/codeScan
+
+
+echo "---Updating definition (DAT) files ---"
+DEFS_URL=https://update.nai.com/products/commonupdater/current/vscandat1000/dat/0000
+echo "Finding latest defs at $DEFS_URL/avvdat.ini..." \
+ && wget -q $DEFS_URL/avvdat.ini \
+ && echo "SUCCESS" || fail
+
+inifile="avvdat.ini"
+filename=`awk -F"=" '$2 ~ /avvdat.*zip/ { print $2 } ' $inifile`
+filename2="$(echo -e "${filename}" | tr -d '[:space:]')"
+
+if [ -z "$filename2" ]
+then
+      echo "Cannot get defs information from INI file:"
+      cat $inifile
+      fail
+fi
+
+echo "Downloading latest defs from $DEFS_URL/$filename2..." \
+ && wget -q $DEFS_URL/$filename2 \
+ && echo "SUCCESS" || fail
+
+echo "Extracting latest defs..." \
+ && unzip -o $filename2 -d /usr/local/uvscan \
+ && echo "SUCCESS" || fail
+
+echo "--- Scanning ---"
+ENV_SCAN_OPTS="--analyze --mime --program --recursive --unzip --threads 4 --summary --verbose --html=${workspace}/.github/workflows/scripts/codeScan/report.html"
+echo "Scan Options: $ENV_SCAN_OPTS"
+
+rm -r ${workspace}/avvdat*
+rm -r ${workspace}/.git
+uvscan $ENV_SCAN_OPTS ${workspace} 2>&1 | tee ${log_dir}/trellix.log
+
+
+if [[ $(grep "Possibly Infected" ${log_dir}/trellix.log | sed 's/[^0-9]//g') != 0 ]]; then
+    $BOLD_RED && echo "Error!! Please Click on the artifact button to download and check error details." && $RESET
+    exit 1
+fi
+
+$BOLD_PURPLE && echo "Congratulations, Trellix Scan passed!" && $LIGHT_PURPLE && echo " You can click on the artifact button to see the log details." && $RESET
+exit 0
@@ -14,32 +14,34 @@
 # limitations under the License.
 
 set -eo pipefail
-source /GenAIEval/.github/workflows/script/change_color.sh
+source /GenAIEval/.github/workflows/scripts/change_color
 WORKSPACE="/GenAIEval"
 # get parameters
 PATTERN='[-a-zA-Z0-9_]*='
 PERF_STABLE_CHECK=true
 for i in "$@"; do
     case $i in
+        --datasets=*)  # require '=' like the sibling options; a bare --datasets is rejected
+            datasets=`echo $i | sed "s/${PATTERN}//"`;;
         --device=*)
             device=`echo $i | sed "s/${PATTERN}//"`;;
         --model=*)
             model=`echo $i | sed "s/${PATTERN}//"`;;
-        --task=*)
-            task=`echo $i | sed "s/${PATTERN}//"`;;
+        --tasks=*)
+            tasks=`echo $i | sed "s/${PATTERN}//"`;;
         *)
             echo "Parameter $i not recognized."; exit 1;;
     esac
 done
 
-output_file="/GenAIEval/${device}/${model}/${device}-${model}-${task}.log"
+log_file="/GenAIEval/${device}/${model}/${device}-${model}-${tasks}-${datasets}.log"
 $BOLD_YELLOW && echo "-------- Collect logs --------" && $RESET
 
 echo "working in"
 pwd
-if [[ ! -f ${output_file} ]]; then
-    echo "${device};${model};${task};;${logfile}" >> ${WORKSPACE}/summary.log
+if [[ ! -f ${log_file} ]]; then  # no log produced: record the row with an empty accuracy field
+    echo "${device};${model};${tasks};${datasets};;${log_file}" >> ${WORKSPACE}/summary.log
 else
-    acc=$(grep -Po "Accuracy .* is:\\s+(\\d+(\\.\\d+)?)" ${acc_log_name} | head -n 1 | sed 's/.*://;s/[^0-9.]//g')
-    echo "${device};${model};${task};${acc};${logfile}" >> ${WORKSPACE}/summary.log
+    acc=$(grep -Po "Accuracy .* is:\\s+(\\d+(\\.\\d+)?)" ${log_file} | head -n 1 | sed 's/.*://;s/[^0-9.]//g')
+    echo "${device};${model};${tasks};${datasets};${acc};${log_file}" >> ${WORKSPACE}/summary.log
 fi
@@ -14,37 +14,45 @@
 # limitations under the License.
 
 set -eo pipefail
-source /GenAIEval/.github/workflows/script/change_color.sh
+source /GenAIEval/.github/workflows/scripts/change_color
 
 # get parameters
 PATTERN='[-a-zA-Z0-9_]*='
 PERF_STABLE_CHECK=true
 for i in "$@"; do
     case $i in
+        --datasets=*)  # require '=' like the sibling options; a bare --datasets is rejected
+            datasets=`echo $i | sed "s/${PATTERN}//"`;;
         --device=*)
             device=`echo $i | sed "s/${PATTERN}//"`;;
         --model=*)
             model=`echo $i | sed "s/${PATTERN}//"`;;
-        --task=*)
-            task=`echo $i | sed "s/${PATTERN}//"`;;
+        --tasks=*)
+            tasks=`echo $i | sed "s/${PATTERN}//"`;;
         *)
             echo "Parameter $i not recognized."; exit 1;;
     esac
 done
 
 log_dir="/GenAIEval/${device}/${model}"
 mkdir -p ${log_dir}
-
+working_dir=""
 $BOLD_YELLOW && echo "-------- evaluation start --------" && $RESET
 
 main() {
-    #prepare
+    case ${tasks} in  # select the harness examples directory for the requested task type
+        "text-generation")
+            working_dir="/GenAIEval/GenAIEval/evaluation/lm_evaluation_harness/examples";;
+        "code-generation")
+            working_dir="/GenAIEval/GenAIEval/evaluation/bigcode_evaluation_harness/examples";;
+        *)
+            echo "Not supported task"; exit 1;;
+    esac
     run_benchmark
 }
 
 function prepare() {
     ## prepare env
-    working_dir="/GenAIEval"
     cd ${working_dir}
     echo "Working in ${working_dir}"
     echo -e "\nInstalling model requirements..."
@@ -54,18 +62,20 @@ function prepare() {
     else
         echo "Not found requirements.txt file."
     fi
+    if [[ ${device} == "hpu" ]]; then
+        pip install --upgrade-strategy eager optimum[habana]
+    fi
 }
 
 function run_benchmark() {
     cd ${working_dir}
-    pip install --upgrade-strategy eager optimum[habana]
-    overall_log="${log_dir}/${device}-${model}-${task}.log"
+    overall_log="${log_dir}/${device}-${model}-${tasks}-${datasets}.log"  # stdout+stderr of the run below are tee'd into this file
     python main.py \
         --model hf \
         --model_args pretrained=${model} \
-        --tasks ${task} \
+        --tasks ${datasets} \
         --device ${device} \
-        --batch_size 8
+        --batch_size 112 \
         2>&1 | tee ${overall_log}
 
     status=$?

@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-source ../../change_color.sh
+source ../../change_color
 LOG_DIR=$1
 coverage_compare="${LOG_DIR}/coverage_compare.html"
 coverage_log_pr="${LOG_DIR}/UnitTestPR-test/coverage_pr"

@@ -62,7 +62,7 @@ jobs:
 
         - name: Docker Build
           run: |
-            docker build -f ${{ github.workspace }}/.github/workflows/docker/ut.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
+            docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
 
         - name: Docker Run
           run: |

diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ For evaluating the models on text-generation tasks, we follow the [lm-evaluation
 ```shell
 
 # pip install --upgrade-strategy eager optimum[habana]
-cd GenAIEval/evaluation/lm_evaluation_harness
+cd GenAIEval/evaluation/lm_evaluation_harness/examples
 python main.py \
     --model gaudi-hf \
     --model_args pretrained=EleutherAI/gpt-j-6B \
@@ -29,7 +29,7 @@ python main.py \
 ##### CPU
 ```shell
 
-cd GenAIEval/evaluation/lm_evaluation_harness
+cd GenAIEval/evaluation/lm_evaluation_harness/examples
 python main.py \
     --model hf \
     --model_args pretrained=EleutherAI/gpt-j-6B \
@@ -57,7 +57,7 @@ For evaluating the models on coding tasks or specifically coding LLMs, we follow
 #### command line usage
 
 ```shell
-cd GenAIEval/evaluation/bigcode_evaluation_harness
+cd GenAIEval/evaluation/bigcode_evaluation_harness/examples
 python main.py \
     --model "codeparrot/codeparrot-small" \
     --tasks "humaneval" \