Skip to content

Commit

Permalink
add model test and trellix (#11)
Browse files Browse the repository at this point in the history
* add model test and trellix

Signed-off-by: Wenxin Zhang <[email protected]>

---------

Signed-off-by: Wenxin Zhang <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
VincyZhang and pre-commit-ci[bot] authored May 13, 2024
1 parent 90be6c2 commit 0a946dd
Show file tree
Hide file tree
Showing 10 changed files with 104 additions and 31 deletions.
File renamed without changes.
17 changes: 9 additions & 8 deletions .github/workflows/model_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,8 @@ permissions: write-all
env:
OUT_SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts/models
SCRIPT_PATH: /GenAIEval/.github/workflows/scripts
REPO_NAME: "GenAIEval"
DOCKER_NAME: "genaieval"
DOCKER_TAG: "latest"
DOCKER_FILE_NAME: "model.dockerfile"
CONTAINER_NAME: "modelTest"


Expand All @@ -37,9 +36,10 @@ jobs:
strategy:
matrix:
include:
- modelName: "EleutherAI/gpt-j-6B"
task: "hellaswag"
- modelName: "facebook/opt-125m"
datasets: "piqa"
device: "cpu"
tasks: "text-generation"
fail-fast: true

steps:
Expand All @@ -54,7 +54,7 @@ jobs:
# We need this because GitHub needs to clone the branch to pipeline
- name: Docker Build
run: |
docker build -f ${{ github.workspace }}/.github/workflows/docker/${{ env.DOCKER_FILE_NAME }} -t ${{ env.REPO_NAME }}:${{ env.DOCKER_TAG }} .
docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
- name: Docker Run
run: |
Expand All @@ -64,7 +64,7 @@ jobs:
fi
docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \
-v ${{ github.workspace }}:/GenAIEval \
${{ env.REPO_NAME }}:${{ env.DOCKER_TAG }}
${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
- name: Binary build
run: |
Expand Down Expand Up @@ -93,15 +93,16 @@ jobs:
run: |
docker exec ${{ env.CONTAINER_NAME }} \
bash -c "cd /GenAIEval/.github/workflows/scripts/models \
&& bash model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --tasks=${{ matrix.task }}"
&& bash model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
- name: Collect Log
run: |
docker exec ${{ env.CONTAINER_NAME }} \
bash -c "cd /GenAIEval/.github/workflows/scripts/models \
&& bash -x collect_log.sh --model=${{ matrix.modelName }} \
--device=${{ matrix.device }} \
--task=${{ matrix.task }}
--datasets=${{ matrix.datasets }} \
--tasks=${{ matrix.tasks }}
- name: Publish pipeline artifact
uses: actions/upload-artifact@v4
Expand Down
60 changes: 60 additions & 0 deletions .github/workflows/scripts/codeScan/trellix.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/bin/bash
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Trellix (uvscan) virus scan of the repository checkout.
#
# Steps:
#   1. Download the latest DAT definition archive from the update server.
#   2. Unpack it into /usr/local/uvscan.
#   3. Scan ${workspace} and write the log to ${log_dir}/trellix.log.
#   4. Exit 1 unless the scan summary reports exactly 0 "Possibly Infected".
#
# Required environment:
#   workspace - path of the repository checkout to scan.

# Abort early when workspace is missing: an empty value would turn the
# clean-up further down into "rm -r /.git" and "rm -r /avvdat*".
: "${workspace:?workspace must point to the repository checkout}"

# NOTE(review): change_color is presumably where $BOLD_RED, $BOLD_PURPLE,
# $LIGHT_PURPLE and $RESET come from; its content is not visible here.
source "${workspace}/.github/workflows/scripts/change_color"
log_dir="${workspace}/.github/workflows/scripts/codeScan"

# The steps below call `fail` on error. Provide a fallback only when nothing
# with that name exists already, so a definition from a sourced file wins.
# Without it a failed download would merely print "command not found" and the
# scan would continue with missing or stale definitions.
if ! command -v fail >/dev/null 2>&1; then
  fail() {
    echo "FAILED" >&2
    exit 1
  }
fi

echo "---Updating definition (DAT) files ---"
DEFS_URL=https://update.nai.com/products/commonupdater/current/vscandat1000/dat/0000
echo "Finding latest defs at $DEFS_URL/avvdat.ini..."
# Explicit if/else instead of `a && b || fail`, which would also run `fail`
# when the success branch itself returns non-zero.
if wget -q "$DEFS_URL/avvdat.ini"; then
  echo "SUCCESS"
else
  fail
fi

inifile="avvdat.ini"
# Take the first value that names an avvdat*.zip archive; stopping at the first
# match keeps several matching entries from being glued into one bogus name
# once whitespace (including newlines) is stripped.
filename=$(awk -F"=" '$2 ~ /avvdat.*zip/ { print $2; exit }' "$inifile")
filename2="$(printf '%s' "${filename}" | tr -d '[:space:]')"

if [[ -z "$filename2" ]]; then
  echo "Cannot get defs information from INI file:"
  cat "$inifile"
  fail
fi

echo "Downloading latest defs from $DEFS_URL/$filename2..."
if wget -q "$DEFS_URL/$filename2"; then
  echo "SUCCESS"
else
  fail
fi

echo "Extracting latest defs..."
if unzip -o "$filename2" -d /usr/local/uvscan; then
  echo "SUCCESS"
else
  fail
fi

echo "--- Scanning ---"
# Options are kept in an array so each one reaches uvscan as a single word
# even when ${workspace} contains whitespace.
scan_opts=(
  --analyze --mime --program --recursive --unzip
  --threads 4 --summary --verbose
  "--html=${workspace}/.github/workflows/scripts/codeScan/report.html"
)
echo "Scan Options: ${scan_opts[*]}"

# Remove the downloaded definition files and the git metadata from the scan
# target before scanning. -f: the avvdat files may not live in ${workspace}.
rm -rf -- "${workspace:?}"/avvdat*
rm -rf -- "${workspace:?}/.git"
uvscan "${scan_opts[@]}" "${workspace}" 2>&1 | tee "${log_dir}/trellix.log"

# Keep only the digits of the "Possibly Infected" summary line. Anything other
# than a literal 0 fails the job; that includes an empty result when the
# summary line is missing (e.g. the scanner did not run).
infected=$(grep "Possibly Infected" "${log_dir}/trellix.log" | sed 's/[^0-9]//g')
if [[ "$infected" != 0 ]]; then
  $BOLD_RED && echo "Error!! Please Click on the artifact button to download and check error details." && $RESET
  exit 1
fi

$BOLD_PURPLE && echo "Congratulations, Trellix Scan passed!" && $LIGHT_PURPLE && echo " You can click on the artifact button to see the log details." && $RESET
exit 0
18 changes: 10 additions & 8 deletions .github/workflows/scripts/models/collect_log.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,32 +14,34 @@
# limitations under the License.

set -eo pipefail
source /GenAIEval/.github/workflows/script/change_color.sh
source /GenAIEval/.github/workflows/scripts/change_color
WORKSPACE="/GenAIEval"
# get parameters
PATTERN='[-a-zA-Z0-9_]*='
PERF_STABLE_CHECK=true
for i in "$@"; do
case $i in
--datasets*)
datasets=`echo $i | sed "s/${PATTERN}//"`;;
--device=*)
device=`echo $i | sed "s/${PATTERN}//"`;;
--model=*)
model=`echo $i | sed "s/${PATTERN}//"`;;
--task=*)
task=`echo $i | sed "s/${PATTERN}//"`;;
--tasks=*)
tasks=`echo $i | sed "s/${PATTERN}//"`;;
*)
echo "Parameter $i not recognized."; exit 1;;
esac
done

output_file="/GenAIEval/${device}/${model}/${device}-${model}-${task}.log"
log_file="/GenAIEval/${device}/${model}/${device}-${model}-${tasks}-${datasets}.log"
$BOLD_YELLOW && echo "-------- Collect logs --------" && $RESET

echo "working in"
pwd
if [[ ! -f ${output_file} ]]; then
echo "${device};${model};${task};;${logfile}" >> ${WORKSPACE}/summary.log
if [[ ! -f ${log_file} ]]; then
echo "${device};${model};${tasks};${datasets};;${logfile}" >> ${WORKSPACE}/summary.log
else
acc=$(grep -Po "Accuracy .* is:\\s+(\\d+(\\.\\d+)?)" ${acc_log_name} | head -n 1 | sed 's/.*://;s/[^0-9.]//g')
echo "${device};${model};${task};${acc};${logfile}" >> ${WORKSPACE}/summary.log
acc=$(grep -Po "Accuracy .* is:\\s+(\\d+(\\.\\d+)?)" ${log_file} | head -n 1 | sed 's/.*://;s/[^0-9.]//g')
echo "${device};${model};${tasks};${datasets};${acc};${logfile}" >> ${WORKSPACE}/summary.log
fi
30 changes: 20 additions & 10 deletions .github/workflows/scripts/models/model_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,37 +14,45 @@
# limitations under the License.

set -eo pipefail
source /GenAIEval/.github/workflows/script/change_color.sh
source /GenAIEval/.github/workflows/scripts/change_color

# get parameters
PATTERN='[-a-zA-Z0-9_]*='
PERF_STABLE_CHECK=true
for i in "$@"; do
case $i in
--datasets*)
datasets=`echo $i | sed "s/${PATTERN}//"`;;
--device=*)
device=`echo $i | sed "s/${PATTERN}//"`;;
--model=*)
model=`echo $i | sed "s/${PATTERN}//"`;;
--task=*)
task=`echo $i | sed "s/${PATTERN}//"`;;
--tasks=*)
tasks=`echo $i | sed "s/${PATTERN}//"`;;
*)
echo "Parameter $i not recognized."; exit 1;;
esac
done

log_dir="/GenAIEval/${device}/${model}"
mkdir -p ${log_dir}

working_dir=""
$BOLD_YELLOW && echo "-------- evaluation start --------" && $RESET

# Entry point of the model test.
# Globals:
#   tasks       (read)    - task family: "text-generation" or "code-generation"
#   working_dir (written) - examples directory of the matching harness
# Calls run_benchmark once working_dir is set.
# Exits with status 1 (message on stderr) for any other value of tasks.
main() {
  #prepare
  case "${tasks}" in
    "text-generation")
      working_dir="/GenAIEval/GenAIEval/evaluation/lm_evaluation_harness/examples"
      ;;
    "code-generation")
      working_dir="/GenAIEval/GenAIEval/evaluation/bigcode_evaluation_harness/examples"
      ;;
    *)
      # Name the offending value so a wrong --tasks argument is easy to spot.
      echo "Not supported task: ${tasks}" >&2
      exit 1
      ;;
  esac
  run_benchmark
}

function prepare() {
## prepare env
working_dir="/GenAIEval"
cd ${working_dir}
echo "Working in ${working_dir}"
echo -e "\nInstalling model requirements..."
Expand All @@ -54,18 +62,20 @@ function prepare() {
else
echo "Not found requirements.txt file."
fi
if [[ ${device} == "hpu" ]]; then
pip install --upgrade-strategy eager optimum[habana]
fi
}

function run_benchmark() {
cd ${working_dir}
pip install --upgrade-strategy eager optimum[habana]
overall_log="${log_dir}/${device}-${model}-${task}.log"
overall_log="${log_dir}/${device}-${model}-${tasks}-${datasets}.log"
python main.py \
--model hf \
--model_args pretrained=${model} \
--tasks ${task} \
--tasks ${datasets} \
--device ${device} \
--batch_size 8
--batch_size 112
2>&1 | tee ${overall_log}

status=$?
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/scripts/unittest/calc_coverage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

source ../../change_color.sh
source ../../change_color
LOG_DIR=$1
coverage_compare="${LOG_DIR}/coverage_compare.html"
coverage_log_pr="${LOG_DIR}/UnitTestPR-test/coverage_pr"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/unittest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:

- name: Docker Build
run: |
docker build -f ${{ github.workspace }}/.github/workflows/docker/ut.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .

- name: Docker Run
run: |
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ For evaluating the models on text-generation tasks, we follow the [lm-evaluation
```shell

# pip install --upgrade-strategy eager optimum[habana]
cd GenAIEval/evaluation/lm_evaluation_harness
cd GenAIEval/evaluation/lm_evaluation_harness/examples
python main.py \
--model gaudi-hf \
--model_args pretrained=EleutherAI/gpt-j-6B \
Expand All @@ -29,7 +29,7 @@ python main.py \
##### CPU
```shell

cd GenAIEval/evaluation/lm_evaluation_harness
cd GenAIEval/evaluation/lm_evaluation_harness/examples
python main.py \
--model hf \
--model_args pretrained=EleutherAI/gpt-j-6B \
Expand Down Expand Up @@ -57,7 +57,7 @@ For evaluating the models on coding tasks or specifically coding LLMs, we follow
#### command line usage

```shell
cd GenAIEval/evaluation/bigcode_evaluation_harness
cd GenAIEval/evaluation/bigcode_evaluation_harness/examples
python main.py \
--model "codeparrot/codeparrot-small" \
--tasks "humaneval" \
Expand Down

0 comments on commit 0a946dd

Please sign in to comment.