From 6adbcd93e97d969a84dfce403f33260157b6b161 Mon Sep 17 00:00:00 2001 From: Wenxin Zhang Date: Tue, 14 May 2024 09:41:25 +0800 Subject: [PATCH] update Signed-off-by: Wenxin Zhang --- .github/workflows/model_test_cpu.yml | 165 +++++++++++ .../{model_test.yml => model_test_hpu.yml} | 99 +++++-- .../workflows/scripts/models/collect_log.sh | 9 +- .../scripts/models/generate_report.sh | 257 ++++++++++++++++++ .../workflows/scripts/models/model_test.sh | 29 +- 5 files changed, 517 insertions(+), 42 deletions(-) create mode 100644 .github/workflows/model_test_cpu.yml rename .github/workflows/{model_test.yml => model_test_hpu.yml} (54%) create mode 100644 .github/workflows/scripts/models/generate_report.sh diff --git a/.github/workflows/model_test_cpu.yml b/.github/workflows/model_test_cpu.yml new file mode 100644 index 00000000..e8c97243 --- /dev/null +++ b/.github/workflows/model_test_cpu.yml @@ -0,0 +1,165 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Model Test on CPU + +on: + workflow_dispatch: + +# If there is a new commit, the previous jobs will be canceled +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true +permissions: write-all +env: + OUT_SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts/models + SCRIPT_PATH: /GenAIEval/.github/workflows/scripts + DOCKER_NAME: "genaieval" + DOCKER_TAG: "latest" + CONTAINER_NAME: "modelTest" + + +jobs: + Evaluation-Workflow: + runs-on: aise-cluster-cpu + strategy: + matrix: + include: + - modelName: "opt-125m" + datasets: "piqa" + device: "cpu" + tasks: "text-generation" + fail-fast: true + + steps: + - name: Clean Up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Load environment variables + run: + cat ~/actions-runner4/.env >> $GITHUB_ENV + + - name: Checkout out Repo + uses: actions/checkout@v4 + with: + submodules: "recursive" + fetch-tags: true + # We need this because GitHub needs to clone the branch to pipeline + - name: Docker Build + run: | + docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} . 
+ + - name: Docker Run + run: | + if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}'$) ]]; then + docker stop ${{ env.CONTAINER_NAME }} + docker rm -vf ${{ env.CONTAINER_NAME }} || true + fi + docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \ + -v ${{ github.workspace }}:/GenAIEval \ + -e http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" -e https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" \ + ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} + + - name: Binary build + run: | + docker exec ${{ env.CONTAINER_NAME }} \ + bash -c "cd /GenAIEval && pip install -r requirements.txt && python setup.py install" + + - name: Evaluation + run: | + docker exec ${{ env.CONTAINER_NAME }} \ + bash -c "cd /GenAIEval/.github/workflows/scripts/models \ + && bash model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}" + + - name: Collect Log + run: | + docker exec ${{ env.CONTAINER_NAME }} \ + bash -c "cd /GenAIEval/.github/workflows/scripts/models \ + && bash -x collect_log.sh --model=${{ matrix.modelName }} \ + --device=${{ matrix.device }} \ + --datasets=${{ matrix.datasets }} \ + --tasks=${{ matrix.tasks }}" + + - name: Publish pipeline artifact + uses: actions/upload-artifact@v4 + if: ${{ !cancelled() }} + with: + name: ${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }} + path: | + ${{ github.workspace }}/${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}.log + ${{ github.workspace }}/summary.log + if-no-files-found: ignore # 'warn' or 'ignore' are also available, defaults to `warn` + retention-days: 60 # 1 <= retention-days <= 90 + + Generate-Report: + runs-on: ubuntu-latest + needs: [Evaluation-Workflow] + steps: + - name: Checkout out Repo + uses: actions/checkout@v4 + + - name: Download Summary Log + uses: actions/download-artifact@v4 + with: + path: ${{ 
env.OUT_SCRIPT_PATH }}/log + - name: Display structure of downloaded files + run: ls -R + - name: Analysis Summary + run: | + cd ${{ env.OUT_SCRIPT_PATH }} + ls -R + + - name: Download Reference Artifact + id: download-artifact + uses: dawidd6/action-download-artifact@v3.1.2 + with: + workflow: model_test_cpu.yml + name: FinalReport + run_id: ${{ vars.ModelTest_CPU_REF_ID }} + path: ${{ env.OUT_SCRIPT_PATH }} + name_is_regexp: true + repo: ${{ github.repository }} + check_artifacts: false + search_artifacts: false + skip_unpack: false + if_no_artifact_found: warn + + - name: Display structure of downloaded files + run: cd ${{ env.OUT_SCRIPT_PATH }}/log && ls -R + + - name: Generate report + run: | + echo "------ Generating final report.html ------" + cd ${{ env.OUT_SCRIPT_PATH }} + mkdir -p generated + /usr/bin/bash generate_report.sh + env: + RUN_DISPLAY_URL: https://github.com/opea-project/GenAIEval/actions/runs/${{ github.run_id }} + BUILD_NUMBER: ${{ github.run_id }} + JOB_STATUS: succeed + + - name: Publish Report + uses: actions/upload-artifact@v4 + if: ${{ !cancelled() }} + with: + name: FinalReport + path: ${{ env.OUT_SCRIPT_PATH }}/generated + + - name: Specify performance regression + if: ${{ !cancelled() }} + run: | + if [ "${{ env.is_perf_reg }}" == 'true' ]; then + echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports." + exit 1 + fi diff --git a/.github/workflows/model_test.yml b/.github/workflows/model_test_hpu.yml similarity index 54% rename from .github/workflows/model_test.yml rename to .github/workflows/model_test_hpu.yml index 4f18c630..db0ee9b8 100644 --- a/.github/workflows/model_test.yml +++ b/.github/workflows/model_test_hpu.yml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-name: Model Test +name: Model Test on HPU on: workflow_dispatch: @@ -32,13 +32,13 @@ env: jobs: Evaluation-Workflow: - runs-on: aise-cluster + runs-on: aise-cluster-hpu strategy: matrix: include: - - modelName: "facebook/opt-125m" + - modelName: "opt-125m" datasets: "piqa" - device: "cpu" + device: "hpu" tasks: "text-generation" fail-fast: true @@ -54,7 +54,7 @@ jobs: # We need this because GitHub needs to clone the branch to pipeline - name: Docker Build run: | - docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} . + docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} . - name: Docker Run run: | @@ -64,6 +64,7 @@ jobs: fi docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \ -v ${{ github.workspace }}:/GenAIEval \ + -e http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" -e https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" \ ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} - name: Binary build @@ -71,24 +72,6 @@ jobs: docker exec ${{ env.CONTAINER_NAME }} \ bash -c "cd /GenAIEval && pip install -r requirements.txt && python setup.py install" - #- name: Download Reference Artifact - # id: download-artifact - # uses: dawidd6/action-download-artifact@v3.1.2 - # with: - # workflow: model_test.yml - # name: ${{ matrix.device }}-${{ matrix.modelName }} - # run_id: ${{ vars.ModelTest_REF_ID }} - # path: ${{ github.workspace }}/${{ matrix.device }}_${{ matrix.modelName }}_refer_log - # name_is_regexp: true - # repo: ${{ github.repository }} - # check_artifacts: false - # search_artifacts: false - # skip_unpack: false - # if_no_artifact_found: warn - - #- name: Display structure of downloaded files - # run: ls -R - - name: Evaluation run: | docker exec 
${{ env.CONTAINER_NAME }} \ @@ -102,15 +85,77 @@ jobs: && bash -x collect_log.sh --model=${{ matrix.modelName }} \ --device=${{ matrix.device }} \ --datasets=${{ matrix.datasets }} \ - --tasks=${{ matrix.tasks }} + --tasks=${{ matrix.tasks }}" - name: Publish pipeline artifact uses: actions/upload-artifact@v4 if: ${{ !cancelled() }} with: - name: ${{ matrix.device }}-${{ matrix.modelName }} + name: ${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }} path: | - ${{ github.workspace }}/${{ matrix.device }}/${{ matrix.modelName }} - ${{ github.workspace }}/.summary.log + ${{ github.workspace }}/${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}.log + ${{ github.workspace }}/summary.log if-no-files-found: ignore # 'warn' or 'ignore' are also available, defaults to `warn` retention-days: 60 # 1 <= retention-days <= 90 + + Generate-Report: + runs-on: ubuntu-latest + needs: [Evaluation-Workflow] + steps: + - name: Checkout out Repo + uses: actions/checkout@v4 + + - name: Download Summary Log + uses: actions/download-artifact@v4 + with: + path: ${{ env.OUT_SCRIPT_PATH }}/log + - name: Display structure of downloaded files + run: ls -R + - name: Analysis Summary + run: | + cd ${{ env.OUT_SCRIPT_PATH }} + ls -R + + - name: Download Reference Artifact + id: download-artifact + uses: dawidd6/action-download-artifact@v3.1.2 + with: + workflow: model_test_hpu.yml + name: FinalReport + run_id: ${{ vars.ModelTest_HPU_REF_ID }} + path: ${{ env.OUT_SCRIPT_PATH }} + name_is_regexp: true + repo: ${{ github.repository }} + check_artifacts: false + search_artifacts: false + skip_unpack: false + if_no_artifact_found: warn + + - name: Display structure of downloaded files + run: cd ${{ env.OUT_SCRIPT_PATH }}/log && ls -R + + - name: Generate report + run: | + echo "------ Generating final report.html ------" + cd ${{ env.OUT_SCRIPT_PATH }} + mkdir -p generated + /usr/bin/bash generate_report.sh + env: + RUN_DISPLAY_URL: 
https://github.com/opea-project/GenAIEval/actions/runs/${{ github.run_id }} + BUILD_NUMBER: ${{ github.run_id }} + JOB_STATUS: succeed + + - name: Publish Report + uses: actions/upload-artifact@v4 + if: ${{ !cancelled() }} + with: + name: FinalReport + path: ${{ env.OUT_SCRIPT_PATH }}/generated + + - name: Specify performance regression + if: ${{ !cancelled() }} + run: | + if [ "${{ env.is_perf_reg }}" == 'true' ]; then + echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports." + exit 1 + fi diff --git a/.github/workflows/scripts/models/collect_log.sh b/.github/workflows/scripts/models/collect_log.sh index a197393a..9c22d78f 100644 --- a/.github/workflows/scripts/models/collect_log.sh +++ b/.github/workflows/scripts/models/collect_log.sh @@ -14,6 +14,7 @@ # limitations under the License. set -eo pipefail +set -x source /GenAIEval/.github/workflows/scripts/change_color WORKSPACE="/GenAIEval" # get parameters @@ -34,14 +35,14 @@ for i in "$@"; do esac done -log_file="/GenAIEval/${device}/${model}/${device}-${model}-${tasks}-${datasets}.log" +log_file="/log/${device}/${model}/${device}-${tasks}-${model}-${datasets}.log" $BOLD_YELLOW && echo "-------- Collect logs --------" && $RESET echo "working in" pwd if [[ ! 
-f ${log_file} ]]; then - echo "${device};${model};${tasks};${datasets};;${logfile}" >> ${WORKSPACE}/summary.log + echo "${device};${model};${tasks};${datasets};;" >> ${WORKSPACE}/summary.log else - acc=$(grep -Po "Accuracy .* is:\\s+(\\d+(\\.\\d+)?)" ${log_file} | head -n 1 | sed 's/.*://;s/[^0-9.]//g') - echo "${device};${model};${tasks};${datasets};${acc};${logfile}" >> ${WORKSPACE}/summary.log + acc=$(grep -Po "acc .*(\d+(\.\d+)?)" ${log_file} | awk -F "|" '{print $2}' | head -n 1 | sed 's/.*://;s/[^0-9.]//g') + echo "${device};${model};${tasks};${datasets};${acc};" >> ${WORKSPACE}/summary.log fi diff --git a/.github/workflows/scripts/models/generate_report.sh b/.github/workflows/scripts/models/generate_report.sh new file mode 100644 index 00000000..4732865d --- /dev/null +++ b/.github/workflows/scripts/models/generate_report.sh @@ -0,0 +1,257 @@ +#!/bin/bash +set -x +WORKSPACE=generated +last_log_path=FinalReport +summaryLog=${WORKSPACE}/summary.log +summaryLogLast=${last_log_path}/summary.log +PATTERN='[-a-zA-Z0-9_]*=' + +function main { + echo "summaryLog: ${summaryLog}" + echo "summaryLogLast: ${summaryLogLast}" + echo "is_perf_reg=false" >> "$GITHUB_ENV" + preprocessing + generate_html_head + generate_html_overview + generate_results + generate_html_footer +} + +function preprocessing { + for file_path in ./* + do + if [[ -d ${file_path} ]] && [[ -f ${file_path}/summary.log ]]; then + cat ${file_path}/summary.log >> ${summaryLog} + fi + done +} + +function generate_html_overview { + Test_Info_Title="Test Branch Commit ID " + Test_Info="${MR_source_branch} ${ghprbActualCommit} " + + cat >>${WORKSPACE}/report.html < +
+

ITREX Tests + [ Job-${BUILD_NUMBER} ]

+

Test Status: ${JOB_STATUS}

+

Summary

+ + + + ${Test_Info_Title} + + + + ${Test_Info} + +
Repo
ITREX
+eof +} + +function generate_results { + cat >>${WORKSPACE}/report.html <Performance + + + + + + + + +eof + + devices=$(cat ${summaryLog} | cut -d';' -f1 | awk '!a[$0]++') + for device in ${devices[@]}; do + models=$(cat ${summaryLog} | grep "${device};" | cut -d';' -f2 | awk '!a[$0]++') + for model in ${models[@]}; do + tasks=$(cat ${summaryLog} | grep "${device};${model};" | cut -d';' -f3 | awk '!a[$0]++') + for task in ${tasks[@]}; do + datasets=$(cat ${summaryLog} | grep "${device};${model};${task};" | cut -d';' -f4 | awk '!a[$0]++') + for dataset in ${datasets[@]}; do + benchmark_pattern="${device};${model};${task};${dataset};" + acc=$(cat ${summaryLog} | grep "${benchmark_pattern}" | cut -d';' -f5 | awk '!a[$0]++') + acc_last=nan + if [ $(cat ${summaryLogLast} | grep -c "${benchmark_pattern}") != 0 ]; then + acc_last=$(cat ${summaryLogLast} | grep "${benchmark_pattern}" | cut -d';' -f5 | awk '!a[$0]++') + fi + generate_core + done + done + done + done + cat >>${WORKSPACE}/report.html < +eof +} + +function generate_core { + echo "" >>${WORKSPACE}/report.html + echo | awk -v acc=${acc} -v acc_l=${acc_last} ' + function show_benchmark(a) { + if(a ~/[1-9]/) { + printf("\n",a); + }else { + if(a == "") { + printf("\n",a); + }else{ + printf("\n"); + } + } + } + function compare_new_last(a,b){ + if(a ~/[1-9]/ && b ~/[1-9]/) { + target = a / b; + if(target >= 0.945) { + status_png = "background-color:#90EE90"; + }else { + status_png = "background-color:#FFD2D2"; + job_status = "fail" + } + printf("", status_png, target); + }else{ + if(a == ""){ + job_status = "fail" + status_png = "background-color:#FFD2D2"; + printf("", status_png); + }else{ + printf(""); + } + } + } + BEGIN { + job_status = "pass" + }{ + // current + show_benchmark(acc) + // Last + printf("\n") + show_benchmark(acc_l) + // current vs last + printf("\n"); + compare_new_last(acc,acc_l) + printf("\n"); + } END{ + printf("\n%s", job_status); + } + ' >>${WORKSPACE}/report.html + job_state=$(tail -1 
${WORKSPACE}/report.html) + sed -i '$s/.*//' ${WORKSPACE}/report.html + if [ "${job_state}" == 'fail' ]; then + echo "is_perf_reg=true" >> "$GITHUB_ENV" + fi +} + +function generate_html_head { + cat >${WORKSPACE}/report.html < + + + + + + + Daily Tests - TensorFlow - Jenkins + + +eof +} + +function generate_html_footer { + cat >>${WORKSPACE}/report.html < + + +eof +} + +main diff --git a/.github/workflows/scripts/models/model_test.sh b/.github/workflows/scripts/models/model_test.sh index fb420086..b0c2cba4 100644 --- a/.github/workflows/scripts/models/model_test.sh +++ b/.github/workflows/scripts/models/model_test.sh @@ -13,9 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -set -eo pipefail +set -o pipefail +set -x source /GenAIEval/.github/workflows/scripts/change_color - +git config --global --add safe.directory /GenAIEval # get parameters PATTERN='[-a-zA-Z0-9_]*=' PERF_STABLE_CHECK=true @@ -34,11 +35,7 @@ for i in "$@"; do esac done -log_dir="/GenAIEval/${device}/${model}" -mkdir -p ${log_dir} working_dir="" -$BOLD_YELLOW && echo "-------- evaluation start --------" && $RESET - main() { case ${tasks} in "text-generation") @@ -48,7 +45,16 @@ main() { *) echo "Not suppotted task"; exit 1;; esac + if [[ ${model} == *"opt"* ]]; then + pretrained="facebook/${model}" + else + pretrained="${model}" + fi + log_dir="/log/${device}/${model}" + mkdir -p ${log_dir} + $BOLD_YELLOW && echo "-------- evaluation start --------" && $RESET run_benchmark + cp ${log_dir}/${device}-${tasks}-${model}-${datasets}.log /GenAIEval/ } function prepare() { @@ -69,15 +75,16 @@ function prepare() { function run_benchmark() { cd ${working_dir} - overall_log="${log_dir}/${device}-${model}-${tasks}-${datasets}.log" + overall_log="${log_dir}/${device}-${tasks}-${model}-${datasets}.log" python main.py \ --model hf \ - --model_args pretrained=${model} \ + --model_args pretrained=${pretrained} \ --tasks ${datasets} \ --device ${device} 
\ - --batch_size 112 - 2>&1 | tee ${overall_log} - + --batch_size 112 2>&1 | tee ${overall_log} + + echo "print log content:" + cat ${overall_log} status=$? if [ ${status} != 0 ]; then echo "Evaluation process returned non-zero exit code."
DeviceTasksModelDatasetsAccuracy
${device}${model}${task}${dataset}New%.2f
Last
New/Last