From 924d47dc6ab6b04357d94ad50dd0934d7384cabe Mon Sep 17 00:00:00 2001 From: VincyZhang Date: Sun, 19 May 2024 18:36:01 -0700 Subject: [PATCH] Add modeltest (#13) * add model test and trellix Signed-off-by: Wenxin Zhang --------- Signed-off-by: Wenxin Zhang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/workflows/docker/hpu.dockerfile | 25 ++ .github/workflows/model_test.yml | 116 -------- .github/workflows/model_test_cpu.yml | 172 +++++++++++ .github/workflows/model_test_hpu.yml | 160 +++++++++++ .../workflows/scripts/models/collect_log.sh | 9 +- .../scripts/models/generate_report.sh | 268 ++++++++++++++++++ .../workflows/scripts/models/model_test.sh | 37 ++- Docker/hpu.dockerfile | 25 ++ 8 files changed, 678 insertions(+), 134 deletions(-) create mode 100644 .github/workflows/docker/hpu.dockerfile delete mode 100644 .github/workflows/model_test.yml create mode 100644 .github/workflows/model_test_cpu.yml create mode 100644 .github/workflows/model_test_hpu.yml create mode 100644 .github/workflows/scripts/models/generate_report.sh create mode 100644 Docker/hpu.dockerfile diff --git a/.github/workflows/docker/hpu.dockerfile b/.github/workflows/docker/hpu.dockerfile new file mode 100644 index 00000000..e6a35d54 --- /dev/null +++ b/.github/workflows/docker/hpu.dockerfile @@ -0,0 +1,25 @@ +FROM vault.habana.ai/gaudi-docker/1.13.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.0:latest as hpu + +ENV LANG=en_US.UTF-8 +ENV PYTHONPATH=/root:/usr/lib/habanalabs/ +ARG REPO=https://github.com/intel/genaieval.git +ARG REPO_PATH="" +ARG BRANCH=main + +RUN apt-get update && \ + apt-get install git-lfs && \ + git-lfs install + +# Download code +SHELL ["/bin/bash", "--login", "-c"] +RUN mkdir -p /genaieval +COPY ${REPO_PATH} /genaieval +RUN if [ "$REPO_PATH" == "" ]; then rm -rf /genaieval/* && rm -rf /genaieval/.* ; git clone --single-branch --branch=${BRANCH} ${REPO} /genaieval ; fi + +# Build From Source +RUN cd /genaieval && \ + python setup.py install && \ + pip install --upgrade-strategy eager optimum[habana] && \ + pip list + +WORKDIR /genaieval/ \ No newline at end of file diff --git a/.github/workflows/model_test.yml b/.github/workflows/model_test.yml deleted file mode 100644 index 4f18c630..00000000 --- a/.github/workflows/model_test.yml +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: Model Test - -on: - workflow_dispatch: - -# If there is a new commit, the previous jobs will be canceled -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -permissions: write-all -env: - OUT_SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts/models - SCRIPT_PATH: /GenAIEval/.github/workflows/scripts - DOCKER_NAME: "genaieval" - DOCKER_TAG: "latest" - CONTAINER_NAME: "modelTest" - - -jobs: - Evaluation-Workflow: - runs-on: aise-cluster - strategy: - matrix: - include: - - modelName: "facebook/opt-125m" - datasets: "piqa" - device: "cpu" - tasks: "text-generation" - fail-fast: true - - steps: - - name: Clean Up Working Directory - run: sudo rm -rf ${{github.workspace}}/* - - - name: Checkout out Repo - uses: actions/checkout@v4 - with: - submodules: "recursive" - fetch-tags: true - # We need this because GitHub needs to clone the branch to pipeline - - name: Docker Build - run: | - docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} . - - - name: Docker Run - run: | - if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}'$) ]]; then - docker stop ${{ env.CONTAINER_NAME }} - docker rm -vf ${{ env.CONTAINER_NAME }} || true - fi - docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \ - -v ${{ github.workspace }}:/GenAIEval \ - ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} - - - name: Binary build - run: | - docker exec ${{ env.CONTAINER_NAME }} \ - bash -c "cd /GenAIEval && pip install -r requirements.txt && python setup.py install" - - #- name: Download Reference Artifact - # id: download-artifact - # uses: dawidd6/action-download-artifact@v3.1.2 - # with: - # workflow: model_test.yml - # name: ${{ matrix.device }}-${{ matrix.modelName }} - # run_id: ${{ vars.ModelTest_REF_ID }} - # path: ${{ github.workspace }}/${{ matrix.device }}_${{ matrix.modelName }}_refer_log - # name_is_regexp: true - # repo: ${{ github.repository }} - # check_artifacts: false - # search_artifacts: false - # skip_unpack: false - # if_no_artifact_found: warn - - #- name: Display structure of downloaded files - # run: ls -R - - - name: Evaluation - run: | - docker exec ${{ env.CONTAINER_NAME }} \ - bash -c "cd /GenAIEval/.github/workflows/scripts/models \ - && bash model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}" - - - name: Collect Log - run: | - docker exec ${{ env.CONTAINER_NAME }} \ - bash -c "cd /GenAIEval/.github/workflows/scripts/models \ - && bash -x collect_log.sh --model=${{ matrix.modelName }} \ - --device=${{ matrix.device }} \ - --datasets=${{ matrix.datasets }} \ - --tasks=${{ matrix.tasks }} - - - name: Publish pipeline artifact - uses: actions/upload-artifact@v4 - if: ${{ !cancelled() }} - with: - name: ${{ matrix.device }}-${{ matrix.modelName }} - path: | - ${{ github.workspace }}/${{ matrix.device }}/${{ matrix.modelName }} - ${{ github.workspace }}/.summary.log - if-no-files-found: ignore # 'warn' or 'ignore' are also available, defaults to `warn` - retention-days: 60 # 1 <= retention-days <= 90 diff --git a/.github/workflows/model_test_cpu.yml b/.github/workflows/model_test_cpu.yml new file mode 100644 index 00000000..a687d7ff --- /dev/null +++ b/.github/workflows/model_test_cpu.yml @@ -0,0 +1,172 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Model Test on CPU + +on: + pull_request: + branches: [main] + types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped + paths: + - .github/workflows/model_test_cpu.yml + - GenAIEval/** + - setup.py + workflow_dispatch: + +# If there is a new commit, the previous jobs will be canceled +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true +permissions: write-all +env: + OUT_SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts/models + SCRIPT_PATH: /GenAIEval/.github/workflows/scripts + DOCKER_NAME: "genaieval" + DOCKER_TAG: "latest" + CONTAINER_NAME: "modelTest" + + +jobs: + Evaluation-Workflow: + runs-on: aise-cluster-cpu + strategy: + matrix: + include: + - modelName: "opt-125m" + datasets: "piqa" + device: "cpu" + tasks: "text-generation" + fail-fast: true + + steps: + - name: Clean Up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Load environment variables + run: + cat ~/actions-runner4/.env >> $GITHUB_ENV + + - name: Checkout out Repo + uses: actions/checkout@v4 + with: + submodules: "recursive" + fetch-tags: true + # We need this because GitHub needs to clone the branch to pipeline + - name: Docker Build + run: | + docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} . + + - name: Docker Run + run: | + if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}'$) ]]; then + docker stop ${{ env.CONTAINER_NAME }} + docker rm -vf ${{ env.CONTAINER_NAME }} || true + fi + docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \ + -v ${{ github.workspace }}:/GenAIEval \ + -e http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" -e https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" \ + ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} + + - name: Binary build + run: | + docker exec ${{ env.CONTAINER_NAME }} \ + bash -c "cd /GenAIEval && pip install -r requirements.txt && python setup.py install" + + - name: Evaluation + run: | + docker exec ${{ env.CONTAINER_NAME }} \ + bash -c "cd /GenAIEval/.github/workflows/scripts/models \ + && bash -x model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}" + + - name: Collect Log + run: | + docker exec ${{ env.CONTAINER_NAME }} \ + bash -c "cd /GenAIEval/.github/workflows/scripts/models \ + && bash -x collect_log.sh --model=${{ matrix.modelName }} \ + --device=${{ matrix.device }} \ + --datasets=${{ matrix.datasets }} \ + --tasks=${{ matrix.tasks }}" + + - name: Publish pipeline artifact + uses: actions/upload-artifact@v4 + if: ${{ !cancelled() }} + with: + name: ${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }} + path: | + ${{ github.workspace }}/${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}.log + ${{ github.workspace }}/summary.log + if-no-files-found: ignore # 'warn' or 'ignore' are also available, defaults to `warn` + retention-days: 60 # 1 <= retention-days <= 90 + + Genreate-Report: + runs-on: ubuntu-latest + needs: [Evaluation-Workflow] + steps: + - name: Checkout out Repo + uses: actions/checkout@v4 + + - name: Download Summary Log + uses: actions/download-artifact@v4 + with: + path: ${{ env.OUT_SCRIPT_PATH }}/log + - name: Display structure of downloaded files + run: ls -R + - name: Analysis Summary + run: | + cd ${{ env.OUT_SCRIPT_PATH }} + ls -R + + - name: Download Reference Artifact + id: download-artifact + uses: dawidd6/action-download-artifact@v3.1.2 + with: + workflow: model-test.yml + name: FinalReport + run_id: ${{ vars.ModelTest_CPU_REF_ID }} + path: ${{ env.OUT_SCRIPT_PATH }} + name_is_regexp: true + repo: ${{ github.repository }} + check_artifacts: false + search_artifacts: false + skip_unpack: false + if_no_artifact_found: warn + + - name: Display structure of downloaded files + run: cd ${{ env.OUT_SCRIPT_PATH }}/log && ls -R + + - name: Generate report + run: | + echo "------ Generating final report.html ------" + cd ${{ env.OUT_SCRIPT_PATH }} + mkdir -p generated + /usr/bin/bash -x generate_report.sh + env: + RUN_DISPLAY_URL: https://https://github.com/opea-project/GenAIEval/actions/runs/${{ github.run_id }} + BUILD_NUMBER: ${{ github.run_id }} + JOB_STATUS: succeed + + - name: Publish Report + uses: actions/upload-artifact@v4 + if: ${{ !cancelled() }} + with: + name: FinalReport + path: ${{ env.OUT_SCRIPT_PATH }}/generated + + - name: Specify performance regression + if: ${{ !cancelled() }} + run: | + if [ ${{ env.is_perf_reg }} == 'true' ]; then + echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports." + exit 1 + fi diff --git a/.github/workflows/model_test_hpu.yml b/.github/workflows/model_test_hpu.yml new file mode 100644 index 00000000..82a10f79 --- /dev/null +++ b/.github/workflows/model_test_hpu.yml @@ -0,0 +1,160 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Model Test on HPU + +on: + pull_request: + branches: [main] + types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped + paths: + - .github/workflows/model_test_hpu.yml + - GenAIEval/** + - setup.py + workflow_dispatch: + +# If there is a new commit, the previous jobs will be canceled +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true +permissions: write-all +env: + OUT_SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts/models + SCRIPT_PATH: /GenAIEval/.github/workflows/scripts + DOCKER_NAME: "genaieval" + DOCKER_TAG: "latest" + CONTAINER_NAME: "modelTest" + + +jobs: + Evaluation-Workflow: + runs-on: aise-cluster-hpu + strategy: + matrix: + include: + - modelName: "opt-125m" + datasets: "piqa" + device: "hpu" + tasks: "text-generation" + fail-fast: true + + steps: + - name: Clean Up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Checkout out Repo + uses: actions/checkout@v4 + with: + submodules: "recursive" + fetch-tags: true + # We need this because GitHub needs to clone the branch to pipeline + - name: Docker Build + run: | + docker build --target hpu --build-arg REPO_PATH="." -f ${{ github.workspace }}/Docker/hpu.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} . + + - name: Docker Run + run: | + if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}'$) ]]; then + docker stop ${{ env.CONTAINER_NAME }} + docker rm -vf ${{ env.CONTAINER_NAME }} || true + fi + docker run -tid --runtime=habana --name=${{ env.CONTAINER_NAME }} -v ${{ github.workspace }}:/GenAIEval -v /dev/shm:/dev/shm ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} + + - name: Evaluation + run: | + docker exec ${{ env.CONTAINER_NAME }} \ + bash -c "cd /GenAIEval/.github/workflows/scripts/models \ + && bash -x model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}" + + - name: Collect Log + run: | + docker exec ${{ env.CONTAINER_NAME }} \ + bash -c "cd /GenAIEval/.github/workflows/scripts/models \ + && bash -x collect_log.sh --model=${{ matrix.modelName }} \ + --device=${{ matrix.device }} \ + --datasets=${{ matrix.datasets }} \ + --tasks=${{ matrix.tasks }}" + + - name: Publish pipeline artifact + uses: actions/upload-artifact@v4 + if: ${{ !cancelled() }} + with: + name: ${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }} + path: | + ${{ github.workspace }}/${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}.log + ${{ github.workspace }}/summary.log + if-no-files-found: ignore # 'warn' or 'ignore' are also available, defaults to `warn` + retention-days: 60 # 1 <= retention-days <= 90 + + Genreate-Report: + runs-on: ubuntu-latest + needs: [Evaluation-Workflow] + steps: + - name: Checkout out Repo + uses: actions/checkout@v4 + + - name: Download Summary Log + uses: actions/download-artifact@v4 + with: + path: ${{ env.OUT_SCRIPT_PATH }}/log + - name: Display structure of downloaded files + run: ls -R + - name: Analysis Summary + run: | + cd ${{ env.OUT_SCRIPT_PATH }} + ls -R + + - name: Download Reference Artifact + id: download-artifact + uses: dawidd6/action-download-artifact@v3.1.2 + with: + workflow: model-test.yml + name: FinalReport + run_id: ${{ vars.ModelTest_HPU_REF_ID }} + path: ${{ env.OUT_SCRIPT_PATH }} + name_is_regexp: true + repo: ${{ github.repository }} + check_artifacts: false + search_artifacts: false + skip_unpack: false + if_no_artifact_found: warn + + - name: Display structure of downloaded files + run: cd ${{ env.OUT_SCRIPT_PATH }}/log && ls -R + + - name: Generate report + run: | + echo "------ Generating final report.html ------" + cd ${{ env.OUT_SCRIPT_PATH }} + mkdir -p generated + /usr/bin/bash -x generate_report.sh + env: + RUN_DISPLAY_URL: https://https://github.com/opea-project/GenAIEval/actions/runs/${{ github.run_id }} + BUILD_NUMBER: ${{ github.run_id }} + JOB_STATUS: succeed + + - name: Publish Report + uses: actions/upload-artifact@v4 + if: ${{ !cancelled() }} + with: + name: FinalReport + path: ${{ env.OUT_SCRIPT_PATH }}/generated + + - name: Specify performance regression + if: ${{ !cancelled() }} + run: | + if [ ${{ env.is_perf_reg }} == 'true' ]; then + echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports." + exit 1 + fi diff --git a/.github/workflows/scripts/models/collect_log.sh b/.github/workflows/scripts/models/collect_log.sh index a197393a..9c22d78f 100644 --- a/.github/workflows/scripts/models/collect_log.sh +++ b/.github/workflows/scripts/models/collect_log.sh @@ -14,6 +14,7 @@ # limitations under the License. set -eo pipefail +set -x source /GenAIEval/.github/workflows/scripts/change_color WORKSPACE="/GenAIEval" # get parameters @@ -34,14 +35,14 @@ for i in "$@"; do esac done -log_file="/GenAIEval/${device}/${model}/${device}-${model}-${tasks}-${datasets}.log" +log_file="/log/${device}/${model}/${device}-${tasks}-${model}-${datasets}.log" $BOLD_YELLOW && echo "-------- Collect logs --------" && $RESET echo "working in" pwd if [[ ! -f ${log_file} ]]; then - echo "${device};${model};${tasks};${datasets};;${logfile}" >> ${WORKSPACE}/summary.log + echo "${device};${model};${tasks};${datasets};;" >> ${WORKSPACE}/summary.log else - acc=$(grep -Po "Accuracy .* is:\\s+(\\d+(\\.\\d+)?)" ${log_file} | head -n 1 | sed 's/.*://;s/[^0-9.]//g') - echo "${device};${model};${tasks};${datasets};${acc};${logfile}" >> ${WORKSPACE}/summary.log + acc=$(grep -Po "acc .*(\d+(\.\d+)?)" ${log_file} | awk -F "|" '{print $2}' | head -n 1 | sed 's/.*://;s/[^0-9.]//g') + echo "${device};${model};${tasks};${datasets};${acc};" >> ${WORKSPACE}/summary.log fi diff --git a/.github/workflows/scripts/models/generate_report.sh b/.github/workflows/scripts/models/generate_report.sh new file mode 100644 index 00000000..4db273f5 --- /dev/null +++ b/.github/workflows/scripts/models/generate_report.sh @@ -0,0 +1,268 @@ +#!/bin/bash +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x +WORKSPACE=generated +last_log_path=FinalReport +summaryLog=${WORKSPACE}/summary.log +summaryLogLast=${last_log_path}/summary.log +PATTERN='[-a-zA-Z0-9_]*=' + +function main { + echo "summaryLog: ${summaryLog}" + echo "summaryLogLast: ${summaryLogLast}" + echo "is_perf_reg=false" >> "$GITHUB_ENV" + preprocessing + generate_html_head + generate_html_overview + generate_results + generate_html_footer +} + +function preprocessing { + for file_path in log/* + do + if [[ -d ${file_path} ]] && [[ -f ${file_path}/summary.log ]]; then + cat ${file_path}/summary.log >> ${summaryLog} + fi + done +} + +function generate_html_overview { + Test_Info_Title="Test Branch Commit ID " + Test_Info="${MR_source_branch} ${ghprbActualCommit} " + + cat >>${WORKSPACE}/report.html < +
+

ITREX Tests + [ Job-${BUILD_NUMBER} ]

+

Test Status: ${JOB_STATUS}

+

Summary

+ + + + ${Test_Info_Title} + + + + ${Test_Info} + +
Repo
ITREX
+eof +} + +function generate_results { + cat >>${WORKSPACE}/report.html <Performance + + + + + + + + + +eof + + devices=$(cat ${summaryLog} | cut -d';' -f1 | awk '!a[$0]++') + for device in ${devices[@]}; do + models=$(cat ${summaryLog} | grep "${device};" | cut -d';' -f2 | awk '!a[$0]++') + for model in ${models[@]}; do + tasks=$(cat ${summaryLog} | grep "${device};${model};" | cut -d';' -f3 | awk '!a[$0]++') + for task in ${tasks[@]}; do + datasets=$(cat ${summaryLog} | grep "${device};${model};${task};" | cut -d';' -f4 | awk '!a[$0]++') + for dataset in ${datasets[@]}; do + benchmark_pattern="${device};${model};${task};${dataset};" + acc=$(cat ${summaryLog} | grep "${benchmark_pattern}" | cut -d';' -f5 | awk '!a[$0]++') + acc_last=nan + if [ $(cat ${summaryLogLast} | grep -c "${benchmark_pattern}") != 0 ]; then + acc_last=$(cat ${summaryLogLast} | grep "${benchmark_pattern}" | cut -d';' -f5 | awk '!a[$0]++') + fi + generate_core + done + done + done + done + cat >>${WORKSPACE}/report.html < +eof +} + +function generate_core { + echo "" >>${WORKSPACE}/report.html + echo | awk -v acc=${acc} -v acc_l=${acc_last} ' + function show_benchmark(a) { + if(a ~/[1-9]/) { + printf("\n",a); + }else { + printf("\n"); + } + } + function compare_new_last(a,b){ + if(a ~/[1-9]/ && b ~/[1-9]/) { + target = b / a; + if(target >= 0.945) { + status_png = "background-color:#90EE90"; + }else { + status_png = "background-color:#FFD2D2"; + job_status = "fail" + } + printf("", status_png, target); + }else{ + if(a == ""){ + job_status = "fail" + status_png = "background-color:#FFD2D2"; + printf("", status_png); + }else{ + printf(""); + } + } + } + BEGIN { + job_status = "pass" + }{ + // current + show_benchmark(acc) + // Last + printf("\n") + show_benchmark(acc_l) + // current vs last + printf("\n"); + compare_new_last(acc,acc_l) + printf("\n"); + } END{ + printf("\n%s", job_status); + } + ' >>${WORKSPACE}/report.html + job_state=$(tail -1 ${WORKSPACE}/report.html) + sed -i '$s/.*//' ${WORKSPACE}/report.html + if [ ${job_state} == 'fail' ]; then + echo "is_perf_reg=true" >> "$GITHUB_ENV" + fi +} + +function generate_html_head { + cat >${WORKSPACE}/report.html < + + + + + + + Daily Tests - TensorFlow - Jenkins + + +eof +} + +function generate_html_footer { + cat >>${WORKSPACE}/report.html < + + +eof +} + +main diff --git a/.github/workflows/scripts/models/model_test.sh b/.github/workflows/scripts/models/model_test.sh index fb420086..7d460ac2 100644 --- a/.github/workflows/scripts/models/model_test.sh +++ b/.github/workflows/scripts/models/model_test.sh @@ -13,9 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -set -eo pipefail +set -o pipefail +set -x source /GenAIEval/.github/workflows/scripts/change_color - +git config --global --add safe.directory /GenAIEval # get parameters PATTERN='[-a-zA-Z0-9_]*=' PERF_STABLE_CHECK=true @@ -34,11 +35,7 @@ for i in "$@"; do esac done -log_dir="/GenAIEval/${device}/${model}" -mkdir -p ${log_dir} working_dir="" -$BOLD_YELLOW && echo "-------- evaluation start --------" && $RESET - main() { case ${tasks} in "text-generation") @@ -48,7 +45,21 @@ main() { *) echo "Not suppotted task"; exit 1;; esac + if [[ ${model} == *"opt"* ]]; then + pretrained="facebook/${model}" + else + pretrained="${model}" + fi + if [[ ${device} == "cpu" ]]; then + model_sourze="hf" + elif [[ ${device} == "hpu" ]]; then + model_sourze="gaudi-hf" + fi + log_dir="/log/${device}/${model}" + mkdir -p ${log_dir} + $BOLD_YELLOW && echo "-------- evaluation start --------" && $RESET run_benchmark + cp ${log_dir}/${device}-${tasks}-${model}-${datasets}.log /GenAIEval/ } function prepare() { @@ -62,22 +73,20 @@ function prepare() { else echo "Not found requirements.txt file." fi - if [[ ${device} == "hpu" ]]; then - pip install --upgrade-strategy eager optimum[habana] - fi } function run_benchmark() { cd ${working_dir} - overall_log="${log_dir}/${device}-${model}-${tasks}-${datasets}.log" + overall_log="${log_dir}/${device}-${tasks}-${model}-${datasets}.log" python main.py \ - --model hf \ - --model_args pretrained=${model} \ + --model ${model_sourze} \ + --model_args pretrained=${pretrained} \ --tasks ${datasets} \ --device ${device} \ - --batch_size 112 - 2>&1 | tee ${overall_log} + --batch_size 112 2>&1 | tee ${overall_log} + echo "print log content:" + cat ${overall_log} status=$? if [ ${status} != 0 ]; then echo "Evaluation process returned non-zero exit code." diff --git a/Docker/hpu.dockerfile b/Docker/hpu.dockerfile new file mode 100644 index 00000000..58c4ce1b --- /dev/null +++ b/Docker/hpu.dockerfile @@ -0,0 +1,25 @@ +FROM vault.habana.ai/gaudi-docker/1.13.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.0:latest as hpu + +ENV LANG=en_US.UTF-8 +ENV PYTHONPATH=/root:/usr/lib/habanalabs/ +ARG REPO=https://github.com/opea-project/GenAIEval.git +ARG REPO_PATH="" +ARG BRANCH=main + +RUN apt-get update && \ + apt-get install git-lfs && \ + git-lfs install + +# Download code +SHELL ["/bin/bash", "--login", "-c"] +RUN mkdir -p /GenAIEval +COPY ${REPO_PATH} /GenAIEval +RUN if [ "$REPO_PATH" == "" ]; then rm -rf /GenAIEval/* && rm -rf /GenAIEval/.* ; git clone --single-branch --branch=${BRANCH} ${REPO} /GenAIEval ; fi + +# Build From Source +RUN cd /GenAIEval && \ + python setup.py install && \ + pip install --upgrade-strategy eager optimum[habana] && \ + pip list + +WORKDIR /GenAIEval/ \ No newline at end of file
DeviceTasksModelDatasetsVSAccuracy
${device}${model}${task}${dataset}New%.2f%.2f
Last
New/Last