Merge branch 'master' into xufang/speculative_decoding_profile

openvinotoolkit · Dec 26, 2024 · 5d063db · 5d063db
2 parents 60722e2 + 9bcadf7
commit 5d063db
Show file tree

Hide file tree

Showing 30 changed files with 637 additions and 437 deletions.
diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
@@ -491,7 +491,6 @@ jobs:
           python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
           python -m pip install -r ./samples/requirements.txt
           optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
-          optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen-7B-Chat Qwen-7B-Chat --task text-generation-with-past
       - name: run and compare
         run: |
           source ./ov/setupvars.sh
@@ -505,36 +504,22 @@ jobs:
 
           ./build/samples/cpp/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
           ./build/samples/cpp/text_generation/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_greedy.txt
+          python ./samples/python/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.py ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_py.txt
           python -c "
           with open('predictions_greedy.txt', 'r') as f:
               predicted_greedy = f.readline()
           with open('predictions_prompt_lookup.txt', 'r') as f:
               predicted_prompt_lookup = f.readline()
+          with open('predictions_py.txt', 'r') as f:
+              predicted_prompt_lookup_py = f.readline()
           assert predicted_greedy == predicted_prompt_lookup
+          assert predicted_greedy == predicted_prompt_lookup_py
+          assert predicted_prompt_lookup == predicted_prompt_lookup_py
           "
           echo "Prompt lookup" passed
-      - name: run and compare (model with seq_length_axis = 1)
-        run: |
-          source ./ov/setupvars.sh
-
-          echo 'Code:```python
-          def add(a, b):
-              return a + b
-          ```
-          Question: Can you please add 2 and 3
-          A:' > ./prompt.txt
-
-          ./build/samples/cpp/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm ./Qwen-7B-Chat/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
-          ./build/samples/cpp/text_generation/greedy_causal_lm ./Qwen-7B-Chat/ "$(<prompt.txt)" > predictions_greedy.txt
-          python -c "
-          with open('predictions_greedy.txt', 'r') as f:
-              predicted_greedy = f.readline()
-          with open('predictions_prompt_lookup.txt', 'r') as f:
-              predicted_prompt_lookup = f.readline()
-          assert predicted_greedy == predicted_prompt_lookup
-          "
-          echo "Prompt lookup" passed
-
+        env:
+          PYTHONPATH: "./build/:$PYTHONPATH"
+          LD_LIBRARY_PATH: "./build/openvino_genai/:$LD_LIBRARY_PATH"
   cpp-Phi-1_5:
     runs-on: ubuntu-20.04-16-cores
     defaults:

diff --git a/.github/workflows/llm_bench-python.yml b/.github/workflows/llm_bench-python.yml
@@ -61,7 +61,6 @@ jobs:
       SRC_DIR: ${{ github.workspace }}
       LLM_BENCH_PYPATH: ${{ github.workspace }}/tools/llm_bench
       WWB_PATH: ${{ github.workspace }}/tools/who_what_benchmark
-      OPENVINO_LOG_LEVEL: 3
 
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -103,30 +102,34 @@ jobs:
       - name: Test native pytorch model on Linux
         run: |
           git clone --depth 1 https://huggingface.co/katuni4ka/tiny-random-qwen
-          python ./tools/llm_bench/benchmark.py -m tiny-random-qwen -d cpu -n 1 -f pt
+          python ./tools/llm_bench/benchmark.py -m tiny-random-qwen -d cpu -n 1 -f pt -ic 20
+          rm -rf tiny-random-qwen
         env:
           GIT_LFS_SKIP_SMUDGE: 0
       - name: Test tiny-random-baichuan2 on Linux Optimum Intel
         run: |
           optimum-cli export openvino --model katuni4ka/tiny-random-baichuan2 --trust-remote-code --weight-format fp16 ./ov_models/tiny-random-baichuan2/pytorch/dldt/FP16
-          python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-baichuan2/pytorch/dldt/FP16/ -d cpu -n 1 --optimum
+          python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-baichuan2/pytorch/dldt/FP16/ -d cpu -n 1 --optimum -ic 10
+          rm -rf ./ov_models/tiny-random-baichuan2
       - name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov on Linux Optimum Intel
         run: |
           huggingface-cli download OpenVINO/LCM_Dreamshaper_v7-int8-ov --local-dir ov_models/lcm_dreamshaper_v7
-          python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --optimum
+          python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --optimum -ic 4
       - name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov on Linux with GenAI
         run: |
-          python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1
+          python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 -ic 4
       - name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov on Linux with GenAI and LoRA
         run: |
           wget -O ./ov_models/soulcard.safetensors https://civitai.com/api/download/models/72591
-          python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --lora ./ov_models/soulcard.safetensors --lora_alphas 0.7
+          python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --lora ./ov_models/soulcard.safetensors --lora_alphas 0.7 -ic 4
+          rm -rf ./ov_models/lcm_dreamshaper_v7/
       - name: Test TinyLlama-1.1B-Chat-v1.0 in Speculative Deconding mode on Linux
         run: |
           optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format fp16 ov_models/TinyLlama-1.1B-Chat-v1.0/FP16
           optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format int8 ov_models/TinyLlama-1.1B-Chat-v1.0/INT8
-          python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --assistant_confidence_threshold 0.4
-          python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --num_assistant_tokens 5
+          python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --assistant_confidence_threshold 0.4 -ic 20
+          python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --num_assistant_tokens 5 -ic 20
+          rm -rf ov_models/TinyLlama-1.1B-Chat-v1.0
       - name: Test whisper-tiny on Linux
         run: |
           GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 --branch main --single-branch https://huggingface.co/datasets/facebook/multilingual_librispeech
@@ -138,11 +141,14 @@ jobs:
           optimum-cli export openvino --trust-remote-code --model openai/whisper-tiny ./ov_models/whisper-tiny
           python ./tools/llm_bench/benchmark.py -m ./ov_models/whisper-tiny --media multilingual_librispeech/data/mls_polish/train/audio/3283_1447_000/3283_1447_000000.flac -d cpu -n 1 --optimum
           python ./tools/llm_bench/benchmark.py -m ./ov_models/whisper-tiny --media multilingual_librispeech/data/mls_polish/train/audio/3283_1447_000/3283_1447_000000.flac -d cpu -n 1
+          rm -rf ./ov_models/whisper-tiny
+          rm -rf multilingual_librispeech
       - name: Text InternVL2-1B on Linux
         run: |
           optimum-cli export openvino --model OpenGVLab/InternVL2-1B ./ov_models/internvl2-1B --task image-text-to-text --trust-remote-code
           python ./tools/llm_bench/benchmark.py -m ./ov_models/internvl2-1B --media https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --prompt "What is unusual on this image?" -ic 20
           python ./tools/llm_bench/benchmark.py -m ./ov_models/internvl2-1B --media https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --prompt "What is unusual on this image?" -ic 20 --optimum
+          rm -rf ./ov_models/internvl2-1B
       - name: WWB Tests
         run: |
           pip install git+https://github.com/huggingface/optimum-intel.git

diff --git a/samples/cpp/prompt_lookup_decoding_lm/CMakeLists.txt b/samples/cpp/prompt_lookup_decoding_lm/CMakeLists.txt
@@ -1,30 +1,23 @@
 # Copyright (C) 2023-2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-find_package(OpenVINO REQUIRED COMPONENTS Runtime Threading)
-
 find_package(OpenVINOGenAI REQUIRED
     PATHS
         "${CMAKE_BINARY_DIR}"  # Reuse the package from the build.
         ${OpenVINO_DIR}  # GenAI may be installed alogside OpenVINO.
     NO_CMAKE_FIND_ROOT_PATH
 )
 
-add_executable(prompt_lookup_decoding_lm prompt_lookup_decoding_lm.cpp)
-target_link_libraries(prompt_lookup_decoding_lm PRIVATE openvino::runtime openvino::threading)
-set_target_properties(prompt_lookup_decoding_lm PROPERTIES
-    COMPILE_PDB_NAME prompt_lookup_decoding_lm
+set(TARGET_NAME prompt_lookup_decoding_lm)
+add_executable(${TARGET_NAME} ${TARGET_NAME}.cpp)
+target_link_libraries(${TARGET_NAME} PRIVATE openvino::genai)
+
+set_target_properties(${TARGET_NAME} PROPERTIES
+    COMPILE_PDB_NAME ${TARGET_NAME} 
     # Ensure out of box LC_RPATH on macOS with SIP
     INSTALL_RPATH_USE_LINK_PATH ON)
-target_compile_features(prompt_lookup_decoding_lm PRIVATE cxx_std_17)
-
-get_target_property(genai_imported openvino::genai IMPORTED_LOCATION)
-set(OPENVINO_TOKENIZERS_PATH $<IF:$<BOOL:${genai_imported}>,${genai_imported},$<TARGET_FILE_DIR:openvino::genai>>)
-set(OPENVINO_TOKENIZERS_FILENAME "${CMAKE_SHARED_LIBRARY_PREFIX}openvino_tokenizers${CMAKE_SHARED_LIBRARY_SUFFIX}")
-target_compile_definitions(prompt_lookup_decoding_lm PRIVATE
-    OPENVINO_TOKENIZERS_PATH="${OPENVINO_TOKENIZERS_PATH}/${OPENVINO_TOKENIZERS_FILENAME}")
 
-install(TARGETS prompt_lookup_decoding_lm
+install(TARGETS ${TARGET_NAME} 
         RUNTIME DESTINATION samples_bin/
         COMPONENT samples_bin
         EXCLUDE_FROM_ALL)