causal_lm->greedy_causal_lm
Wovchena committed Dec 18, 2023
1 parent c1aafc1 commit 152da44
Showing 6 changed files with 29 additions and 27 deletions.
12 changes: 5 additions & 7 deletions .github/workflows/causal_lm_cpp.yml
@@ -2,16 +2,14 @@ name: causal_lm_cpp
 on:
   pull_request:
     paths:
-      - text_generation/causal_lm/cpp/**
-      - '!text_generation/causal_lm/cpp/README.md'
+      - text_generation/causal_lm/cpp/*
       - llm_bench/python/**
       - thirdparty/openvino_contrib
       - .github/workflows/causal_lm_cpp.yml
+      - '!**.md'
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
 jobs:
-  causal_lm_cpp:
-    runs-on: ubuntu-20.04-8-cores
+  cpp-greedy_causal_lm:
+    runs-on: ubuntu-20.04
     steps:
       - uses: actions/checkout@v4
         with:
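Note on the path filter: in GitHub Actions, `*` does not cross directory separators while `**` does, so the trigger now watches only files directly under `text_generation/causal_lm/cpp/`, and the repo-wide `'!**.md'` exclusion supersedes the old README-only one.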
1 change: 1 addition & 0 deletions llm_bench/python/requirements.txt
@@ -7,6 +7,7 @@ torch
 transformers>=4.33.0
 diffusers>=0.22.0
 optimum>=1.14.0,<1.15.0
+# TODO: replace with slyalin after https://github.com/huggingface/optimum-intel/pull/486/#discussion_r1428853330 is resolved
 git+https://github.com/Wovchena/optimum-intel.git@stateful
 git+https://github.com/openvinotoolkit/nncf.git
 packaging
14 changes: 7 additions & 7 deletions text_generation/causal_lm/cpp/CMakeLists.txt
@@ -8,19 +8,19 @@ project(causal_lm)
 list(APPEND CUSTOM_OPERATIONS tokenizer)
 add_subdirectory(../../../thirdparty/openvino_contrib/modules/custom_operations/ "${CMAKE_CURRENT_BINARY_DIR}/custom_operations/")
 
-add_executable(causal_lm causal_lm.cpp)
-target_compile_definitions(causal_lm PRIVATE USER_OV_EXTENSIONS_PATH=\"$<TARGET_FILE:user_ov_extensions>\")
+add_executable(greedy_causal_lm greedy_causal_lm.cpp)
+target_compile_definitions(greedy_causal_lm PRIVATE USER_OV_EXTENSIONS_PATH=\"$<TARGET_FILE:user_ov_extensions>\")
 find_package(OpenVINO REQUIRED COMPONENTS Runtime)
-target_link_libraries(causal_lm PRIVATE openvino::runtime user_ov_extensions)
-set_target_properties(causal_lm PROPERTIES CXX_STANDARD 17)
-set_target_properties(causal_lm PROPERTIES CXX_STANDARD_REQUIRED ON)
+target_link_libraries(greedy_causal_lm PRIVATE openvino::runtime user_ov_extensions)
+set_target_properties(greedy_causal_lm PROPERTIES CXX_STANDARD 17)
+set_target_properties(greedy_causal_lm PROPERTIES CXX_STANDARD_REQUIRED ON)
 if(MSVC)
   target_compile_options(
-    causal_lm PRIVATE
+    greedy_causal_lm PRIVATE
     /Wall # Display all warnings
     /wd4710 /wd4711 # Disable the inline warnings
     /EHsc # Enable standard C++ stack unwinding, assume functions with extern "C" never throw
   )
 else()
-  target_compile_options(causal_lm PRIVATE -Wall) # Display all warnings
+  target_compile_options(greedy_causal_lm PRIVATE -Wall) # Display all warnings
 endif()
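The `USER_OV_EXTENSIONS_PATH` definition above injects the path of the built `user_ov_extensions` library into the C++ sources so the tokenizer's custom operations can be registered at runtime. A minimal sketch of how such a definition is typically consumed, assuming the macro expands to a quoted library path as set here (illustrative code, not the sample's actual source):

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // USER_OV_EXTENSIONS_PATH is supplied by target_compile_definitions and
    // expands to the quoted path of the user_ov_extensions shared library.
    core.add_extension(USER_OV_EXTENSIONS_PATH);
    // With the custom operations registered, tokenizer/detokenizer IRs can
    // be read like any other model.
    auto tokenizer = core.read_model("tokenizer.xml");
}
```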
23 changes: 13 additions & 10 deletions text_generation/causal_lm/cpp/README.md
@@ -7,7 +7,7 @@ This application showcases inference of a causal language model (LM). It doesn't
 ## How it works
 
-The program loads a tokenizer, detokenizer, and a model (`.xml` and `.bin`) to OpenVINO. The model is reshaped to batch 1 and variable prompt length. A prompt is tokenized and passed to the model. The model greedily generates token by token until the special end of sequence (EOS) token is obtained. The predicted tokens are converted to chars and printed in a streaming fashion.
+The program loads a model, a tokenizer and a detokenizer (`.xml` and `.bin`) to OpenVINO. A prompt is tokenized and passed to the model. The model greedily generates token by token until the special end of sequence (EOS) token is obtained. The predicted tokens are converted to chars and printed in a streaming fashion.
 
 ## Install OpenVINO Runtime
 
@@ -24,19 +24,22 @@ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ && cmake --build ./build/ --c
 ## Supported models
 
 1. LLaMA 2
    1. https://huggingface.co/meta-llama/Llama-2-7b-hf
    2. https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
    3. https://huggingface.co/meta-llama/Llama-2-13b-hf
    4. https://huggingface.co/meta-llama/Llama-2-13b-chat-hf
    5. https://huggingface.co/meta-llama/Llama-2-70b-hf
    6. https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
-2. OpenLLaMA
+2. [Llama2-7b-WhoIsHarryPotter](https://huggingface.co/microsoft/Llama2-7b-WhoIsHarryPotter)
+3. OpenLLaMA
    1. https://huggingface.co/openlm-research/open_llama_3b
    2. https://huggingface.co/openlm-research/open_llama_7b
    3. https://huggingface.co/openlm-research/open_llama_13b
    4. https://huggingface.co/openlm-research/open_llama_3b_v2
    5. https://huggingface.co/openlm-research/open_llama_7b_v2
-3. [Llama2-7b-WhoIsHarryPotter](https://huggingface.co/microsoft/Llama2-7b-WhoIsHarryPotter)
+4. TinyLlama
+   1. https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6
+   2. https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1195k-token-2.5T
 
 This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature.
 
@@ -47,7 +50,7 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upg
 ```sh
 source <INSTALL_DIR>/setupvars.sh
 python -m pip install --upgrade-strategy eager "optimum[openvino]>=1.14" -r ../../../llm_bench/python/requirements.txt ../../../thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu
-python ../../../llm_bench/python/convert.py --model_id meta-llama/Llama-2-7b-hf --output_dir ./Llama-2-7b-hf/ --save_orig --stateful
+python ../../../llm_bench/python/convert.py --model_id meta-llama/Llama-2-7b-hf --output_dir ./Llama-2-7b-hf/ --precision FP16 --stateful
 python ./convert_tokenizers.py ./Llama-2-7b-hf/
 ```
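The rewritten "How it works" paragraph amounts to plain greedy decoding: each forward pass yields logits for the next position, the argmax token is emitted and appended to the input, and generation stops at the EOS token. A minimal, framework-free sketch of that loop (the `forward` callable stands in for one model inference; this is not the sample's actual source):

```cpp
#include <algorithm>
#include <cstdint>
#include <functional>
#include <vector>

// Greedy decoding: repeatedly pick the argmax token and feed it back,
// stopping at the end-of-sequence token or after max_new_tokens steps.
std::vector<int64_t> greedy_generate(
        const std::function<std::vector<float>(const std::vector<int64_t>&)>& forward,
        std::vector<int64_t> tokens, int64_t eos_id, size_t max_new_tokens) {
    std::vector<int64_t> generated;
    for (size_t step = 0; step < max_new_tokens; ++step) {
        // Logits over the vocabulary for the last position.
        std::vector<float> logits = forward(tokens);
        // Greedy choice: the single highest-scoring token, no sampling.
        int64_t next = std::max_element(logits.begin(), logits.end()) - logits.begin();
        if (next == eos_id)
            break;  // the special EOS token ends generation
        generated.push_back(next);
        tokens.push_back(next);  // the prediction becomes the next input
    }
    return generated;
}
```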
text_generation/causal_lm/cpp/causal_lm.cpp → text_generation/causal_lm/cpp/greedy_causal_lm.cpp
File renamed without changes.
6 changes: 3 additions & 3 deletions text_generation/causal_lm/cpp/set_up_and_run.sh
@@ -17,10 +17,10 @@ curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/
 sudo ./ov/install_dependencies/install_openvino_dependencies.sh
 
 source ./ov/setupvars.sh
-python -m pip install --upgrade-strategy eager "optimum[openvino]>=1.14" -r ../../../llm_bench/python/requirements.txt ../../../thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ../../../llm_bench/python/convert.py --model_id openlm-research/open_llama_3b_v2 --output_dir ./open_llama_3b_v2/ --stateful &
+python -m pip install --upgrade-strategy eager "optimum[openvino]>=1.14" -r ../../../llm_bench/python/requirements.txt ../../../thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ../../../llm_bench/python/convert.py --model_id TinyLlama/TinyLlama-1.1B-intermediate-step-1195k-token-2.5T --output_dir ./TinyLlama-1.1B-intermediate-step-1195k-token-2.5T/ --precision FP16 --stateful &
 cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
 cmake --build ./build/ --config Release -j
 wait
 
-python ./convert_tokenizers.py ./open_llama_3b_v2/pytorch/dldt/FP32/
-./build/causal_lm ./open_llama_3b_v2/pytorch/dldt/FP32/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "return 0"
+python ./convert_tokenizers.py ./TinyLlama-1.1B-intermediate-step-1195k-token-2.5T/pytorch/dldt/FP16/
+./build/greedy_causal_lm ./TinyLlama-1.1B-intermediate-step-1195k-token-2.5T/pytorch/dldt/FP16/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "return 0"
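The last line shows the renamed binary's calling convention: model IR, tokenizer IR, detokenizer IR, then the prompt. A hypothetical sketch of such an entry point (the sample's real argument handling lives in `greedy_causal_lm.cpp`):

```cpp
#include <iostream>
#include <string>

int main(int argc, char* argv[]) {
    // Four positional arguments, matching the invocation in set_up_and_run.sh.
    if (argc != 5) {
        std::cerr << "Usage: " << argv[0]
                  << " <openvino_model.xml> <tokenizer.xml> <detokenizer.xml> '<prompt>'\n";
        return 1;
    }
    const std::string model_path = argv[1], tokenizer_path = argv[2],
                      detokenizer_path = argv[3], prompt = argv[4];
    // ... load the models, tokenize the prompt, and run greedy generation ...
}
```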
