From c0346791fe162c72e4559d1fac93e0bbb4ca09e2 Mon Sep 17 00:00:00 2001
From: Xiake Sun
Date: Wed, 13 Dec 2023 20:18:15 +0800
Subject: [PATCH 1/7] Fix directory typo and fix causal_lm demo for llama
 architecture

---
 .../{casual_lm => causal_lm}/cpp/CMakeLists.txt  | 16 ++++++++--------
 .../{casual_lm => causal_lm}/cpp/README.md       |  4 ++--
 .../cpp/causal_lm.cpp}                           | 16 ++++------------
 .../cpp/convert_tokenizers.py                    |  0
 .../cpp/set_up_and_run.sh                        |  4 ++--
 5 files changed, 16 insertions(+), 24 deletions(-)
 rename text_generation/{casual_lm => causal_lm}/cpp/CMakeLists.txt (63%)
 rename text_generation/{casual_lm => causal_lm}/cpp/README.md (95%)
 rename text_generation/{casual_lm/cpp/casual_lm.cpp => causal_lm/cpp/causal_lm.cpp} (87%)
 rename text_generation/{casual_lm => causal_lm}/cpp/convert_tokenizers.py (100%)
 rename text_generation/{casual_lm => causal_lm}/cpp/set_up_and_run.sh (85%)

diff --git a/text_generation/casual_lm/cpp/CMakeLists.txt b/text_generation/causal_lm/cpp/CMakeLists.txt
similarity index 63%
rename from text_generation/casual_lm/cpp/CMakeLists.txt
rename to text_generation/causal_lm/cpp/CMakeLists.txt
index 79e8290abe..35c0428576 100644
--- a/text_generation/casual_lm/cpp/CMakeLists.txt
+++ b/text_generation/causal_lm/cpp/CMakeLists.txt
@@ -2,25 +2,25 @@
 # SPDX-License-Identifier: Apache-2.0
 
 cmake_minimum_required(VERSION 3.15)
-project(casual_lm)
+project(causal_lm)
 
 # Build user_ov_extensions
 list(APPEND CUSTOM_OPERATIONS tokenizer)
 add_subdirectory(../../../thirdparty/openvino_contrib/modules/custom_operations/ "${CMAKE_CURRENT_BINARY_DIR}/custom_operations/")
 
-add_executable(casual_lm casual_lm.cpp)
-target_compile_definitions(casual_lm PRIVATE USER_OV_EXTENSIONS_PATH=\"$<TARGET_FILE:user_ov_extensions>\")
+add_executable(causal_lm causal_lm.cpp)
+target_compile_definitions(causal_lm PRIVATE USER_OV_EXTENSIONS_PATH=\"$<TARGET_FILE:user_ov_extensions>\")
 find_package(OpenVINO REQUIRED COMPONENTS Runtime)
-target_link_libraries(casual_lm PRIVATE openvino::runtime user_ov_extensions)
-set_target_properties(casual_lm PROPERTIES CXX_STANDARD 17)
-set_target_properties(casual_lm PROPERTIES CXX_STANDARD_REQUIRED ON)
+target_link_libraries(causal_lm PRIVATE openvino::runtime user_ov_extensions)
+set_target_properties(causal_lm PROPERTIES CXX_STANDARD 17)
+set_target_properties(causal_lm PROPERTIES CXX_STANDARD_REQUIRED ON)
 if(MSVC)
   target_compile_options(
-    casual_lm PRIVATE
+    causal_lm PRIVATE
     /Wall  # Display all warnings
     /wd4710 /wd4711  # Disable the inline warnings
     /EHsc  # Enable standard C++ stack unwinding, assume functions with extern "C" never throw
   )
 else()
-  target_compile_options(casual_lm PRIVATE -Wall)  # Display all warnings
+  target_compile_options(causal_lm PRIVATE -Wall)  # Display all warnings
 endif()
diff --git a/text_generation/casual_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md
similarity index 95%
rename from text_generation/casual_lm/cpp/README.md
rename to text_generation/causal_lm/cpp/README.md
index 9db8f16aea..40a1038fba 100644
--- a/text_generation/casual_lm/cpp/README.md
+++ b/text_generation/causal_lm/cpp/README.md
@@ -53,8 +53,8 @@ python ./convert_tokenizers.py ./Llama-2-7b-hf/
 
 ## Run
 
-Usage: `casual_lm <openvino_model.xml> <tokenizer.xml> <detokenizer.xml> "<prompt>"`
+Usage: `causal_lm <openvino_model.xml> <tokenizer.xml> <detokenizer.xml> "<prompt>"`
 
-Example: `./build/casual_lm ./Llama-2-7b-hf/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "Why is the Sun yellow?"`
+Example: `./build/causal_lm ./Llama-2-7b-hf/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "Why is the Sun yellow?"`
 
 To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
diff --git a/text_generation/casual_lm/cpp/casual_lm.cpp b/text_generation/causal_lm/cpp/causal_lm.cpp
similarity index 87%
rename from text_generation/casual_lm/cpp/casual_lm.cpp
rename to text_generation/causal_lm/cpp/causal_lm.cpp
index faa804ae66..24ea6da568 100644
--- a/text_generation/casual_lm/cpp/casual_lm.cpp
+++ b/text_generation/causal_lm/cpp/causal_lm.cpp
@@ -39,43 +39,35 @@ int main(int argc, char* argv[]) try {
         }},
         {1, ov::PartialShape{
             BATCH_SIZE, -1
-        }},
-        {2, ov::PartialShape{
-            BATCH_SIZE, -1
         }}
     };
     std::vector<ov::Output<ov::Node>> inputs = model->inputs();
-    for (size_t idx = 3; idx < inputs.size(); ++idx) {
+    for (size_t idx = 2; idx < inputs.size(); ++idx){
         ov::PartialShape shape = inputs.at(idx).get_partial_shape();
         shape[0] = BATCH_SIZE;
         shapes.emplace(idx, shape);
     }
     model->reshape(shapes);
     ov::InferRequest ireq = core.compile_model(model, "CPU").create_infer_request();
-    for (size_t idx = 2; idx < inputs.size(); ++idx) {
+    for (size_t idx = 3; idx < inputs.size(); ++idx) {
         ireq.get_input_tensor(idx).set_shape(inputs.at(idx).get_partial_shape().get_min_shape());
     }
     ireq.get_tensor("input_ids").set_shape(input_ids.get_shape());  // TODO: replace with ireq.set_tensor("input_ids", input_ids); after it's fixed
     std::copy_n(input_ids.data<const int64_t>(), input_ids.get_size(), ireq.get_tensor("input_ids").data<int64_t>());
     ireq.get_tensor("attention_mask").set_shape(attention_mask.get_shape());
     std::fill_n(ireq.get_tensor("attention_mask").data<int64_t>(), attention_mask.get_size(), 1);
-    ireq.get_tensor("position_ids").set_shape(input_ids.get_shape());
-    std::iota(ireq.get_tensor("position_ids").data<int64_t>(), ireq.get_tensor("position_ids").data<int64_t>() + ireq.get_tensor("position_ids").get_size(), 0);
     ireq.infer();
     size_t vocab_size = ireq.get_tensor("logits").get_shape().back();
     float* logits = ireq.get_tensor("logits").data<float>() + (input_ids.get_size() - 1) * vocab_size;
     int64_t out_token = std::max_element(logits, logits + vocab_size) - logits;
 
-    ireq.get_tensor("input_ids").set_shape({BATCH_SIZE, 1});
-    ireq.get_tensor("position_ids").set_shape({BATCH_SIZE, 1});
     constexpr int64_t SPECIAL_EOS_TOKEN = 2;  // There's no way to extract the value from the detokenizer for now
     while (out_token != SPECIAL_EOS_TOKEN) {
         ireq.get_tensor("input_ids").data<int64_t>()[0] = out_token;
         ireq.get_tensor("attention_mask").set_shape({BATCH_SIZE, ireq.get_tensor("attention_mask").get_shape()[1] + 1});
         std::fill_n(ireq.get_tensor("attention_mask").data<int64_t>(), ireq.get_tensor("attention_mask").get_size(), 1);
-        ireq.get_tensor("position_ids").data<int64_t>()[0] = ireq.get_tensor("attention_mask").get_size() - 2;
-        for (size_t idx = 3; idx < inputs.size(); ++idx) {
-            ireq.set_input_tensor(idx, ireq.get_output_tensor(idx - 2));
+        for (size_t idx = 2; idx < inputs.size(); ++idx) {
+            ireq.set_input_tensor(idx, ireq.get_output_tensor(idx - 1));
         }
         ireq.start_async();
         print_token(detokenizer, out_token);
diff --git a/text_generation/casual_lm/cpp/convert_tokenizers.py b/text_generation/causal_lm/cpp/convert_tokenizers.py
similarity index 100%
rename from text_generation/casual_lm/cpp/convert_tokenizers.py
rename to text_generation/causal_lm/cpp/convert_tokenizers.py
diff --git a/text_generation/casual_lm/cpp/set_up_and_run.sh b/text_generation/causal_lm/cpp/set_up_and_run.sh
similarity index 85%
rename from text_generation/casual_lm/cpp/set_up_and_run.sh
rename to text_generation/causal_lm/cpp/set_up_and_run.sh
index e19985a76f..9bc7f3b5d8 100755
--- a/text_generation/casual_lm/cpp/set_up_and_run.sh
+++ b/text_generation/causal_lm/cpp/set_up_and_run.sh
@@ -13,7 +13,7 @@ function abs_path() {
 cd "`abs_path`"
 
 mkdir ./ov/
-curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.1/linux/l_openvino_toolkit_ubuntu20_2023.1.0.12185.47b736f63ed_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
+curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.2/linux/l_openvino_toolkit_ubuntu20_2023.2.0.13089.cfd42bd2cb0_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
 sudo ./ov/install_dependencies/install_openvino_dependencies.sh
 source ./ov/setupvars.sh
 
@@ -23,4 +23,4 @@ cmake --build ./build/ --config Release -j
 wait
 
 python ./convert_tokenizers.py ./open_llama_3b_v2/
-./build/casual_lm ./open_llama_3b_v2/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "return 0"
+./build/casual_lm ./open_llama_3b_v2/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "Why is the Sun yellow?"

From b289e775175f8669bb72f2937d8585d46218b48e Mon Sep 17 00:00:00 2001
From: Xiake Sun
Date: Wed, 13 Dec 2023 20:30:58 +0800
Subject: [PATCH 2/7] Fix format

---
 text_generation/causal_lm/cpp/causal_lm.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/text_generation/causal_lm/cpp/causal_lm.cpp b/text_generation/causal_lm/cpp/causal_lm.cpp
index 24ea6da568..719ab2b561 100644
--- a/text_generation/causal_lm/cpp/causal_lm.cpp
+++ b/text_generation/causal_lm/cpp/causal_lm.cpp
@@ -42,7 +42,7 @@ int main(int argc, char* argv[]) try {
         }}
     };
     std::vector<ov::Output<ov::Node>> inputs = model->inputs();
-    for (size_t idx = 2; idx < inputs.size(); ++idx){
+    for (size_t idx = 2; idx < inputs.size(); ++idx) {
         ov::PartialShape shape = inputs.at(idx).get_partial_shape();
         shape[0] = BATCH_SIZE;
         shapes.emplace(idx, shape);

From 6c5e28fecd29a893dfd1edd1d58a6c6854c90b53 Mon Sep 17 00:00:00 2001
From: Xiake Sun
Date: Wed, 13 Dec 2023 20:43:12 +0800
Subject: [PATCH 3/7] Fix typo

---
 text_generation/causal_lm/cpp/causal_lm.cpp     | 2 +-
 text_generation/causal_lm/cpp/set_up_and_run.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/text_generation/causal_lm/cpp/causal_lm.cpp b/text_generation/causal_lm/cpp/causal_lm.cpp
index 719ab2b561..1e13c991b0 100644
--- a/text_generation/causal_lm/cpp/causal_lm.cpp
+++ b/text_generation/causal_lm/cpp/causal_lm.cpp
@@ -49,7 +49,7 @@ int main(int argc, char* argv[]) try {
     }
     model->reshape(shapes);
     ov::InferRequest ireq = core.compile_model(model, "CPU").create_infer_request();
-    for (size_t idx = 3; idx < inputs.size(); ++idx) {
+    for (size_t idx = 2; idx < inputs.size(); ++idx) {
         ireq.get_input_tensor(idx).set_shape(inputs.at(idx).get_partial_shape().get_min_shape());
     }
     ireq.get_tensor("input_ids").set_shape(input_ids.get_shape());  // TODO: replace with ireq.set_tensor("input_ids", input_ids); after it's fixed
diff --git a/text_generation/causal_lm/cpp/set_up_and_run.sh b/text_generation/causal_lm/cpp/set_up_and_run.sh
index 9bc7f3b5d8..b4af6c3b93 100755
--- a/text_generation/causal_lm/cpp/set_up_and_run.sh
+++ b/text_generation/causal_lm/cpp/set_up_and_run.sh
@@ -23,4 +23,4 @@ cmake --build ./build/ --config Release -j
 wait
 
 python ./convert_tokenizers.py ./open_llama_3b_v2/
-./build/casual_lm ./open_llama_3b_v2/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "Why is the Sun yellow?"
+./build/causal_lm ./open_llama_3b_v2/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "Why is the Sun yellow?"
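
A note on the loop bounds that patches 2 and 3 settle on: at this point in the series the model exposes only input_ids and attention_mask ahead of the KV-cache (past_key_values) inputs, while its outputs are logits followed by the matching present tensors, so KV-cache input idx pairs with output idx - 1. That invariant is what ireq.set_input_tensor(idx, ireq.get_output_tensor(idx - 1)) relies on. Below is a minimal sketch of that wiring, generalized over the number of non-cache inputs; the helper name and the free-function framing are illustrative, not part of the demo:

    #include <cstddef>
    #include <openvino/openvino.hpp>

    // Re-feed each present.* KV-cache output as the matching past_key_values.*
    // input before the next generation step. n_non_cache_inputs is 2 at this
    // point in the series (input_ids, attention_mask) and becomes 3 once
    // patch 5 reintroduces position_ids; the outputs always start with a
    // single logits tensor, so the input-to-output offset is
    // n_non_cache_inputs - 1.
    void wire_kv_cache(ov::InferRequest& ireq, std::size_t n_inputs, std::size_t n_non_cache_inputs) {
        const std::size_t offset = n_non_cache_inputs - 1;
        for (std::size_t idx = n_non_cache_inputs; idx < n_inputs; ++idx) {
            ireq.set_input_tensor(idx, ireq.get_output_tensor(idx - offset));
        }
    }

With position_ids present as a third non-cache input, the same invariant yields the idx - 2 offset that patch 5 switches to below.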

From 8e67c5fa46c1b93d0f9c8394c9f31a9f1c8e25e0 Mon Sep 17 00:00:00 2001
From: Xiake Sun
Date: Wed, 13 Dec 2023 20:48:42 +0800
Subject: [PATCH 4/7] Fix CI for causal lm cpp

---
 .github/workflows/casual_lm_cpp.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/casual_lm_cpp.yml b/.github/workflows/casual_lm_cpp.yml
index f9219e4176..9cd685cf8b 100644
--- a/.github/workflows/casual_lm_cpp.yml
+++ b/.github/workflows/casual_lm_cpp.yml
@@ -1,16 +1,16 @@
-name: casual_lm_cpp
+name: causal_lm_cpp
 on:
   pull_request:
     paths:
-    - text_generation/casual_lm/cpp/**
-    - '!text_generation/casual_lm/cpp/README.md'
+    - text_generation/causal_lm/cpp/**
+    - '!text_generation/causal_lm/cpp/README.md'
     - thirdparty/openvino_contrib
-    - .github/workflows/casual_lm_cpp.yml
+    - .github/workflows/causal_lm_cpp.yml
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
 jobs:
-  casual_lm_cpp:
+  causal_lm_cpp:
     runs-on: ubuntu-20.04-8-cores
     steps:
     - uses: actions/checkout@v4
     - uses: actions/setup-python@v4
       with:
         python-version: 3.8
-    - run: ./text_generation/casual_lm/cpp/set_up_and_run.sh
+    - run: ./text_generation/causal_lm/cpp/set_up_and_run.sh

From 54d38b518192ae6a942021c91ce3f01c3262c934 Mon Sep 17 00:00:00 2001
From: Xiake Sun
Date: Wed, 13 Dec 2023 22:39:07 +0800
Subject: [PATCH 5/7] Add position id support for updated optimum

---
 text_generation/causal_lm/cpp/causal_lm.cpp | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/text_generation/causal_lm/cpp/causal_lm.cpp b/text_generation/causal_lm/cpp/causal_lm.cpp
index 1e13c991b0..faa804ae66 100644
--- a/text_generation/causal_lm/cpp/causal_lm.cpp
+++ b/text_generation/causal_lm/cpp/causal_lm.cpp
@@ -39,10 +39,13 @@ int main(int argc, char* argv[]) try {
         }},
         {1, ov::PartialShape{
             BATCH_SIZE, -1
+        }},
+        {2, ov::PartialShape{
+            BATCH_SIZE, -1
         }}
     };
     std::vector<ov::Output<ov::Node>> inputs = model->inputs();
-    for (size_t idx = 2; idx < inputs.size(); ++idx) {
+    for (size_t idx = 3; idx < inputs.size(); ++idx) {
         ov::PartialShape shape = inputs.at(idx).get_partial_shape();
         shape[0] = BATCH_SIZE;
         shapes.emplace(idx, shape);
@@ -56,18 +59,23 @@ int main(int argc, char* argv[]) try {
     std::copy_n(input_ids.data<const int64_t>(), input_ids.get_size(), ireq.get_tensor("input_ids").data<int64_t>());
     ireq.get_tensor("attention_mask").set_shape(attention_mask.get_shape());
     std::fill_n(ireq.get_tensor("attention_mask").data<int64_t>(), attention_mask.get_size(), 1);
+    ireq.get_tensor("position_ids").set_shape(input_ids.get_shape());
+    std::iota(ireq.get_tensor("position_ids").data<int64_t>(), ireq.get_tensor("position_ids").data<int64_t>() + ireq.get_tensor("position_ids").get_size(), 0);
     ireq.infer();
     size_t vocab_size = ireq.get_tensor("logits").get_shape().back();
     float* logits = ireq.get_tensor("logits").data<float>() + (input_ids.get_size() - 1) * vocab_size;
     int64_t out_token = std::max_element(logits, logits + vocab_size) - logits;
 
+    ireq.get_tensor("input_ids").set_shape({BATCH_SIZE, 1});
+    ireq.get_tensor("position_ids").set_shape({BATCH_SIZE, 1});
     constexpr int64_t SPECIAL_EOS_TOKEN = 2;  // There's no way to extract the value from the detokenizer for now
     while (out_token != SPECIAL_EOS_TOKEN) {
         ireq.get_tensor("input_ids").data<int64_t>()[0] = out_token;
         ireq.get_tensor("attention_mask").set_shape({BATCH_SIZE, ireq.get_tensor("attention_mask").get_shape()[1] + 1});
         std::fill_n(ireq.get_tensor("attention_mask").data<int64_t>(), ireq.get_tensor("attention_mask").get_size(), 1);
-        for (size_t idx = 2; idx < inputs.size(); ++idx) {
-            ireq.set_input_tensor(idx, ireq.get_output_tensor(idx - 1));
+        ireq.get_tensor("position_ids").data<int64_t>()[0] = ireq.get_tensor("attention_mask").get_size() - 2;
+        for (size_t idx = 3; idx < inputs.size(); ++idx) {
+            ireq.set_input_tensor(idx, ireq.get_output_tensor(idx - 2));
         }
         ireq.start_async();
         print_token(detokenizer, out_token);
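
For readers tracking the position_ids arithmetic that patch 5 restores: the prompt phase assigns positions 0..N-1 with std::iota, and in the generation loop the attention mask is grown by one token first, after which the new token's position id is taken as attention_mask.get_size() - 2. Below is a standalone toy that mirrors exactly that bookkeeping with plain vectors; everything in it is illustrative and demonstrates only the index arithmetic from the patch, not the inference itself:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <numeric>
    #include <vector>

    int main() {
        const std::size_t prompt_len = 4;
        // Prompt phase: one position id per prompt token, 0..prompt_len-1,
        // matching the demo's std::iota over the position_ids tensor.
        std::vector<int64_t> position_ids(prompt_len);
        std::iota(position_ids.begin(), position_ids.end(), int64_t{0});

        // Generation phase, in the same order as the patch: extend the mask
        // by one (all ones), then take the position as mask size minus 2.
        std::vector<int64_t> attention_mask(prompt_len, 1);
        for (int step = 0; step < 3; ++step) {
            attention_mask.push_back(1);
            const int64_t position = static_cast<int64_t>(attention_mask.size()) - 2;
            std::printf("step %d: position_id = %lld\n", step, static_cast<long long>(position));
        }
        return 0;
    }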

From 6659243021673a5c64686bde3877878dfd80060b Mon Sep 17 00:00:00 2001
From: Xiake Sun
Date: Wed, 13 Dec 2023 22:39:43 +0800
Subject: [PATCH 6/7] WA for CI timeout with unstopped sentence generation

---
 text_generation/causal_lm/cpp/set_up_and_run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/text_generation/causal_lm/cpp/set_up_and_run.sh b/text_generation/causal_lm/cpp/set_up_and_run.sh
index b4af6c3b93..061f9ebfc6 100755
--- a/text_generation/causal_lm/cpp/set_up_and_run.sh
+++ b/text_generation/causal_lm/cpp/set_up_and_run.sh
@@ -23,4 +23,4 @@ cmake --build ./build/ --config Release -j
 wait
 
 python ./convert_tokenizers.py ./open_llama_3b_v2/
-./build/causal_lm ./open_llama_3b_v2/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "Why is the Sun yellow?"
+./build/causal_lm ./open_llama_3b_v2/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "return 0"

From 3475e00fedbd55384180255e64b16b81a22b3034 Mon Sep 17 00:00:00 2001
From: Xiake Sun
Date: Thu, 14 Dec 2023 15:29:33 +0800
Subject: [PATCH 7/7] Rename causal_lm workflow file

---
 .github/workflows/{casual_lm_cpp.yml => causal_lm_cpp.yml} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename .github/workflows/{casual_lm_cpp.yml => causal_lm_cpp.yml} (100%)

diff --git a/.github/workflows/casual_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
similarity index 100%
rename from .github/workflows/casual_lm_cpp.yml
rename to .github/workflows/causal_lm_cpp.yml
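
A closing note on patch 6's workaround: the demo's loop stops only when the model emits the hard-coded SPECIAL_EOS_TOKEN = 2, so a prompt that keeps the model talking can run until the CI job times out, while "return 0" reliably yields a short completion. A common alternative guard is a cap on the number of new tokens; the sketch below shows that pattern in isolation, where the cap and the next_token stand-in are hypothetical and not part of the demo:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Hypothetical stand-in for the demo's argmax-over-logits step:
    // emits a dummy token a few times, then the EOS id.
    static int64_t next_token(std::size_t step) { return step < 7 ? 42 : 2; }

    int main() {
        constexpr int64_t SPECIAL_EOS_TOKEN = 2;   // same hard-coded id as the demo
        constexpr std::size_t MAX_NEW_TOKENS = 100; // extra guard, absent from the demo
        std::size_t generated = 0;
        int64_t out_token = next_token(generated);
        // Stop on EOS or when the token budget runs out, whichever comes first.
        while (out_token != SPECIAL_EOS_TOKEN && generated < MAX_NEW_TOKENS) {
            std::printf("token %lld\n", static_cast<long long>(out_token));
            out_token = next_token(++generated);
        }
        return 0;
    }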