From bbc8c25502436afd1216748d0d3776f7a37a776a Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 24 May 2024 12:23:24 +0200
Subject: [PATCH] removed ov_tokenizers_path when ov::genai::Tokenizer is
 passed to LLMPipeline

---
 src/README.md                                    | 13 +++++++------
 .../include/openvino/genai/generation_config.hpp |  2 +-
 src/cpp/include/openvino/genai/llm_pipeline.hpp  |  3 +--
 src/cpp/src/llm_pipeline.cpp                     | 11 ++++-------
 src/python/py_generate_pipeline.cpp              |  4 ++--
 5 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/src/README.md b/src/README.md
index 250bf4105b..06a649a752 100644
--- a/src/README.md
+++ b/src/README.md
@@ -24,8 +24,8 @@ Calling generate with custom generation config parameters, e.g. config for grouped beam search
 import openvino_genai as ov_genai
 pipe = ov_genai.LLMPipeline(model_path, "CPU")
 
-res = pipe.generate("The Sun is yellow bacause", max_new_tokens=30, num_groups=3, group_size=5)
-print(res)
+result = pipe.generate("The Sun is yellow bacause", max_new_tokens=30, num_groups=3, group_size=5, diversity_penalty=1.5)
+print(result)
 ```
 
 output:
@@ -38,7 +38,7 @@ A simple chat in Python:
 import openvino_genai as ov_genai
 pipe = ov_genai.LLMPipeline(model_path)
 
-config = {'num_groups': 3, 'group_size': 5, 'diversity_penalty': 1.1}
+config = {'num_groups': 3, 'group_size': 5, 'diversity_penalty': 1.5}
 pipe.set_generation_config(config)
 
 pipe.start_chat()
@@ -49,7 +49,6 @@ while True:
         break
     print(pipe(prompt))
 pipe.finish_chat()
-
 ```
 
 Test to compare with Huggingface outputs
@@ -89,6 +88,9 @@ int main(int argc, char* argv[]) {
 
 A simple chat in C++ using grouped beam search decoding
 ``` cpp
+#include "openvino/genai/llm_pipeline.hpp"
+#include <iostream>
+
 int main(int argc, char* argv[]) {
     std::string prompt;
 
@@ -105,7 +107,7 @@ int main(int argc, char* argv[]) {
     for (;;) {
         std::cout << "question:\n";
         std::getline(std::cin, prompt);
-        if (prompts == "Stop!")
+        if (prompt == "Stop!")
             break;
 
         std::cout << "answer:\n";
@@ -118,7 +120,6 @@ int main(int argc, char* argv[]) {
 
 Streaming example with lambda function
 ``` cpp
-
 #include "openvino/genai/llm_pipeline.hpp"
 #include <iostream>
 
diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp
index 9a922549a1..4c43f880d9 100644
--- a/src/cpp/include/openvino/genai/generation_config.hpp
+++ b/src/cpp/include/openvino/genai/generation_config.hpp
@@ -42,7 +42,7 @@ enum class StopCriteria { early, heuristic, never };
  * @param num_beams number of beams for beam search. 1 disables beam search.
  * @param num_beam_groups number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams.
  * @param diversity_penalty this value is subtracted from a beam's score if it generates the same token as any beam from other group at a
- *        particular time.
+ *        particular time. See https://arxiv.org/pdf/1909.05858.
  * @param length_penalty exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to
  *        the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log
  *        likelihood of the sequence (i.e. negative), `length_penalty` > 0.0 promotes longer sequences, while
diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp
index 911a5a237a..7501058ca9 100644
--- a/src/cpp/include/openvino/genai/llm_pipeline.hpp
+++ b/src/cpp/include/openvino/genai/llm_pipeline.hpp
@@ -88,8 +88,7 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
         const std::string& model_path,
         const ov::genai::Tokenizer& tokenizer,
         const std::string& device="CPU",
-        const ov::AnyMap& plugin_config = {},
-        const std::string& ov_tokenizers_path=""
+        const ov::AnyMap& plugin_config = {}
     );
 
     ~LLMPipeline();
diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp
index 6f8dc675a0..4a3683bbd7 100644
--- a/src/cpp/src/llm_pipeline.cpp
+++ b/src/cpp/src/llm_pipeline.cpp
@@ -85,8 +85,7 @@ class LLMPipeline::LLMPipelineImpl {
         const std::string& model_path,
         const ov::genai::Tokenizer& tokenizer,
         const std::string& device,
-        const ov::AnyMap& plugin_config,
-        const std::string& ov_tokenizers_path=""
+        const ov::AnyMap& plugin_config
     );
 
     LLMPipelineImpl(
@@ -115,18 +114,16 @@ ov::genai::LLMPipeline::LLMPipeline(
     const std::string& model_path,
     const ov::genai::Tokenizer& tokenizer,
     const std::string& device,
-    const ov::AnyMap& plugin_config,
-    const std::string& ov_tokenizers_path
+    const ov::AnyMap& plugin_config
 ) {
-    m_pimpl = make_unique<LLMPipelineImpl>(model_path, tokenizer, device, plugin_config, ov_tokenizers_path);
+    m_pimpl = make_unique<LLMPipelineImpl>(model_path, tokenizer, device, plugin_config);
 }
 
 ov::genai::LLMPipeline::LLMPipelineImpl::LLMPipelineImpl(
     const std::string& model_path,
     const ov::genai::Tokenizer& tokenizer,
     const std::string& device,
-    const ov::AnyMap& plugin_config,
-    const std::string& ov_tokenizers_path
+    const ov::AnyMap& plugin_config
 ): m_tokenizer(tokenizer) {
     ov::Core core;
 
diff --git a/src/python/py_generate_pipeline.cpp b/src/python/py_generate_pipeline.cpp
index 0a5cf98d02..fa944bb4eb 100644
--- a/src/python/py_generate_pipeline.cpp
+++ b/src/python/py_generate_pipeline.cpp
@@ -78,9 +78,9 @@ PYBIND11_MODULE(py_generate_pipeline, m) {
     m.doc() = "Pybind11 binding for LLM Pipeline";
 
     py::class_<LLMPipeline>(m, "LLMPipeline")
-        .def(py::init<const std::string&, const ov::genai::Tokenizer&, const std::string&, const ov::AnyMap&, const std::string&>(),
+        .def(py::init<const std::string&, const ov::genai::Tokenizer&, const std::string&, const ov::AnyMap&>(),
             py::arg("model_path"), py::arg("tokenizer"), py::arg("device") = "CPU",
-            py::arg("plugin_config") = ov::AnyMap{}, py::arg("ov_tokenizers_path") = ov_tokenizers_module_path())
+            py::arg("plugin_config") = ov::AnyMap{})
         .def(py::init<const std::string&, const std::string&, const ov::AnyMap&, const std::string&>(), py::arg("path"), py::arg("device") = "CPU",
             py::arg("plugin_config") = ov::AnyMap{}, py::arg("ov_tokenizers_path") = ov_tokenizers_module_path())
         .def("__call__", py::overload_cast<LLMPipeline&, const std::string&, const py::kwargs&>(&call_with_kwargs))
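
Note: after this patch, the `LLMPipeline` overload that accepts a pre-built `ov::genai::Tokenizer` no longer takes an `ov_tokenizers_path` argument, since the tokenizer object already carries everything it needs. Below is a minimal C++ sketch of a caller using the updated signature; the model path is a placeholder, and the `Tokenizer(path)` constructor and single-argument `generate` call are assumptions based on the surrounding codebase, not guaranteed API:

```cpp
#include "openvino/genai/llm_pipeline.hpp"

#include <iostream>
#include <string>

int main() {
    // Placeholder path; point this at a real exported model directory.
    std::string model_path = "path/to/model";

    // Assumed Tokenizer(path) constructor: the tokenizer loads its own
    // tokenizer/detokenizer models, which is what makes a separate
    // ov_tokenizers_path argument redundant.
    ov::genai::Tokenizer tokenizer(model_path);

    // Updated signature after this patch:
    // (model_path, tokenizer, device = "CPU", plugin_config = {}),
    // with no trailing ov_tokenizers_path parameter.
    ov::genai::LLMPipeline pipe(model_path, tokenizer, "CPU");

    // Assumed single-prompt generate overload, mirroring the README examples.
    std::cout << pipe.generate("The Sun is yellow bacause") << '\n';
    return 0;
}
```

The Python binding mirrors this: per the `py_generate_pipeline.cpp` hunk above, `LLMPipeline(model_path, tokenizer)` now takes only `device` and `plugin_config` as optional arguments, while the path-only constructor keeps its `ov_tokenizers_path` default.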