Skip to content

Commit

Permalink
Add callback binding (#12)
Browse files Browse the repository at this point in the history
* Add callback binding

* put models back

* Update list_test_models.py

---------

Co-authored-by: Pavel Esir <[email protected]>
  • Loading branch information
Wovchena and pavel-esir authored May 29, 2024
1 parent 5c6c14f commit 174f67a
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 12 deletions.
Binary file removed src/python/openvino_genai/libgenai.so.2024.2.0.0
Binary file not shown.
22 changes: 11 additions & 11 deletions src/python/py_generate_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ using ov::genai::GenerationConfig;
using ov::genai::EncodedResults;
using ov::genai::DecodedResults;
using ov::genai::StopCriteria;
using ov::genai::StreamerBase;
using ov::genai::StreamerVariant;

namespace {
void str_to_stop_criteria(GenerationConfig& config, const std::string& stop_criteria_str){
Expand Down Expand Up @@ -85,29 +85,29 @@ void update_config_from_kwargs(GenerationConfig& config, const py::kwargs& kwarg
if (kwargs.contains("bos_token")) config.bos_token = kwargs["bos_token"].cast<std::string>();
}

py::object call_with_config(LLMPipeline& pipe, const std::string& text, const GenerationConfig& config) {
py::object call_with_config(LLMPipeline& pipe, const std::string& text, const GenerationConfig& config, const StreamerVariant& streamer) {
if (config.num_return_sequences > 1) {
return py::cast(pipe.generate({text}, config).texts);
return py::cast(pipe.generate({text}, config, streamer).texts);
} else {
return py::cast(std::string(pipe.generate(text, config)));
return py::cast(std::string(pipe.generate(text, config, streamer)));
}
}

std::vector<std::string> call_with_config(LLMPipeline& pipe, const std::vector<std::string>& text, const GenerationConfig& config) {
return pipe.generate(text, config);
std::vector<std::string> call_with_config(LLMPipeline& pipe, const std::vector<std::string>& text, const GenerationConfig& config, const StreamerVariant& streamer) {
return pipe.generate(text, config, streamer);
}

std::vector<std::string> call_with_kwargs(LLMPipeline& pipeline, const std::vector<std::string>& texts, const py::kwargs& kwargs) {
GenerationConfig config = pipeline.get_generation_config();
update_config_from_kwargs(config, kwargs);
return call_with_config(pipeline, texts, config);
return call_with_config(pipeline, texts, config, kwargs.contains("streamer") ? kwargs["streamer"].cast<StreamerVariant>() : std::monostate());
}

py::object call_with_kwargs(LLMPipeline& pipeline, const std::string& text, const py::kwargs& kwargs) {
// Create a new GenerationConfig instance and initialize from kwargs
GenerationConfig config = pipeline.get_generation_config();
update_config_from_kwargs(config, kwargs);
return call_with_config(pipeline, text, config);
return call_with_config(pipeline, text, config, kwargs.contains("streamer") ? kwargs["streamer"].cast<StreamerVariant>() : std::monostate());
}

std::filesystem::path with_openvino_tokenizers(const std::filesystem::path& path) {
Expand Down Expand Up @@ -174,12 +174,12 @@ PYBIND11_MODULE(py_generate_pipeline, m) {
" plugin_config (ov::AnyMap): Plugin configuration settings. Default is an empty.")

.def("__call__", py::overload_cast<LLMPipeline&, const std::string&, const py::kwargs&>(&call_with_kwargs))
.def("__call__", py::overload_cast<LLMPipeline&, const std::string&, const GenerationConfig&>(&call_with_config))
.def("__call__", py::overload_cast<LLMPipeline&, const std::string&, const GenerationConfig&, const StreamerVariant&>(&call_with_config))

.def("generate", py::overload_cast<LLMPipeline&, const std::vector<std::string>&, const py::kwargs&>(&call_with_kwargs))
.def("generate", py::overload_cast<LLMPipeline&, const std::vector<std::string>&, const GenerationConfig&>(&call_with_config))
.def("generate", py::overload_cast<LLMPipeline&, const std::vector<std::string>&, const GenerationConfig&, const StreamerVariant&>(&call_with_config))
.def("generate", py::overload_cast<LLMPipeline&, const std::string&, const py::kwargs&>(&call_with_kwargs))
.def("generate", py::overload_cast<LLMPipeline&, const std::string&, const GenerationConfig&>(&call_with_config))
.def("generate", py::overload_cast<LLMPipeline&, const std::string&, const GenerationConfig&, const StreamerVariant&>(&call_with_config))

// todo: if input_ids is a ov::Tensor/numpy tensor
// todo: implement calling generate/operator() with StreamerBase or lambda streamer
Expand Down
2 changes: 1 addition & 1 deletion tests/python_tests/list_test_models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
def models_list():
model_ids = [
("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "TinyLlama-1.1B-Chat-v1.0"),
("databricks/dolly-v2-3b", "dolly-v2-3b"),
# ("databricks/dolly-v2-3b", "dolly-v2-3b"), # not enough free disk space on CI machine
("microsoft/phi-1_5", "phi-1_5/"),
# ("google/gemma-2b-it", "gemma-2b-it"),
# ("google/gemma-7b-it", "gemma-7b-it"),
Expand Down
31 changes: 31 additions & 0 deletions tests/python_tests/test_generate_api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (C) 2023-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import openvino_genai
import pytest
from list_test_models import models_list

Expand Down Expand Up @@ -111,3 +112,33 @@ def test_beam_search_long_sentences(model_fixture, num_beam_groups, group_size,
max_new_tokens=max_new_tokens,
)
run_hf_ov_genai_comparison(model_fixture, generation_config, prompt)


def user_defined_callback(subword):
    # Minimal streamer callback: echoes each generated subword to stdout.
    # Used as a pytest parameter alongside built-in print and a lambda to
    # verify that any plain callable is accepted as a streamer.
    print(subword)


@pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
def test_callback_one_string(model_fixture, callback):
    # A single prompt string should accept any callable as the streamer argument.
    model_path = model_fixture[1]
    pipe = openvino_genai.LLMPipeline(model_path, 'CPU')
    config = openvino_genai.GenerationConfig()
    pipe.generate('', config, callback)


@pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
def test_callback_batch_fail(model_fixture, callback):
    # Streaming with a batch of prompts is unsupported: expect RuntimeError.
    prompts = ['1', '2']
    pipe = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
    with pytest.raises(RuntimeError):
        pipe.generate(prompts, openvino_genai.GenerationConfig(), callback)


@pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
def test_callback_kwargs_one_string(model_fixture, callback):
    # The streamer may also be supplied via the `streamer` keyword argument.
    pipeline = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
    pipeline.generate('', max_new_tokens=10, streamer=callback)


@pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
def test_callback_kwargs_batch_fail(model_fixture, callback):
    # Keyword-style streamer combined with a batch of prompts must raise RuntimeError.
    pipeline = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
    batch = ['1', '2']
    with pytest.raises(RuntimeError):
        pipeline.generate(batch, max_new_tokens=10, streamer=callback)

0 comments on commit 174f67a

Please sign in to comment.