Skip to content

Commit

Permalink
Port from master (#1285)
Browse files — Browse the repository at this point in the history
      - #1158
- #1178
- #1214
- #1243
- #1253
- #1259
- #1266
- #1271
- #1278
- #1280
- #1284
- e4a86f6
- #1246
- #958

---------

Co-authored-by: Anastasiia Pnevskaia <[email protected]>
Co-authored-by: Helena Kloosterman <[email protected]>
Co-authored-by: Vladimir Zlobin <[email protected]>
Co-authored-by: Dmitry Matveev <[email protected]>
Co-authored-by: Anna Likholat <[email protected]>
Co-authored-by: Alina Kladieva <[email protected]>
  • Loading branch information
7 people authored Dec 4, 2024
1 parent e42723a commit 8bef5a3
Show file tree
Hide file tree
Showing 35 changed files with 2,245 additions and 208 deletions.
2 changes: 1 addition & 1 deletion .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
- 'tests/cpp/generate_config.cpp'
- 'tests/cpp/sampler.cpp'

- 'category: LoRA':
'category: LoRA':
- 'src/cpp/include/openvino/genai/lora_adapter.hpp'
- 'src/cpp/src/lora_adapter.cpp'
- 'src/cpp/src/lora_helper.cpp'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ jobs:
- name: Test bindings (wheel)
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"
genai_python_lib_whisper:
Expand Down Expand Up @@ -307,7 +307,7 @@ jobs:
- name: Test bindings (wheel)
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke"
genai_python_lib_vlm:
Expand Down
35 changes: 30 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,26 +25,45 @@ if(POLICY CMP0169)
cmake_policy(SET CMP0169 OLD)
endif()

if(UNIX AND NOT (APPLE OR ANDROID OR CYGWIN))
set(LINUX ON)
endif()

project(OpenVINOGenAI
VERSION 2024.5.0.0
DESCRIPTION "OpenVINO GenAI"
HOMEPAGE_URL "https://github.com/openvinotoolkit/openvino.genai"
LANGUAGES CXX C)

if(NOT DEFINED Python3_FIND_VIRTUALENV)
set(Python3_FIND_VIRTUALENV FIRST)
endif()

# Looking for OpenVINO in the python distribution. It doesn't work for cross-compiling build
if(NOT CMAKE_CROSSCOMPILING)
find_package(Python3 REQUIRED)
execute_process(
COMMAND ${Python3_EXECUTABLE} -c "from openvino.utils import get_cmake_path; print(get_cmake_path(), end='')"
OUTPUT_VARIABLE OpenVINO_DIR_PY
ERROR_QUIET
)
endif()

# Find OpenVINODeveloperPackage first to compile with SDL flags
find_package(OpenVINODeveloperPackage ${OpenVINOGenAI_VERSION} QUIET
COMPONENTS Runtime Threading
PATHS "${OpenVINO_DIR}")
if(NOT OpenVINODeveloperPackage_FOUND)
find_package(OpenVINO ${OpenVINOGenAI_VERSION} REQUIRED
COMPONENTS Runtime Threading)
COMPONENTS Runtime Threading
PATHS "${OpenVINO_DIR_PY}")
endif()

include(cmake/features.cmake)

if(ENABLE_PYTHON)
# the following two calls are required for cross-compilation
if(OpenVINODeveloperPackage_DIR)
if(OpenVINODeveloperPackage_FOUND)
ov_find_python3(REQUIRED)
ov_detect_python_module_extension()
else()
Expand All @@ -62,9 +81,15 @@ endif()

add_subdirectory(thirdparty)
add_subdirectory(src)
add_subdirectory(samples)
add_subdirectory(tools/continuous_batching)
add_subdirectory(tests/cpp)
if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/samples")
add_subdirectory(samples)
endif()
if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tools/continuous_batching")
add_subdirectory(tools/continuous_batching)
endif()
if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tests/cpp")
add_subdirectory(tests/cpp)
endif()

install(FILES LICENSE DESTINATION docs/licensing COMPONENT licensing_genai RENAME LICENSE-GENAI)
install(FILES third-party-programs.txt DESTINATION docs/licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt)
Expand Down
25 changes: 21 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,17 +117,34 @@ optimum-cli export openvino --model openbmb/MiniCPM-V-2_6 --trust-remote-code --

### Run generation using VLMPipeline API in Python

See [Visual Language Chat](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/visual_language_chat) for a demo application.

Run the following command to download a sample image:

```sh
curl -O "https://storage.openvinotoolkit.org/test_data/images/dog.jpg"
```

```python
import numpy as np
import openvino as ov
import openvino_genai as ov_genai
#Will run model on CPU, GPU is a possible option
from PIL import Image

# Choose GPU instead of CPU in the line below to run the model on Intel integrated or discrete GPU
pipe = ov_genai.VLMPipeline("./MiniCPM-V-2_6/", "CPU")
rgb = read_image("cat.jpg")
print(pipe.generate(prompt, image=rgb, max_new_tokens=100))

image = Image.open("dog.jpg")
image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8)
image_data = ov.Tensor(image_data)

prompt = "Can you describe the image?"
print(pipe.generate(prompt, image=image_data, max_new_tokens=100))
```

### Run generation using VLMPipeline in C++

Code below requires installation of C++ compatible package (see [here](https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-genai.html#archive-installation) for more details)
Code below requires installation of C++ compatible package (see [here](https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-genai.html#archive-installation) for more details). See [Visual Language Chat](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/visual_language_chat) for a demo application.

```cpp
#include "load_image.hpp"
Expand Down
29 changes: 23 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,31 @@ name = "openvino-genai"
version = "2024.5.0.0"
description = "Library of the most popular Generative AI model pipelines, optimized execution methods, and samples"
requires-python = ">=3.9"
readme = {file = "src/README.md", content-type="text/markdown"}
license = {text = "OSI Approved :: Apache Software License"}
readme = { file = "src/README.md", content-type="text/markdown" }
license = { "file" = "LICENSE" }
authors = [
{ name = "OpenVINO Developers", email = "[email protected]" },
]
classifiers = [
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: Apache Software License",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Software Development :: Libraries :: Python Modules",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Operating System :: Unix",
"Operating System :: POSIX :: Linux",
"Operating System :: Microsoft :: Windows",
"Operating System :: MacOS",
"Programming Language :: C++",
"Programming Language :: C",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: Implementation :: CPython"
]
dependencies = [
"openvino_tokenizers~=2024.5.0.0.dev"
Expand All @@ -22,22 +37,24 @@ dependencies = [
directory = "src/python"

[tool.py-build-cmake.sdist]
exclude = ["tools", "samples", "tests", "thirdparty"]
include = ["CMakeLists.txt", "LICENSE", "third-party-programs.txt", "SECURITY.md", "cmake", "src", "thirdparty"]

[tool.py-build-cmake.cmake]
minimum_version = "3.23"
build_type = "Release"
config = ["Release"]
find_python3 = true
build_args = ["--parallel", "--target", "py_openvino_genai"]
build_args = ["--parallel", "--target", "py_openvino_genai_stub"]
install_args = ["--strip"]
install_components = ["wheel_genai"]
options = {"BUILD_TOKENIZERS" = "OFF"}

[build-system]
requires = [
"py-build-cmake@git+https://github.com/tttapa/py-build-cmake@7ab73da351c7140f06d727a8705bece4cf544cd9",
"cmake~=3.23"
"py-build-cmake==0.3.1",
"pybind11-stubgen==2.5.1",
"openvino~=2024.5.0.0.dev",
"cmake~=3.23.0"
]
build-backend = "py_build_cmake.build"

Expand Down
3 changes: 2 additions & 1 deletion requirements-build.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
cmake~=3.30
cmake~=3.23.0
pybind11-stubgen==2.5.1
15 changes: 15 additions & 0 deletions src/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,21 @@ else()
SOVERSION ${MAJOR_SUFFIX}${OpenVINOGenAI_VERSION_MINOR}${OpenVINOGenAI_VERSION_PATCH})
endif()

if(OpenVINODeveloperPackage_FOUND)
# must be called after all target_link_libraries
# ov_add_api_validator_post_build_step(TARGET ${TARGET_NAME})

ov_ncc_naming_style(FOR_TARGET ${TARGET_NAME}
SOURCE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include")

# TODO: override versions as currently they come from OpenVINO
# ov_add_vs_version_file(NAME ${TARGET_NAME}
# FILEDESCRIPTION "OpenVINO GenAI library")

# TODO: commit changes separately
# ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME})
endif()

# - Windows: `<openvino_dir>\runtime\bin\intel64\Release\`
# - MacOS_x86: `<openvino_dir>/runtime/lib/intel64/Release`
# - MacOS_arm64: `<openvino_dir>/runtime/lib/arm64/Release/`
Expand Down
13 changes: 13 additions & 0 deletions src/cpp/include/openvino/genai/visual_language/pipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,19 @@ class OPENVINO_GENAI_EXPORTS VLMPipeline {
const StreamerVariant& streamer
);

/// @brief Generate a response given a prompt and uint8 RGB image with [NHWC] or [HWC] layout.
/// @param prompt A prompt to respond to.
/// @param image Image to be prepended to a prompt.
/// @param generation_config A config to follow for text generation.
/// @param streamer A streamer to acquire intermediate result.
/// @return A string generated by a model.
DecodedResults generate(
const std::string& prompt,
const ov::Tensor& rgb,
const GenerationConfig& generation_config,
const StreamerVariant& streamer
);

/// @brief Generate a response given a prompt and config.
/// @param prompt A prompt to respond to.
/// @param config_map A config may contain GenerationConfig, values
Expand Down
20 changes: 14 additions & 6 deletions src/cpp/src/block_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

#include "sequence_group.hpp"


namespace ov::genai {

class KVCacheBlock {
Expand Down Expand Up @@ -188,7 +187,10 @@ class CacheStateDumper;
*/
class BlockAllocator {
std::vector<std::list<KVCacheBlock::Ptr>> m_free_blocks;
int m_total_num_blocks;
// We keep m_free_blocks_num instead of m_free_blocks[X].size() to WA old CXX library implementation issue for std::list::size()
// see https://stackoverflow.com/questions/13157164/why-isnt-stdlist-size-constant-time
std::vector<size_t> m_free_blocks_num;
size_t m_total_num_blocks;
friend class CacheStateDumper;
size_t m_num_layers;
bool m_enable_prefix_caching;
Expand All @@ -202,8 +204,8 @@ class BlockAllocator {
* @param num_layers The number of separate attention layers with KV caches in the LLM associated with the pipeline.
* Blocks returned will be vectors with this size, each vector entry to be associated with a separate layer's KV cache.
*/
BlockAllocator(int num_blocks, bool enable_prefix_caching, size_t num_layers = 1) :
m_total_num_blocks(num_blocks), m_num_layers(num_layers), m_enable_prefix_caching(enable_prefix_caching), m_overwriteable_blocks(num_layers) {
BlockAllocator(size_t num_blocks, bool enable_prefix_caching, size_t num_layers = 1) :
m_free_blocks_num(num_layers, num_blocks), m_total_num_blocks(num_blocks), m_num_layers(num_layers), m_enable_prefix_caching(enable_prefix_caching), m_overwriteable_blocks(num_layers) {
OPENVINO_ASSERT(num_layers != 0, "num_layers must be non-zero");
m_free_blocks.resize(m_num_layers);
for (auto& per_layer_block_list : m_free_blocks) {
Expand All @@ -224,7 +226,7 @@ class BlockAllocator {
* @return Number of free blocks for this layer.
*/
size_t num_free_blocks(size_t layer_idx) const {
return m_free_blocks[layer_idx].size() + m_overwriteable_blocks.num_blocks();
return m_free_blocks_num[layer_idx] + num_overwriteable_blocks();
}

/**
Expand Down Expand Up @@ -270,6 +272,7 @@ class BlockAllocator {
block_ptr->release();
if (block_ptr->is_free()) {
m_free_blocks[layer_idx].push_back(block_ptr);
++m_free_blocks_num[layer_idx];
}
}

Expand Down Expand Up @@ -325,6 +328,7 @@ class BlockAllocator {
// actual collision case
for (size_t layer_idx = 0; layer_idx < colliding_blocks_per_layer.size(); layer_idx++) {
m_free_blocks[layer_idx].push_back(colliding_blocks_per_layer[layer_idx]);
++m_free_blocks_num[layer_idx];
}
}
m_overwriteable_blocks.add(blocks_for_all_layers);
Expand All @@ -333,12 +337,14 @@ class BlockAllocator {
// TODO (vshampor): more fine-grained hash store control
for (size_t layer_idx = 0; layer_idx < blocks_for_all_layers.size(); layer_idx++) {
m_free_blocks[layer_idx].push_back(blocks_for_all_layers[layer_idx]);
++m_free_blocks_num[layer_idx];
}
}
}
else {
for (size_t layer_idx = 0; layer_idx < blocks_for_all_layers.size(); layer_idx++) {
m_free_blocks[layer_idx].push_back(blocks_for_all_layers[layer_idx]);
++m_free_blocks_num[layer_idx];
}
}
}
Expand Down Expand Up @@ -368,6 +374,7 @@ class BlockAllocator {
KVCacheBlock::Ptr allocated_block = m_free_blocks[layer_idx].front();
allocated_block->increment();
m_free_blocks[layer_idx].pop_front();
--m_free_blocks_num[layer_idx];
return allocated_block;
}

Expand All @@ -386,7 +393,7 @@ class BlockAllocator {
OPENVINO_ASSERT(m_enable_prefix_caching);
OPENVINO_ASSERT(can_allocate_blocks(1));

if (m_free_blocks[0].size() > 0) {
if (m_free_blocks_num[0] > 0) {
// allocate new empty block
BlocksPerLayer allocated_blocks;
allocated_blocks.reserve(m_num_layers);
Expand All @@ -396,6 +403,7 @@ class BlockAllocator {
allocated_block->set_hash(hash);
allocated_blocks.push_back(allocated_block);
m_free_blocks[i].pop_front();
--m_free_blocks_num[i];
}
cached_blocks[hash] = allocated_blocks;
return allocated_blocks;
Expand Down
22 changes: 2 additions & 20 deletions src/cpp/src/continuous_batching_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::ContinuousBatchingImpl(

ov::Core core;

auto [core_properties, compile_properties] = utils::split_core_complile_config(properties);
auto [core_properties, compile_properties] = utils::split_core_compile_config(properties);
core.set_property(core_properties);

// The model can be compiled for GPU as well
Expand Down Expand Up @@ -57,7 +57,7 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::init(
}

SchedulerConfig updated_config = scheduler_config;
// update KV number in scheduler config
// update KV blocks number in scheduler config
if (scheduler_config.num_kv_blocks != device_config.get_num_kv_blocks()) {
updated_config.num_kv_blocks = device_config.get_num_kv_blocks();
}
Expand Down Expand Up @@ -166,24 +166,6 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::step() {
timer.start();
logits = m_model_runner->forward(m_requests, scheduler_output);
timer.end();

ov::InferRequest infer_request = m_model_runner->get_infer_request();
ov::CompiledModel compiled_model = infer_request.get_compiled_model();
const bool is_profiling_enabled = compiled_model.get_property(ov::enable_profiling);

// collect detailed statistic
if (is_profiling_enabled) {
std::vector<ov::ProfilingInfo> profiling_info = m_model_runner->get_infer_request().get_profiling_info();
for (const ov::ProfilingInfo& info : profiling_info) {
double current_time = info.real_time.count();
if (info.node_type == "PagedAttentionExtension") {
m_perf.m_paged_attention_time_ms += current_time;
} else if (info.node_type == "FullyConnected") {
m_perf.m_matmul_time_ms += current_time;
}
m_perf.m_infer_total_ms += current_time;
}
}
}

#ifdef DEBUG_CACHE_STATE_DUMP
Expand Down
Loading

0 comments on commit 8bef5a3

Please sign in to comment.