check windows on precommit #453

Closed

108 commits
ba91fde
initial generate
pavel-esir Mar 26, 2024
9d85a0e
LLM pipeline
pavel-esir Mar 28, 2024
b21c6c1
Added calculating for several batches
pavel-esir Apr 2, 2024
e52e90d
Greedy search works
pavel-esir Apr 3, 2024
745a804
rename to GenerationConfig
pavel-esir Apr 4, 2024
8895ed0
Add fluent interface
pavel-esir Apr 5, 2024
b24977d
Update text_generation/causal_lm/cpp/generate_pipeline/generate_pipel…
pavel-esir Apr 5, 2024
c933ca0
cosmetic changes in main
pavel-esir Apr 5, 2024
c43e901
greedy search with batches and left padding works
pavel-esir Apr 10, 2024
5a914f6
combine LLModel with LLMPipeline
pavel-esir Apr 10, 2024
c1e0c9d
wip: enable calling tokenize/detokenize for LLMPipeline
pavel-esir Apr 10, 2024
8d66353
add callback to generate
pavel-esir Apr 11, 2024
fa12da7
cleanup generate_sample.cpp
pavel-esir Apr 11, 2024
5ceb9d5
add speculative decoding
pavel-esir Apr 16, 2024
a5083c7
separate Tokenizer
pavel-esir Apr 17, 2024
7692160
wip
pavel-esir Apr 23, 2024
d3f6339
add start/stop conversation
pavel-esir Apr 24, 2024
3776433
use text in streamer instead of raw tokens
pavel-esir Apr 23, 2024
964a5e8
add apply_chat_template
pavel-esir Apr 23, 2024
e57aa4c
fix difference between accumulating conversation as text and keeping …
pavel-esir Apr 26, 2024
d0c1341
cleanup
pavel-esir Apr 26, 2024
8dcea1f
add Jinja2cpp submodule
pavel-esir Apr 26, 2024
754a462
add ov namespace
pavel-esir May 2, 2024
9b19c6f
return scores for batched outputs
pavel-esir May 2, 2024
9bf6caa
add AnyMap
pavel-esir May 3, 2024
39fd73c
Merge remote-tracking branch 'upstream/master' into generate_pipeline
pavel-esir May 3, 2024
63d8f6d
cleanup
pavel-esir May 3, 2024
a833760
before moving to pimpl
pavel-esir May 6, 2024
1681654
move to separate include & src
pavel-esir May 6, 2024
9fe73c6
pimpl implementation
pavel-esir May 6, 2024
053708f
temporary disable jinja2cpp
pavel-esir May 6, 2024
bd6849a
add python api draft, hide implementations from user & refactor imple…
pavel-esir May 7, 2024
62c471e
extract decoding methods to separate files
pavel-esir May 7, 2024
f1d54f4
extended python api, added python api test
pavel-esir May 7, 2024
3c82e11
remove call method
pavel-esir May 8, 2024
5543cee
init
Wovchena May 6, 2024
abb8835
add_subdirectory
Wovchena May 7, 2024
0998abc
add files
Wovchena May 8, 2024
15492c4
add __init__.py
Wovchena May 8, 2024
005d3fb
removed set_streamer
pavel-esir May 8, 2024
cc44bc8
use std::optional
pavel-esir May 8, 2024
d8cab05
started to add Readme docs
pavel-esir May 8, 2024
2535394
reoder Readme
pavel-esir May 8, 2024
95c1bfb
rm generate_pipeline/python
Wovchena May 9, 2024
4510f71
update Readme; cleanup LLMPipeline and add docstring
pavel-esir May 9, 2024
507bc49
refactor folder structure
pavel-esir May 9, 2024
af747d4
cleanup generation_config and ov::Tokenizer
pavel-esir May 9, 2024
c6620d9
move includes to a separate openvino/genai folder
pavel-esir May 10, 2024
59c3e0b
Merge branch 'generate_pipeline' into package
Wovchena May 10, 2024
be84345
align names
Wovchena May 10, 2024
bced64a
Dont modify text_generation/causal_lm/cpp/CMakeLists.txt
Wovchena May 10, 2024
f4e82b6
rm -r text_generation/causal_lm/cpp/generate_pipeline/python-bindings/
Wovchena May 10, 2024
5b2b0ca
fix build
Wovchena May 10, 2024
0dd8f59
add tokenizers only once
Wovchena May 10, 2024
23638ff
change cmake.source-dir
Wovchena May 10, 2024
d8c5349
restore openvino/genai inits
Wovchena May 10, 2024
24faefe
Integrate JinjaCpp
ilya-lavrenov May 10, 2024
598dda3
install genai lib
Wovchena May 10, 2024
f274b93
Merge pull request #2 from ilya-lavrenov/jinja-integration-pavel
pavel-esir May 10, 2024
02d0eae
import openvino for win and lin
Wovchena May 10, 2024
e6695f3
Merge branch 'generate_pipeline' into package
Wovchena May 10, 2024
a27c5a7
put the line back
Wovchena May 10, 2024
0849c41
Added cmake build type before project clause
ilya-lavrenov May 10, 2024
34cddff
one line properties
Wovchena May 10, 2024
023cf1e
Merge pull request #3 from ilya-lavrenov/cmake-build-type
pavel-esir May 10, 2024
6a5d750
Export API symbols
ilya-lavrenov May 10, 2024
27f385e
Merge pull request #4 from ilya-lavrenov/generate_pipeline
pavel-esir May 10, 2024
a9332f0
Merge branch 'generate_pipeline' into package
Wovchena May 10, 2024
9ef488c
rename
Wovchena May 10, 2024
4fad7d5
add .github/workflows/genai_lib.yml
Wovchena May 10, 2024
51e03a2
on: pull_request
Wovchena May 10, 2024
e23a7bb
spelling
Wovchena May 10, 2024
fc5b753
install openvino
Wovchena May 10, 2024
09f8806
add syntacis sugar for geenrate, optimize value passing by reference
pavel-esir May 10, 2024
af22a8a
remove speculative decoding
pavel-esir May 11, 2024
e7db7e8
update
Wovchena May 13, 2024
f279363
add rpath
Wovchena May 13, 2024
83d77c8
add rpath to libopenvino.so
Wovchena May 13, 2024
167f924
py_generate_pipeline
Wovchena May 13, 2024
a111a3f
reorder tokenizer.cpp, add comments to BaseStreamer
pavel-esir May 11, 2024
813d80a
install centos7
Wovchena May 13, 2024
6227b65
install nightly
Wovchena May 13, 2024
74fc107
Merge branch 'generate_pipeline' into package
Wovchena May 13, 2024
9b83a7e
propagate _GLIBCXX_USE_CXX11_ABI
Wovchena May 13, 2024
2d15752
Populate python with the libraries to allow skipping wheel installation
Wovchena May 13, 2024
8025554
run setupvars
Wovchena May 13, 2024
2b14286
update .gitignore, install numpy
Wovchena May 13, 2024
1c11bc7
quotes
Wovchena May 13, 2024
e7fce82
fix PYTHONPATH
Wovchena May 13, 2024
64608d1
fix PYTHONPATH
Wovchena May 13, 2024
43b87c7
quotes
Wovchena May 13, 2024
fef9674
reorder vars
Wovchena May 14, 2024
b21286c
openvino.genai-
Wovchena May 14, 2024
d393f89
Merge pull request #1 from Wovchena/package
pavel-esir May 14, 2024
2b8954d
Merge branch 'master' into generate_pipeline
pavel-esir May 14, 2024
11e872b
Update CMakeLists.txt
pavel-esir May 14, 2024
442dcbf
move group beam searcher to src
pavel-esir May 13, 2024
53d534e
Update .gitignore (#5)
Wovchena May 15, 2024
dcb4b86
Merge remote-tracking branch 'origin/generate_pipeline' into generate…
pavel-esir May 15, 2024
72c045e
fixed difference between old greddy sample and generate
pavel-esir May 15, 2024
11fbaa2
tokenizer minor fixes
pavel-esir May 15, 2024
264e99f
apply comments
pavel-esir May 15, 2024
11032b4
remove accidentally added test_cpp_samples.py
pavel-esir May 15, 2024
7d0c80b
fix build
pavel-esir May 15, 2024
2e3cd73
fix causal_lm comparison error
pavel-esir May 15, 2024
e7fa974
fix different outputs
pavel-esir May 15, 2024
93be036
add tests
pavel-esir May 16, 2024
ce81ba1
check windows on precommit
pavel-esir May 17, 2024
84 changes: 44 additions & 40 deletions .github/workflows/causal_lm_cpp.yml

Large diffs are not rendered by default.

42 changes: 42 additions & 0 deletions .github/workflows/genai_lib.yml
@@ -0,0 +1,42 @@
name: genai_lib
on: pull_request
jobs:
  genai_lib_ubuntu:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: 3.8
      - run: mkdir ./ov/
      - run: curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14758-22bd6ff0494/l_openvino_toolkit_centos7_2024.1.0.dev20240315_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz  # Install CentOS7 instead of Ubuntu to match PyPI distribution ABI
      - run: sudo ./ov/install_dependencies/install_openvino_dependencies.sh
      - run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
      - run: source ./ov/setupvars.sh && cmake --build ./build/ --config Release -j
      - run: python -m pip install openvino  # Can't load CentOS libraries from the archive
      - run: PYTHONPATH=./src/python/ python -c "from openvino_genai.py_generate_pipeline import LLMPipeline"
      - run: source ./ov/setupvars.sh && python -m pip install --pre --upgrade . --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
      - run: python -c "from openvino_genai.py_generate_pipeline import LLMPipeline"

  genai_lib_windows:
    runs-on: windows-latest
    defaults:
      run:
        shell: cmd
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: 3.8
      - run: curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64.zip
      - run: unzip ov.zip
      - run: call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
      - run: call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64\setupvars.bat && cmake --build ./build/ --config Release -j
      - run: python -m pip install "numpy<1.27"
      - run: set "PYTHONPATH=./src/python;" && call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64\setupvars.bat && python -c "from openvino_genai.py_generate_pipeline import LLMPipeline"  # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of after solves that.
      - run: call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64\setupvars.bat && python -m pip install .
      - run: python -c "from openvino_genai.py_generate_pipeline import LLMPipeline"
4 changes: 4 additions & 0 deletions .gitignore
@@ -1,3 +1,7 @@
# They are copied to python folder during the build to allow skipping wheel installation
src/python/openvino_genai/*generate_pipeline_lib*
src/python/openvino_genai/py_generate_pipeline*

# build/artifact dirs
_*
[Bb]uild*/
6 changes: 6 additions & 0 deletions .gitmodules
@@ -1,3 +1,9 @@
[submodule "thirdparty/openvino_tokenizers"]
path = thirdparty/openvino_tokenizers
url = https://github.com/openvinotoolkit/openvino_tokenizers.git
[submodule "thirdparty/nlohmann_json"]
path = thirdparty/nlohmann_json
url = https://github.com/nlohmann/json.git
[submodule "thirdparty/Jinja2Cpp"]
path = thirdparty/Jinja2Cpp
url = https://github.com/jinja2cpp/Jinja2Cpp
14 changes: 14 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,14 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

cmake_minimum_required(VERSION 3.15)

set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake build type")
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release" "Debug" "RelWithDebInfo" "MinSizeRel")

project(openvino_genai)

add_subdirectory(./thirdparty/openvino_tokenizers/ "${CMAKE_CURRENT_BINARY_DIR}/openvino_tokenizers/")
add_subdirectory(src)
add_subdirectory(text_generation/causal_lm/cpp)
41 changes: 41 additions & 0 deletions pyproject.toml
@@ -0,0 +1,41 @@
[project]
name = "openvino_genai"
version = "2024.2.0.0"
description = "Python bindings for https://github.com/openvinotoolkit/openvino.genai"
requires-python = ">=3.8"
readme = {file = "text_generation/causal_lm/cpp/README.md", content-type="text/markdown"}
license = {text = "OSI Approved :: Apache Software License"}
authors = [
{ name = "OpenVINO Developers", email = "[email protected]" },
]
classifiers = [
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
dependencies = [
"openvino_tokenizers~=2024.1.0.0"
]

[tool.scikit-build]
cmake.source-dir = "./"
cmake.build-type = "Release"
cmake.targets = ["py_generate_pipeline", "generate_pipeline_lib"]
install.components = ["genai", "genai_python"]
sdist.cmake = true
wheel.packages = ["src/python/openvino_genai"]
wheel.install-dir = "openvino_genai"
wheel.build-tag = "000"
wheel.license-files = ["LICENSE", "SECURITY.md", "third-party-programs.txt"]

[[tool.scikit-build.generate]]
path = "openvino_genai/__version__.py"
template = '''
__version__ = "${version}"
'''

[build-system]
requires = ["scikit-build-core~=0.8.0"] # See https://github.com/openvinotoolkit/openvino_tokenizers/pull/123
build-backend = "scikit_build_core.build"
13 changes: 13 additions & 0 deletions src/CMakeLists.txt
@@ -0,0 +1,13 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

# Find OpenVINODeveloperPackage first to compile with SDL flags
find_package(OpenVINODeveloperPackage QUIET
             PATHS "${OpenVINO_DIR}")
if(NOT OpenVINODeveloperPackage_FOUND)
    find_package(OpenVINO REQUIRED COMPONENTS Runtime)
endif()

add_subdirectory(cpp)
add_subdirectory(python)
68 changes: 68 additions & 0 deletions src/cpp/CMakeLists.txt
@@ -0,0 +1,68 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

# Dependencies

include(FetchContent)

FetchContent_Declare(nlohmann_json
    URL https://github.com/nlohmann/json/archive/refs/tags/v3.11.3.tar.gz
    URL_HASH SHA256=0d8ef5af7f9794e3263480193c491549b2ba6cc74bb018906202ada498a79406)
FetchContent_MakeAvailable(nlohmann_json)

function(ov_genai_build_jinja2cpp)
    FetchContent_Declare(jinja2cpp
        URL https://github.com/ilya-lavrenov/Jinja2Cpp/archive/a5d002cbf44469775556daea14ba3ccdba1e365a.tar.gz
        URL_HASH SHA256=5aa5378d9acf3c44dfb607fd7f16f48b17ffa6495c219957901e9191ffe28900)

    FetchContent_GetProperties(jinja2cpp)
    if(NOT jinja2cpp_POPULATED)
        FetchContent_Populate(jinja2cpp)

        set(BUILD_SHARED_LIBS OFF)
        set(JINJA2CPP_INSTALL OFF CACHE BOOL "")
        set(JINJA2CPP_CXX_STANDARD 17 CACHE STRING "")
        set(JINJA2CPP_BUILD_SHARED OFF CACHE BOOL "")
        set(JINJA2CPP_USE_REGEX "std" CACHE STRING "")
        set(JINJA2CPP_WITH_JSON_BINDINGS "none" CACHE STRING "")
        set(JINJA2CPP_STRICT_WARNINGS OFF CACHE BOOL "")
        set(JINJA2CPP_PIC ON CACHE BOOL "")

        add_subdirectory("${jinja2cpp_SOURCE_DIR}" "${jinja2cpp_BINARY_DIR}" EXCLUDE_FROM_ALL)
        # openvino::runtime exports _GLIBCXX_USE_CXX11_ABI=0 on CentOS7.
        # It needs to be propagated to every library GenAI links with.
        # It's enough to propagate to fmt, because fmt propagates to jinja2cpp.
        target_compile_definitions(fmt PUBLIC $<TARGET_PROPERTY:openvino::runtime,INTERFACE_COMPILE_DEFINITIONS>)
    endif()
endfunction()

ov_genai_build_jinja2cpp()

# Library

file(GLOB SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp")

set(TARGET_NAME generate_pipeline_lib)
add_library(${TARGET_NAME} SHARED ${SOURCE_FILES})

target_include_directories(${TARGET_NAME}
    # TODO: remove it, because beam_search algo should not be exposed to end users
    PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../text_generation/causal_lm/cpp/
    PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)

target_link_libraries(${TARGET_NAME} PUBLIC openvino::runtime PRIVATE nlohmann_json::nlohmann_json jinja2cpp)

target_compile_definitions(${TARGET_NAME} PRIVATE OPENVINO_TOKENIZERS_PATH=\"$<TARGET_FILE:openvino_tokenizers>\")

target_compile_features(${TARGET_NAME} PUBLIC cxx_std_17)

install(TARGETS ${TARGET_NAME} LIBRARY DESTINATION . COMPONENT genai RUNTIME DESTINATION . COMPONENT genai)

# Populate python with the libraries to allow skipping wheel installation
add_custom_command(TARGET generate_pipeline_lib POST_BUILD
    COMMAND "${CMAKE_COMMAND}" -E copy
        "$<TARGET_FILE:generate_pipeline_lib>"
        "${CMAKE_CURRENT_SOURCE_DIR}/../python/openvino_genai/$<TARGET_FILE_NAME:generate_pipeline_lib>"
    COMMENT "Copy generate_pipeline_lib to src/python/openvino_genai")
93 changes: 93 additions & 0 deletions src/cpp/include/openvino/genai/generation_config.hpp
@@ -0,0 +1,93 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include <limits>
#include <variant>
#include <string>

#include "openvino/runtime/compiled_model.hpp"
#include "openvino/runtime/infer_request.hpp"
#include "openvino/genai/tokenizer.hpp"

namespace ov {

/**
 * @brief Controls the stopping condition for grouped beam search. The following values are possible:
 * "early", where generation stops as soon as there are `num_beams` complete candidates; "heuristic", where a
 * heuristic is applied and generation stops when it is very unlikely that better candidates will be found;
 * "never", where the beam search procedure only stops when there cannot be better candidates.
 */
enum class StopCriteria { early, heuristic, never };

/**
 * @brief Structure to keep generation config parameters.
 *
 * @param max_length the maximum length the generated tokens can have. Corresponds to the length of the input prompt +
 *        `max_new_tokens`. Its effect is overridden by `max_new_tokens`, if also set.
 * @param max_new_tokens the maximum number of tokens to generate, ignoring the number of tokens in the prompt.
 * @param ignore_eos if set to true, generation does not stop even when the <eos> token is met.
 * @param num_beams number of beams for beam search. 1 means no beam search.
 * @param num_beam_groups number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams.
 * @param diversity_penalty this value is subtracted from a beam's score if it generates the same token as any beam from another group at a
 *        particular time. Note that `diversity_penalty` is only effective if group beam search is enabled.
 * @param length_penalty exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to
 *        the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log
 *        likelihood of the sequence (i.e. negative), `length_penalty` > 0.0 promotes longer sequences, while
 *        `length_penalty` < 0.0 encourages shorter sequences.
 * @param num_return_sequences the number of sequences to return for grouped beam search decoding.
 * @param no_repeat_ngram_size if set to an int > 0, all ngrams of that size can only occur once.
 * @param stop_criteria controls the stopping condition for grouped beam search. It accepts the following values:
 *        "early", where generation stops as soon as there are `num_beams` complete candidates; "heuristic", where a
 *        heuristic is applied and generation stops when it is very unlikely that better candidates will be found;
 *        "never", where the beam search procedure only stops when there cannot be better candidates (canonical beam search algorithm).
 * @param temperature the value used to modulate token probabilities for random sampling.
 * @param top_p if set to a float < 1, only the smallest set of the most probable tokens with probabilities
 *        that add up to `top_p` or higher are kept for generation.
 * @param top_k the number of highest probability vocabulary tokens to keep for top-k filtering.
 * @param do_sample whether or not to use multinomial random sampling.
 * @param repetition_penalty the parameter for repetition penalty. 1.0 means no penalty.
 * @param pad_token_id id of the padding token
 * @param bos_token_id id of the <bos> token
 * @param eos_token_id id of the <eos> token
 * @param bos_token <bos> token string representation
 * @param eos_token <eos> token string representation
 * @param draft_model draft model for assistive (speculative) decoding
 */
class OPENVINO_GENAI_EXPORTS GenerationConfig {
public:
    GenerationConfig() = default;
    GenerationConfig(std::string json_path);

    // Generic
    size_t max_new_tokens = SIZE_MAX;
    size_t max_length = SIZE_MAX;
    bool ignore_eos = false;

    // Beam search specific
    size_t num_beam_groups = 1;
    size_t num_beams = 1;
    float diversity_penalty = 1.0f;
    float length_penalty = 1.0f;
    size_t num_return_sequences = 1;
    size_t no_repeat_ngram_size = std::numeric_limits<size_t>::max();
    StopCriteria stop_criteria = StopCriteria::heuristic;

    // Multinomial
    float temperature = 0.0f;
    float top_p = 1.0f;
    int top_k = -1;
    bool do_sample = false;
    float repetition_penalty = 1.0f;

    // special tokens
    int64_t pad_token_id = 0;
    int64_t bos_token_id = 1;
    int64_t eos_token_id = 2;

    // used for chat scenario
    std::string bos_token = "<s>";
    std::string eos_token = "</s>";
};

} // namespace ov
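
For orientation, here is a minimal sketch of how the fields declared in this header might be populated for grouped beam search. It is not part of the PR diff: the field and enum names come from the header above, while the surrounding `main`, the chosen values, and the JSON file name in the commented-out constructor call are illustrative assumptions only.

```cpp
#include "openvino/genai/generation_config.hpp"

int main() {
    // Default-construct the config and tune fields declared in the header above.
    ov::GenerationConfig config;
    config.max_new_tokens = 256;                        // cap on newly generated tokens
    config.num_beams = 4;                               // total number of beams
    config.num_beam_groups = 2;                         // split beams into 2 groups for diversity
    config.diversity_penalty = 1.0f;                    // penalize tokens already produced by other groups
    config.stop_criteria = ov::StopCriteria::heuristic; // stop when better candidates are unlikely

    // The header also declares a constructor that reads values from a JSON file,
    // e.g. a model's generation_config.json (file name is a placeholder here):
    // ov::GenerationConfig from_json("generation_config.json");
    return 0;
}
```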