
Generate pipeline #334

Merged
merged 158 commits on Jun 7, 2024
Changes from 120 commits
158 commits
ba91fde
initial generate
pavel-esir Mar 26, 2024
9d85a0e
LLM pipeline
pavel-esir Mar 28, 2024
b21c6c1
Added calculating for several batches
pavel-esir Apr 2, 2024
e52e90d
Greedy search works
pavel-esir Apr 3, 2024
745a804
rename to GenerationConfig
pavel-esir Apr 4, 2024
8895ed0
Add fluent interface
pavel-esir Apr 5, 2024
b24977d
Update text_generation/causal_lm/cpp/generate_pipeline/generate_pipel…
pavel-esir Apr 5, 2024
c933ca0
cosmetic changes in main
pavel-esir Apr 5, 2024
c43e901
greedy search with batches and left padding works
pavel-esir Apr 10, 2024
5a914f6
combine LLModel with LLMPipeline
pavel-esir Apr 10, 2024
c1e0c9d
wip: enable calling tokenize/detokenize for LLMPipeline
pavel-esir Apr 10, 2024
8d66353
add callback to generate
pavel-esir Apr 11, 2024
fa12da7
cleanup generate_sample.cpp
pavel-esir Apr 11, 2024
5ceb9d5
add speculative decoding
pavel-esir Apr 16, 2024
a5083c7
separate Tokenizer
pavel-esir Apr 17, 2024
7692160
wip
pavel-esir Apr 23, 2024
d3f6339
add start/stop conversation
pavel-esir Apr 24, 2024
3776433
use text in streamer instead of raw tokens
pavel-esir Apr 23, 2024
964a5e8
add apply_chat_template
pavel-esir Apr 23, 2024
e57aa4c
fix difference between accumulating conversation as text and keeping …
pavel-esir Apr 26, 2024
d0c1341
cleanup
pavel-esir Apr 26, 2024
8dcea1f
add Jinja2cpp submodule
pavel-esir Apr 26, 2024
754a462
add ov namespace
pavel-esir May 2, 2024
9b19c6f
return scores for batched outputs
pavel-esir May 2, 2024
9bf6caa
add AnyMap
pavel-esir May 3, 2024
39fd73c
Merge remote-tracking branch 'upstream/master' into generate_pipeline
pavel-esir May 3, 2024
63d8f6d
cleanup
pavel-esir May 3, 2024
a833760
before moving to pimpl
pavel-esir May 6, 2024
1681654
move to separate include & src
pavel-esir May 6, 2024
9fe73c6
pimpl implementation
pavel-esir May 6, 2024
053708f
temporary disable jinja2cpp
pavel-esir May 6, 2024
bd6849a
add python api draft, hide implementations from user & refactor imple…
pavel-esir May 7, 2024
62c471e
extract decoding methods to separate files
pavel-esir May 7, 2024
f1d54f4
extended python api, added python api test
pavel-esir May 7, 2024
3c82e11
remove call method
pavel-esir May 8, 2024
5543cee
init
Wovchena May 6, 2024
abb8835
add_subdirectory
Wovchena May 7, 2024
0998abc
add files
Wovchena May 8, 2024
15492c4
add __init__.py
Wovchena May 8, 2024
005d3fb
removed set_streamer
pavel-esir May 8, 2024
cc44bc8
use std::optional
pavel-esir May 8, 2024
d8cab05
started to add Readme docs
pavel-esir May 8, 2024
2535394
reorder Readme
pavel-esir May 8, 2024
95c1bfb
rm generate_pipeline/python
Wovchena May 9, 2024
4510f71
update Readme; cleanup LLMPipeline and add docstring
pavel-esir May 9, 2024
507bc49
refactor folder structure
pavel-esir May 9, 2024
af747d4
cleanup generation_config and ov::Tokenizer
pavel-esir May 9, 2024
c6620d9
move includes to a separate openvino/genai folder
pavel-esir May 10, 2024
59c3e0b
Merge branch 'generate_pipeline' into package
Wovchena May 10, 2024
be84345
align names
Wovchena May 10, 2024
bced64a
Dont modify text_generation/causal_lm/cpp/CMakeLists.txt
Wovchena May 10, 2024
f4e82b6
rm -r text_generation/causal_lm/cpp/generate_pipeline/python-bindings/
Wovchena May 10, 2024
5b2b0ca
fix build
Wovchena May 10, 2024
0dd8f59
add tokenizers only once
Wovchena May 10, 2024
23638ff
change cmake.source-dir
Wovchena May 10, 2024
d8c5349
restore openvino/genai inits
Wovchena May 10, 2024
24faefe
Integrate JinjaCpp
ilya-lavrenov May 10, 2024
598dda3
install genai lib
Wovchena May 10, 2024
f274b93
Merge pull request #2 from ilya-lavrenov/jinja-integration-pavel
pavel-esir May 10, 2024
02d0eae
import openvino for win and lin
Wovchena May 10, 2024
e6695f3
Merge branch 'generate_pipeline' into package
Wovchena May 10, 2024
a27c5a7
put the line back
Wovchena May 10, 2024
0849c41
Added cmake build type before project clause
ilya-lavrenov May 10, 2024
34cddff
one line properties
Wovchena May 10, 2024
023cf1e
Merge pull request #3 from ilya-lavrenov/cmake-build-type
pavel-esir May 10, 2024
6a5d750
Export API symbols
ilya-lavrenov May 10, 2024
27f385e
Merge pull request #4 from ilya-lavrenov/generate_pipeline
pavel-esir May 10, 2024
a9332f0
Merge branch 'generate_pipeline' into package
Wovchena May 10, 2024
9ef488c
rename
Wovchena May 10, 2024
4fad7d5
add .github/workflows/genai_lib.yml
Wovchena May 10, 2024
51e03a2
on: pull_request
Wovchena May 10, 2024
e23a7bb
spelling
Wovchena May 10, 2024
fc5b753
install openvino
Wovchena May 10, 2024
09f8806
add syntactic sugar for generate, optimize value passing by reference
pavel-esir May 10, 2024
af22a8a
remove speculative decoding
pavel-esir May 11, 2024
e7db7e8
update
Wovchena May 13, 2024
f279363
add rpath
Wovchena May 13, 2024
83d77c8
add rpath to libopenvino.so
Wovchena May 13, 2024
167f924
py_generate_pipeline
Wovchena May 13, 2024
a111a3f
reorder tokenizer.cpp, add comments to BaseStreamer
pavel-esir May 11, 2024
813d80a
install centos7
Wovchena May 13, 2024
6227b65
install nightly
Wovchena May 13, 2024
74fc107
Merge branch 'generate_pipeline' into package
Wovchena May 13, 2024
9b83a7e
propagate _GLIBCXX_USE_CXX11_ABI
Wovchena May 13, 2024
2d15752
Populate python with the libraries to allow skipping wheel installation
Wovchena May 13, 2024
8025554
run setupvars
Wovchena May 13, 2024
2b14286
update .gitignore, install numpy
Wovchena May 13, 2024
1c11bc7
quotes
Wovchena May 13, 2024
e7fce82
fix PYTHONPATH
Wovchena May 13, 2024
64608d1
fix PYTHONPATH
Wovchena May 13, 2024
43b87c7
quotes
Wovchena May 13, 2024
fef9674
reorder vars
Wovchena May 14, 2024
b21286c
openvino.genai-
Wovchena May 14, 2024
d393f89
Merge pull request #1 from Wovchena/package
pavel-esir May 14, 2024
2b8954d
Merge branch 'master' into generate_pipeline
pavel-esir May 14, 2024
11e872b
Update CMakeLists.txt
pavel-esir May 14, 2024
442dcbf
move group beam searcher to src
pavel-esir May 13, 2024
53d534e
Update .gitignore (#5)
Wovchena May 15, 2024
dcb4b86
Merge remote-tracking branch 'origin/generate_pipeline' into generate…
pavel-esir May 15, 2024
72c045e
fixed difference between old greedy sample and generate
pavel-esir May 15, 2024
11fbaa2
tokenizer minor fixes
pavel-esir May 15, 2024
264e99f
apply comments
pavel-esir May 15, 2024
11032b4
remove accidentally added test_cpp_samples.py
pavel-esir May 15, 2024
7d0c80b
fix build
pavel-esir May 15, 2024
2e3cd73
fix causal_lm comparison error
pavel-esir May 15, 2024
e7fa974
fix different outputs
pavel-esir May 15, 2024
78d0b88
Archive (#7)
Wovchena May 20, 2024
5eb59ea
add tests
pavel-esir May 16, 2024
ce4eb00
Apply suggestions from code review
pavel-esir May 22, 2024
aa90e9d
names correction
pavel-esir May 22, 2024
54cbb52
update URL_HASH
Wovchena May 22, 2024
82a9449
remove submodules from .gitmodules
Wovchena May 22, 2024
5a0079b
install openvino_tokenizers for genai_python_lib
pavel-esir May 22, 2024
73e4312
Update Jinja2Cpp fork commit
Wovchena May 22, 2024
75b7c37
remove group_beam_searcher.hpp; copy fast_tokenizer
pavel-esir May 22, 2024
70f1177
Fix archive (#8)
Wovchena May 23, 2024
da729ba
Apply suggestions from code review
pavel-esir May 24, 2024
28c313b
add groups to GenerationConfig docstring
pavel-esir May 24, 2024
c395a8d
refactor namespace ov::* -> ov::genai::*
pavel-esir May 24, 2024
bbc8c25
removed ov_tokenizers_path when ov::genai::Tokenizer is passed to LLMP…
pavel-esir May 24, 2024
9e37273
Add sampling decoding (#6)
as-suvorov May 27, 2024
81ec069
Fix library loading by updating dependencies (#10)
Wovchena May 28, 2024
88c44fe
Add extension near to genai library, tokenizers from fork (#11)
Wovchena May 29, 2024
220035d
set openvino_tokenizers path via environment; cleared LLMPipeline con…
pavel-esir May 29, 2024
5c6c14f
update environment util for win
pavel-esir May 29, 2024
174f67a
Add callback binding (#12)
Wovchena May 29, 2024
6709a67
Add streamer binding (#13)
Wovchena May 29, 2024
1a4bd68
remove reset_state, multibatch tests added
pavel-esir May 29, 2024
9389930
fix win build
pavel-esir May 29, 2024
7d1d616
map stop_criteria in pybind;
pavel-esir May 30, 2024
9208110
fix chat_sample build on Win
pavel-esir May 30, 2024
7021c87
fix tests failing
pavel-esir May 30, 2024
680e362
add return bool to streamer to stop generation
pavel-esir May 31, 2024
9ba0a71
Add tests for macOS (#9)
yatarkan May 31, 2024
ac26bf8
add genai into llm_bench (#15)
eaidova Jun 3, 2024
dd619e6
allow model without position_ids
pavel-esir Jun 3, 2024
04003d4
Merge remote-tracking branch 'origin/generate_pipeline' into generate…
pavel-esir Jun 3, 2024
1718bfb
Return eos_token from decoding algos (#16)
as-suvorov Jun 3, 2024
1b35935
Merge remote-tracking branch 'origin/generate_pipeline' into generate…
pavel-esir Jun 3, 2024
2c2a34a
Cache a model, rename genai target, fix Windows (#14)
Wovchena Jun 4, 2024
b180faf
Merge remote-tracking branch 'origin/generate_pipeline' into generate…
pavel-esir Jun 4, 2024
59c1096
Merge branch 'master' into generate_pipeline
eaidova Jun 4, 2024
28ebc87
read special tokens only from tokenizer_config.json and config.json
pavel-esir Jun 4, 2024
da96019
Leftovers (#18)
Wovchena Jun 5, 2024
a7f73a6
minor typos fix
pavel-esir Jun 5, 2024
a74baa2
Split text samples to separate folders (#19)
Wovchena Jun 5, 2024
13ebf9f
update llm_bench (#17)
eaidova Jun 5, 2024
67b1cfa
Assume GenAI is installed (#20)
Wovchena Jun 5, 2024
0bd9cb3
fix segfault in tests
pavel-esir Jun 5, 2024
b618673
fix converting unfinished utf strings
pavel-esir Jun 6, 2024
80a17be
load special tokens leftovers
pavel-esir Jun 6, 2024
743f348
add config loading tests
pavel-esir Jun 6, 2024
51a9a73
commit forgotten py_generate_pipeline.cpp
pavel-esir Jun 7, 2024
2494df1
fix ScopedVar in Tokenizer for ov_tokenizers_path
pavel-esir Jun 7, 2024
8f1399f
skip config modification in tmp dir on Win
pavel-esir Jun 7, 2024
57830ba
return back win tests after disabling cleanup
pavel-esir Jun 7, 2024
2175796
Disable unfinished utf string test in Win
pavel-esir Jun 7, 2024
7c07136
disable failing win workflows
pavel-esir Jun 7, 2024
4 changes: 4 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
version: 2
updates:
  - package-ecosystem: "pip"
    directory: "./"
    schedule:
      interval: "weekly"
  - package-ecosystem: "pip"
    directory: "image_generation/stable_diffusion_1_5/cpp/scripts/"
    schedule:
81 changes: 41 additions & 40 deletions .github/workflows/causal_lm_cpp.yml

Large diffs are not rendered by default.

63 changes: 63 additions & 0 deletions .github/workflows/genai_package.yml
@@ -0,0 +1,63 @@
name: genai_package
on: pull_request
jobs:
  ubuntu_genai_package:
    strategy:
      matrix:
        build-type: [Release, Debug]
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: 3.8
      - run: mkdir ./ov/
      - run: curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
      - run: sudo ./ov/install_dependencies/install_openvino_dependencies.sh
      - run: sudo apt-get install libtbb-dev


it's not required anymore since OpenVINO package contains TBB runtime and dev files

      - run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -S ./ -B ./build/
      - run: source ./ov/setupvars.sh && cmake --build ./build/ --config ${{ matrix.build-type }} --target package -j
      - run: source ./ov/setupvars.sh && cmake --install ./build/ --config ${{ matrix.build-type }} --prefix ov
      - run: ov/samples/cpp/build_samples.sh -i ${{ github.workspace }}/s\ pace
        if: ${{ 'Release' == matrix.build-type }} # build_samples enforces Release build
      - run: source ./ov/setupvars.sh && python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt
Contributor

@Wovchena
I suppose it should be installed after tokenizers, because text_generation/causal_lm/cpp/requirements.txt forces installation of the released version of tokenizers.

        if: ${{ 'Release' == matrix.build-type }}
      - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
        if: ${{ 'Release' == matrix.build-type }}
      - run: source ./ov/setupvars.sh && optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
        if: ${{ 'Release' == matrix.build-type }}
      - run: source ./ov/setupvars.sh && timeout 50s ${{ github.workspace }}/s\ pace/samples_bin/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ ""
        if: ${{ 'Release' == matrix.build-type }}

  windows_genai_package:
    strategy:
      matrix:
        build-type: [Release, Debug]
    runs-on: windows-latest
    defaults:
      run:
        shell: cmd
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: 3.8
      - run: curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.2.0-15349-765302e0de1/w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64.zip
      - run: unzip ov.zip
      - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -S ./ -B ./build/
      - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --build ./build/ --config ${{ matrix.build-type }} --target package -j
      - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --install ./build/ --config ${{ matrix.build-type }} --prefix w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64
      - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\samples\cpp\build_samples_msvc.bat -i "${{ github.workspace }}/samples_install"
        if: ${{ 'Release' == matrix.build-type }} # build_samples enforces Release build
      - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt
        if: ${{ 'Release' == matrix.build-type }}
      - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
        if: ${{ 'Release' == matrix.build-type }}
      - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
        if: ${{ 'Release' == matrix.build-type }}
      - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && "${{ github.workspace }}/samples_install/samples_bin/greedy_causal_lm" .\TinyLlama-1.1B-Chat-v1.0\ ""
        if: ${{ 'Release' == matrix.build-type }}
58 changes: 58 additions & 0 deletions .github/workflows/genai_python_lib.yml
@@ -0,0 +1,58 @@
name: genai_python_lib
on: pull_request
jobs:
  ubuntu_genai_python_lib:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: 3.8
      - run: mkdir ./ov/
      - run: curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14758-22bd6ff0494/l_openvino_toolkit_centos7_2024.1.0.dev20240315_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz # Install CentOS7 instead of Ubuntu to match PyPI distribution ABI
      - run: sudo ./ov/install_dependencies/install_openvino_dependencies.sh
      - run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
      - run: source ./ov/setupvars.sh && cmake --build ./build/ --config Release -j
      - run: python -m pip install --pre openvino --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly # Can't load CentOS libraries from the archive
      # GitHub Actions already provides what is listed in ./requirements-build.txt but the internal
      # build system doesn't. Install ./requirements-build.txt to detect possible conflicts.
      - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./requirements-build.txt
      - run: PYTHONPATH=./src/python/ python -c "from openvino_genai import LLMPipeline"
      - run: source ./ov/setupvars.sh && CMAKE_BUILD_PARALLEL_LEVEL="" python -m pip install --pre . --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
      - run: python -c "from openvino_genai import LLMPipeline"
      - name: GenAI Python API tests
        run: |
          source ./ov/setupvars.sh
          cd ./tests/python_tests/
          python -m pip install -r requirements.txt
          models=$(python list_test_models.py)
          echo "$models" | while read -r model_name model_path; do
            optimum-cli export openvino --trust-remote-code --weight-format fp16 --model "$model_name" "$model_path"
          done
          python -m pytest test_generate_api.py

  windows_genai_python_lib:
    runs-on: windows-latest
    defaults:
      run:
        shell: cmd
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: 3.8
      - run: curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.2.0-15349-765302e0de1/w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64.zip
      - run: unzip ov.zip
      - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
      - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --build ./build/ --config Release -j
      - run: python -m pip install "numpy<1.27"
      # GitHub Actions already provides what is listed in ./requirements-build.txt but the internal
      # build system doesn't. Install ./requirements-build.txt to detect possible conflicts.
      - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./requirements-build.txt
      - run: set "PYTHONPATH=./src/python;" && call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -c "from openvino_genai import LLMPipeline" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.
      - run: set CMAKE_BUILD_PARALLEL_LEVEL=&& call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install .
      - run: python -c "from openvino_genai import LLMPipeline"


@Wovchena
let's create actual pipeline to ensure tokenizers are found

4 changes: 4 additions & 0 deletions .gitignore
@@ -1,3 +1,7 @@
# They are copied to python folder during the build to allow skipping wheel installation
src/python/openvino_genai/*genai*
src/python/openvino_genai/py_generate_pipeline*

# build/artifact dirs
_*
[Bb]uild*/
26 changes: 26 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,26 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

cmake_minimum_required(VERSION 3.15)

# Multi config generators such as Visual Studio ignore CMAKE_BUILD_TYPE. Multi config generators are configured with
# CMAKE_CONFIGURATION_TYPES, but limiting options in it completely removes such build options
get_property(GENERATOR_IS_MULTI_CONFIG_VAR GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT GENERATOR_IS_MULTI_CONFIG_VAR AND NOT DEFINED CMAKE_BUILD_TYPE)
Contributor

@Wovchena we can also set default build type for ninja openvinotoolkit/openvino_tokenizers#162

    message(STATUS "CMAKE_BUILD_TYPE is not defined, 'Release' will be used")
    # Setting CMAKE_BUILD_TYPE as CACHE must go before project(). Otherwise project() sets its value and set() doesn't take an effect
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel ...")
endif()

project(OpenVINOGenAI VERSION 2024.2.0.0)
ilya-lavrenov marked this conversation as resolved.

add_subdirectory(./thirdparty/openvino_tokenizers/ "${CMAKE_CURRENT_BINARY_DIR}/openvino_tokenizers/")
add_subdirectory(src)
add_subdirectory(text_generation/causal_lm/cpp)

install(DIRECTORY text_generation/causal_lm/cpp/ DESTINATION samples/cpp/causal_lm COMPONENT cpp_samples_genai)
pavel-esir marked this conversation as resolved.
install(FILES LICENSE DESTINATION licensing COMPONENT licensing_genai RENAME LICENSE-GENAI)
install(FILES third-party-programs.txt DESTINATION licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt)
set(CPACK_GENERATOR "ZIP")
ilya-lavrenov marked this conversation as resolved.
include(CPack)
41 changes: 41 additions & 0 deletions pyproject.toml
@@ -0,0 +1,41 @@
[project]
name = "openvino_genai"
version = "2024.2.0.0"
description = "Python bindings for https://github.com/openvinotoolkit/openvino.genai"
requires-python = ">=3.8"
readme = {file = "text_generation/causal_lm/cpp/README.md", content-type="text/markdown"}
ilya-lavrenov marked this conversation as resolved.
license = {text = "OSI Approved :: Apache Software License"}
authors = [
    { name = "OpenVINO Developers", email = "[email protected]" },
]
classifiers = [
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]
dependencies = [
    "openvino_tokenizers~=2024.1.0.0"
]

[tool.scikit-build]
cmake.source-dir = "./"
cmake.build-type = "Release"
cmake.targets = ["py_generate_pipeline", "genai"]
pavel-esir marked this conversation as resolved.
install.components = ["wheel_genai"]
sdist.cmake = true
wheel.packages = ["src/python/openvino_genai"]
wheel.install-dir = "openvino_genai"
wheel.build-tag = "000"
wheel.license-files = ["LICENSE", "SECURITY.md", "third-party-programs.txt"]

[[tool.scikit-build.generate]]
path = "openvino_genai/__version__.py"
template = '''
__version__ = "${version}"
'''

[build-system]
requires = ["scikit-build-core~=0.8.0"] # See https://github.com/openvinotoolkit/openvino_tokenizers/pull/123
build-backend = "scikit_build_core.build"
2 changes: 2 additions & 0 deletions requirements-build.txt
@@ -0,0 +1,2 @@
cmake~=3.23
build~=1.2.1
Contributor

@Wovchena
do you remember why we need it separately?

Collaborator

It's @akladiev's request. It enables `python -m build --wheel --outdir {GENAI_BUILD_PY_DIR}`.

Contributor @ilya-lavrenov (Jun 3, 2024)

is it possible to use:

export OpenVINO_DIR=xxx # or call setupvars.sh
python -m pip wheel -v --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release <GenAI source dir>

Pip will build openvino_genai-2024.2.0.0-000-cp310-cp310-manylinux_2_35_x86_64.whl to current folder

See https://pip.pypa.io/en/stable/cli/pip_wheel/

Collaborator

It also puts all other .whl to the same dir which isn't desirable when the intention is to build one .whl.

Contributor

(test_env) devuser@ov-spr-19:~/ilavreno/openvino.genai$ python -m pip wheel -v  --no-deps --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release .
...
(test_env) devuser@ov-spr-19:~/ilavreno/openvino.genai$ ls *.whl
openvino_genai-2024.2.0.0-000-cp310-cp310-manylinux_2_35_x86_64.whl

Collaborator

Yes, I guess --no-deps may help. @akladiev, can you move to pip wheel so requirements-build.txt could be removed?

13 changes: 13 additions & 0 deletions src/CMakeLists.txt
@@ -0,0 +1,13 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

# Find OpenVINODeveloperPackage first to compile with SDL flags
find_package(OpenVINODeveloperPackage QUIET
             PATHS "${OpenVINO_DIR}")
if(NOT OpenVINODeveloperPackage_FOUND)
    find_package(OpenVINO REQUIRED COMPONENTS Runtime)
endif()

add_subdirectory(cpp)
add_subdirectory(python)
163 changes: 163 additions & 0 deletions src/README.md
@@ -0,0 +1,163 @@
# OpenVINO Generate API

## Usage

First of all, you need to convert your model with optimum-cli:
``` sh
optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weight-format fp16 --trust-remote-code "TinyLlama-1.1B-Chat-v1.0"
pip install openvino-genai
```

Contributor: I suppose we should not explicitly use the weights format and let Optimum decide on that matter.

`LLMPipeline` is the main object used for decoding. You can construct it straight away from the folder with the converted model. It will automatically load the main model, tokenizer, detokenizer and default generation configuration.

### Python

A minimalist example:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")
print(pipe.generate("The Sun is yellow because"))
```

Calling generate with custom generation config parameters, e.g. a config for grouped beam search:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")

result = pipe.generate("The Sun is yellow because", max_new_tokens=30, num_groups=3, group_size=5, diversity_penalty=1.5)
print(result)
```

output:
```
'it is made up of carbon atoms. The carbon atoms are arranged in a linear pattern, which gives the yellow color. The arrangement of carbon atoms in'
```
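Conceptually, the keyword arguments above override fields of the pipeline's default generation config. A pure-Python sketch of that merge, using field names that mirror the example (the `GenerationConfig` dataclass below is a hypothetical stand-in, not the real openvino_genai type):

```python
from dataclasses import dataclass, replace

@dataclass
class GenerationConfig:
    # Hypothetical stand-in for the pipeline's generation config
    max_new_tokens: int = 20
    num_groups: int = 1
    group_size: int = 1
    diversity_penalty: float = 0.0

def with_overrides(base: GenerationConfig, **kwargs) -> GenerationConfig:
    # Reject unknown option names early instead of silently ignoring them
    unknown = set(kwargs) - set(base.__dataclass_fields__)
    if unknown:
        raise ValueError(f"unknown generation options: {unknown}")
    return replace(base, **kwargs)

cfg = with_overrides(GenerationConfig(), max_new_tokens=30, num_groups=3,
                     group_size=5, diversity_penalty=1.5)
print(cfg.num_groups, cfg.diversity_penalty)  # 3 1.5
```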

A simple chat in Python:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path)

config = {'num_groups': 3, 'group_size': 5, 'diversity_penalty': 1.5}
pipe.set_generation_config(config)

pipe.start_chat()
while True:
    print('question:')
    prompt = input()
    if prompt == 'Stop!':
        break
    print(pipe(prompt))
pipe.finish_chat()
```

Contributor: I tried and it does not work like this for me; should it work? It works for me if config is a GenerationConfig object that I got with get_generation_config beforehand.

Contributor: Is it possible to add a description of the options which we can configure in config?
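The start_chat/finish_chat control flow above can be exercised without a model by swapping in a stub pipeline. Everything below is a hypothetical illustration (`StubPipe` is not part of openvino_genai); it only shows the loop structure with scripted input instead of `input()`:

```python
class StubPipe:
    # Hypothetical stand-in for ov_genai.LLMPipeline
    def __init__(self):
        self.in_chat = False
        self.history = []

    def start_chat(self):
        self.in_chat = True

    def finish_chat(self):
        self.in_chat = False
        self.history.clear()

    def __call__(self, prompt):
        assert self.in_chat, "call start_chat() first"
        self.history.append(prompt)
        return f"answer to: {prompt}"

def chat(pipe, prompts):
    # Same loop shape as the README sample, but fed from a list
    answers = []
    pipe.start_chat()
    for prompt in prompts:
        if prompt == 'Stop!':
            break
        answers.append(pipe(prompt))
    pipe.finish_chat()
    return answers

print(chat(StubPipe(), ["hi", "why is the sun yellow?", "Stop!", "ignored"]))
```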

Tests comparing outputs with Hugging Face can be found in tests/python_tests/test_generate_api.py.

### C++

A minimalistic example:
```cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    std::string model_path = argv[1];
    ov::genai::LLMPipeline pipe(model_path, "CPU");
    std::cout << pipe.generate("The Sun is yellow because");
}
```

Using Group Beam Search Decoding
```cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    std::string model_path = argv[1];
    ov::genai::LLMPipeline pipe(model_path, "CPU");

    ov::genai::GenerationConfig config = pipe.get_generation_config();
    config.max_new_tokens = 256;
    config.num_groups = 3;
    config.group_size = 5;
    config.diversity_penalty = 1.0f;

    std::cout << pipe.generate("The Sun is yellow because", config);
}
```
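Group beam search penalizes later groups for repeating tokens already chosen by earlier groups at the same decoding step, which is what `diversity_penalty` controls. A simplified pure-Python sketch of that scoring bias (an illustration of the idea, not the pipeline's actual implementation):

```python
import math

def penalized_scores(logprobs, prev_group_tokens, diversity_penalty):
    # Subtract diversity_penalty once for each time a token id was already
    # emitted by an earlier group at this step (Hamming diversity).
    counts = {}
    for t in prev_group_tokens:
        counts[t] = counts.get(t, 0) + 1
    return [lp - diversity_penalty * counts.get(i, 0)
            for i, lp in enumerate(logprobs)]

# Token 0 is the most likely, but two earlier groups already picked it,
# so the penalty pushes this group toward token 1.
scores = penalized_scores([math.log(0.5), math.log(0.3), math.log(0.2)],
                          prev_group_tokens=[0, 0], diversity_penalty=1.5)
best = max(range(3), key=lambda i: scores[i])
print(best)
```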

A simple chat in C++ using grouped beam search decoding
``` cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    std::string prompt;

    std::string model_path = argv[1];
    ov::genai::LLMPipeline pipe(model_path, "CPU");

    ov::genai::GenerationConfig config = pipe.get_generation_config();
    config.max_new_tokens = 256;
    config.num_groups = 3;
    config.group_size = 5;
    config.diversity_penalty = 1.0f;

    pipe.start_chat();
    for (;;) {
        std::cout << "question:\n";
        std::getline(std::cin, prompt);
        if (prompt == "Stop!")
            break;

        std::cout << "answer:\n";
        auto answer = pipe(prompt, config);
        std::cout << answer << std::endl;
    }
    pipe.finish_chat();
}
```

Streaming example with lambda function
``` cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    std::string model_path = argv[1];
    ov::genai::LLMPipeline pipe(model_path, "CPU");

    auto streamer = [](std::string word) { std::cout << word << std::flush; };
    std::cout << pipe.generate("The Sun is yellow because", streamer);
}
```

Streaming with a custom class
``` cpp
#include "openvino/genai/streamer_base.hpp"
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

class CustomStreamer: public ov::genai::StreamerBase {
public:
    void put(int64_t token) {
        /* custom decoding/tokens processing code
        tokens_cache.push_back(token);
        std::string text = m_tokenizer.decode(tokens_cache);
        ...
        */
    };

    void end() {
        /* custom finalization */
    };
};

int main(int argc, char* argv[]) {
    CustomStreamer custom_streamer;

    std::string model_path = argv[1];
    ov::genai::LLMPipeline pipe(model_path, "CPU");
    std::cout << pipe.generate("The Sun is yellow because", custom_streamer);
}
```

Contributor suggested change: `class CustomStreamer: public ov::genai::StreamerBase {` → `class CustomStreamer: public ov::genai::IStreamer {`?

Contributor: what do you think? @Wovchena @pavel-esir
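The put/end contract sketched in the comments above can be made concrete in pure Python with a stub detokenizer. Everything here is a hypothetical illustration of the pattern (cache tokens, flush decoded text on word boundaries); the real interface is the C++ StreamerBase shown above:

```python
class WordStreamer:
    # Caches token ids and flushes decoded text; decode() is a stub
    # detokenizer driven by a tiny id -> text vocabulary.
    def __init__(self, vocab):
        self.vocab = vocab
        self.tokens_cache = []
        self.flushed = []

    def decode(self, tokens):
        return "".join(self.vocab[t] for t in tokens)

    def put(self, token):
        # Called once per generated token
        self.tokens_cache.append(token)
        text = self.decode(self.tokens_cache)
        if text.endswith(" "):  # flush only on a word boundary
            self.flushed.append(text)
            self.tokens_cache.clear()

    def end(self):
        # Flush whatever is left when generation finishes
        if self.tokens_cache:
            self.flushed.append(self.decode(self.tokens_cache))
            self.tokens_cache.clear()

s = WordStreamer({0: "Hello", 1: " ", 2: "world"})
for t in [0, 1, 2]:
    s.put(t)
s.end()
print(s.flushed)  # ['Hello ', 'world']
```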