Skip to content

Commit

Permalink
Add extension near to genai library, tokenizers from fork (#11)
Browse files Browse the repository at this point in the history
* enable

* libtbb-dev

* move

* slash

* install

* core_genai_dev

* remove export

* rreorganaise components

* add SOVERSION, and requirements-build.txt

* repalce SKBUILD with EXCLUDE_FROM_ALL because the effect is the same

* fix NAMELINK_COMPONENT

* remove extraline

* add soft restrictions

* Fix build to unblock packaging

* improve naming

* install samples

* remove quotes

* use main target name because an alias can't be specified in cmake --target

* define CMAKE_BUILD_PARALLEL_LEVEL

* Ensure ./requirements-build.txt won't outdate

* Use ./requirements-build.txt in python lib build

* Add missing &&

* Test Debug

* add matrix for windows_genai_package

* openvino_tokenizers from form

* update openvino_tokenizers

* update openvino_tokenizers

* update openvino_tokenizers

* revert openvino_tokenizers

* tokenizers from fork

* update tokenizers

* centos7_2024.2.0.dev

* copy target

* revert tokenizers

* reapply useful changes

* copy so only

* Update tokenizers, centos7_2024.2.0.dev

* single thread

* ubuntu22

* nightyl

* --pre --extra-index-url

* update tokenizers

* space

* move --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly

* release tokenizers

* merge

* downgrade tokenizers

* downgrade

* two steps

* downgrade tokenizers

* dont setupvars

* source

* fix

* submodule

* releases/2024/2 tokenizers

* fix-2

* rebase

* use make

* comment

* CMAKE_GENERATOR=Unix Makefiles

* update openvino

* space

* optimum-cli from fork

* different commit

* from branch

* remove exrtra-index for SD

* reorder pip install

* revert unwanted changes

* Ubuntu-22

* openvino_tokenizers~=2024.2.0.0

* remove -pre . --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly

* upgrade to prerelease

* revert requirements.txt

* remove --pre, setupvars

* get openvino_tokenizers._ext_path

* take release pybind, fix soversion, and tokenizers folder

* spelling

* dont copy libs

* put ov_tokenizers_path back

* GENAI_BUILD_DIR=../../build

* Add extension near to genai library

* include openvino/util/file_util.hpp

* get_absolute_file_path

* remove namepsace

* #    include <limits.h>

* more than one .

* till next dot

* _ext_path

* -1

* +1

* +1

* path

* ext name

* with_openvino_tokenizers

* char

* revert test

* tokenizers from form

* update fork

* lib

* fix cherry-pick

* update fork

* dont spoil source dir

* Generator expressions to disable appending a per-configuration subdirectory

* remove versions

* fix path

* try

* try

* verbose

* spelling

* rename file

* remove build.tool-args

* Release

* dont speciify targets

* revert 81ec069
  • Loading branch information
Wovchena authored May 29, 2024
1 parent 81ec069 commit 88c44fe
Show file tree
Hide file tree
Showing 15 changed files with 200 additions and 54 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/causal_lm_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ jobs:
shell: cmd
run: |
call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat
set PATH=.\build\src\cpp\Release;%PATH%
set PATH=.\build\openvino_genai\;%PATH%
.\build\text_generation\causal_lm\cpp\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt
echo import transformers > ref.py
Expand Down
19 changes: 11 additions & 8 deletions .github/workflows/genai_python_lib.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ jobs:
env:
# A tokenizers' dependency fails to compile with Ninja in CentOS7 env
CMAKE_GENERATOR: Unix Makefiles
CMAKE_BUILD_PARALLEL_LEVEL: null
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -21,9 +22,9 @@ jobs:
- run: source ./ov/setupvars.sh && cmake --build ./build/ --config Release -j
# GitHub Actions already provides what is listed in ./requirements-build.txt but the internal
# build system doesn't. Install ./requirements-build.txt to detect possible conflicts.
- run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./requirements-build.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
- run: source ./ov/setupvars.sh && PYTHONPATH=./src/python/ python -c "from openvino_genai import LLMPipeline"
- run: source ./ov/setupvars.sh && CMAKE_BUILD_PARALLEL_LEVEL="" python -m pip install .
- run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./requirements-build.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release --verbose --verbose --verbose
- run: source ./ov/setupvars.sh && PYTHONPATH=./build/ python -c "from openvino_genai import LLMPipeline"
- run: source ./ov/setupvars.sh && python -m pip install . --config-settings=build-dir="build" --verbose --verbose --verbose
- run: python -c "from openvino_genai import LLMPipeline"
- name: GenAI Python API tests
run: |
Expand All @@ -37,6 +38,8 @@ jobs:
windows_genai_python_lib:
runs-on: windows-latest
env:
CMAKE_BUILD_PARALLEL_LEVEL: null
defaults:
run:
shell: cmd
Expand All @@ -49,11 +52,11 @@ jobs:
python-version: 3.8
- run: curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/windows/w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64.zip
- run: unzip ov.zip
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && cmake --build ./build/ --config Release -j
# GitHub Actions already provides what is listed in ./requirements-build.txt but the internal
# build system doesn't. Install ./requirements-build.txt to detect possible conflicts.
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./requirements-build.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
- run: set "PYTHONPATH=./src/python;" && call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && python -c "from openvino_genai import LLMPipeline" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.
- run: set CMAKE_BUILD_PARALLEL_LEVEL=&& call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && python -m pip install .
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./requirements-build.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release --verbose --verbose --verbose
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && python -m pip install . --verbose --verbose --verbose # --verbose is additive, and can be used up to 3 times.
- run: python -c "from openvino_genai import LLMPipeline"
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && cmake --build ./build/ --config Release -j
- run: set "PYTHONPATH=./build/" && call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && python -c "from openvino_genai import LLMPipeline" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.
4 changes: 0 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# They are copied to python folder during the build to allow skipping wheel installation
src/python/openvino_genai/*genai*
src/python/openvino_genai/py_generate_pipeline*

# build/artifact dirs
_*
[Bb]uild*/
Expand Down
29 changes: 29 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,35 @@ endif()
project(OpenVINOGenAI VERSION 2024.2.0.0)

add_subdirectory(./thirdparty/openvino_tokenizers/ "${CMAKE_CURRENT_BINARY_DIR}/openvino_tokenizers/")
# Put binaries to a single dir to mimic package structure.
set_target_properties(openvino_tokenizers PROPERTIES
    # Generator expressions to disable appending a per-configuration subdirectory (Release, Debug).
    # ARCHIVE_OUTPUT is irrelevant. It's here just to keep all the artifacts in one place.
    ARCHIVE_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
    LIBRARY_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
    RUNTIME_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
)
if(TARGET core_tokenizers)
    # core_tokenizers is a target of this build: redirect its artifacts to the same directory.
    set_target_properties(core_tokenizers PROPERTIES
        ARCHIVE_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
        LIBRARY_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
        RUNTIME_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
    )
else()
    # Prebuilt dependencies
    # NOTE(review): the LINUX variable is predefined only by CMake 3.25+; presumably a dependency
    # defines it for older versions - confirm.
    if(WIN32)
        set(extra_libs "${CMAKE_BINARY_DIR}/_deps/fast_tokenizer-src/lib/core_tokenizers.dll"
                       "${CMAKE_BINARY_DIR}/_deps/fast_tokenizer-src/third_party/lib/icudt70.dll"
                       "${CMAKE_BINARY_DIR}/_deps/fast_tokenizer-src/third_party/lib/icuuc70.dll")
    elseif(LINUX)
        set(extra_libs "${CMAKE_BINARY_DIR}/_deps/fast_tokenizer-src/lib/libcore_tokenizers.so")
    elseif(APPLE)
        set(extra_libs "${CMAKE_BINARY_DIR}/_deps/fast_tokenizer-src/lib/libcore_tokenizers.dylib")
    endif()
    # Nothing to copy on a platform that matched none of the branches above.
    if(extra_libs)
        # The rule's outputs are the copies inside openvino_genai/, not the prebuilt sources.
        # Declaring the sources as OUTPUT would make the rule permanently up to date.
        set(copied_extra_libs "")
        foreach(extra_lib IN LISTS extra_libs)
            get_filename_component(extra_lib_name "${extra_lib}" NAME)
            list(APPEND copied_extra_libs "${CMAKE_BINARY_DIR}/openvino_genai/${extra_lib_name}")
        endforeach()
        add_custom_command(OUTPUT ${copied_extra_libs}
            COMMAND "${CMAKE_COMMAND}" -E make_directory "${CMAKE_BINARY_DIR}/openvino_genai/"
            # Unquoted on purpose: every list element must become a separate argument of `copy`.
            COMMAND "${CMAKE_COMMAND}" -E copy ${extra_libs} "${CMAKE_BINARY_DIR}/openvino_genai/"
            DEPENDS openvino_tokenizers
            COMMENT "Copy prebuilt tokenizers dependencies to ${CMAKE_BINARY_DIR}/openvino_genai/"
            VERBATIM)
        # A custom command with OUTPUT only runs when a target consumes its outputs.
        # Hook it into the default build so the copy actually happens.
        add_custom_target(genai_copy_tokenizers_extra_libs ALL DEPENDS ${copied_extra_libs})
    endif()
endif()
add_subdirectory(src)
add_subdirectory(text_generation/causal_lm/cpp)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ dependencies = [
[tool.scikit-build]
cmake.source-dir = "./"
cmake.build-type = "Release"
cmake.targets = ["py_generate_pipeline", "genai"]
install.components = ["wheel_genai"]
sdist.cmake = true
wheel.packages = ["src/python/openvino_genai"]
Expand All @@ -37,5 +36,6 @@ __version__ = "${version}"
'''

[build-system]
# TODO: add build.tool-args = ["--parallel"] after scikit-build-core is updated to 0.9.4+.
requires = ["scikit-build-core~=0.8.0", "cmake~=3.23"] # See https://github.com/openvinotoolkit/openvino_tokenizers/pull/123
build-backend = "scikit_build_core.build"
15 changes: 5 additions & 10 deletions src/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,31 +41,26 @@ file(GLOB SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp")

set(TARGET_NAME genai)
add_library(${TARGET_NAME} SHARED ${SOURCE_FILES})
add_library(openvino::${TARGET_NAME} ALIAS ${TARGET_NAME})
add_library(openvino::genai ALIAS ${TARGET_NAME})

target_include_directories(${TARGET_NAME}
PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>" "$<INSTALL_INTERFACE:runtime/include>")

target_link_libraries(${TARGET_NAME} PUBLIC openvino::runtime PRIVATE nlohmann_json::nlohmann_json jinja2cpp)

target_compile_definitions(${TARGET_NAME} PRIVATE OPENVINO_TOKENIZERS_PATH=\"$<TARGET_FILE:openvino_tokenizers>\")

target_compile_features(${TARGET_NAME} PUBLIC cxx_std_17)

# Extract two last digits from CMAKE_PROJECT_VERSION_MAJOR because SOVERSION can only contain up to 4 symbols.
string(REGEX MATCH [=[[0-9][0-9]$]=] MAJOR_SUFFIX ${CMAKE_PROJECT_VERSION_MAJOR})
set_target_properties(${TARGET_NAME} PROPERTIES
OUTPUT_NAME openvino_genai
VERSION ${CMAKE_PROJECT_VERSION}
SOVERSION ${MAJOR_SUFFIX}${CMAKE_PROJECT_VERSION_MINOR}${CMAKE_PROJECT_VERSION_PATCH}
ARCHIVE_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
LIBRARY_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
RUNTIME_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
)

# Copy the library to python to allow skipping wheel installation
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND "${CMAKE_COMMAND}" -E copy
"$<TARGET_FILE:${TARGET_NAME}>"
"${CMAKE_CURRENT_SOURCE_DIR}/../python/openvino_genai/$<TARGET_FILE_NAME:${TARGET_NAME}>"
COMMENT "Copy ${TARGET_NAME} to src/python/openvino_genai")

find_package(Python3 REQUIRED COMPONENTS Interpreter Development)
install(TARGETS ${TARGET_NAME}
LIBRARY DESTINATION python/openvino_genai/ COMPONENT pygenai_${Python_VERSION_MAJOR}_${Python_VERSION_MINOR}
Expand Down
1 change: 1 addition & 0 deletions src/cpp/include/openvino/genai/llm_pipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
* @param model_path Path to the dir model xml/bin files, tokenizers and generation_configs.json
* @param device optional device
* @param plugin_config optional plugin_config
* @param ov_tokenizers_path optional path to an extension to add. Empty adds openvino_tokenizers from openvino_genai library folder.
*/
LLMPipeline(const std::string& path, const std::string& device="CPU",
const ov::AnyMap& plugin_config={},
Expand Down
66 changes: 65 additions & 1 deletion src/cpp/src/llm_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,33 @@
#include "utils.hpp"
#include "text_callback_streamer.hpp"

#ifdef _WIN32
#    include <windows.h>
#    define MAX_ABS_PATH _MAX_PATH
#    define get_absolute_path(result, path) _fullpath(result, path.c_str(), MAX_ABS_PATH)
#else
#    include <dlfcn.h>
#    include <limits.h>
#    define MAX_ABS_PATH PATH_MAX
#    define get_absolute_path(result, path) realpath(path.c_str(), result)
namespace {
/**
 * @brief Resolves a path to an absolute one with realpath().
 * @param path Path to resolve.
 * @return The resolved path. If resolution failed part-way, whatever prefix was written into the
 * buffer is returned (see the comment in the body).
 * @throw std::runtime_error if nothing at all could be resolved.
 */
std::string get_absolute_file_path(const std::string& path) {
    std::string absolutePath;
    absolutePath.resize(MAX_ABS_PATH);  // resize() zero-fills, so an untouched buffer starts with '\0'.
    std::ignore = get_absolute_path(&absolutePath[0], path);
    // Capture errno right away so that later calls can't overwrite it.
    const int resolve_errno = errno;
    // Trim the buffer to the C string stored in it. The emptiness check has to come after
    // trimming: right after resize() the string is never empty.
    const std::string::size_type terminator = absolutePath.find('\0');
    if (terminator != std::string::npos) {
        absolutePath.resize(terminator);
    }
    if (!absolutePath.empty()) {
        // on Linux if file does not exist or no access, function will return NULL, but
        // `absolutePath` will contain resolved path
        return absolutePath;
    }
    std::stringstream ss;
    ss << "Can't get absolute file path for [" << path << "], err = " << strerror(resolve_errno);
    throw std::runtime_error(ss.str());
}
}
#endif

namespace {

ov::genai::GenerationConfig from_config_json_if_exists(const std::string& path) {
Expand Down Expand Up @@ -56,6 +83,39 @@ std::string from_tokenizer_json_if_exists(const std::string& path) {
return res;
}

/**
 * @brief Builds the path of the openvino_tokenizers shared library located in the same
 * directory as the given file.
 * @param path Path to a file; only its parent directory is used.
 * @return Parent directory of @p path joined with the platform-specific tokenizers library name.
 */
std::filesystem::path with_openvino_tokenizers(const std::filesystem::path& path) {
#ifdef _WIN32
    constexpr char tokenizers[] = "openvino_tokenizers.dll";
#elif defined(__linux__)
    constexpr char tokenizers[] = "libopenvino_tokenizers.so";
#elif defined(__APPLE__)
    constexpr char tokenizers[] = "libopenvino_tokenizers.dylib";
#else
    // Fail with a clear message instead of "tokenizers was not declared" further below.
#    error "Unsupported OS"
#endif
    return path.parent_path() / tokenizers;
}

/**
 * @brief Returns the path of the binary module that contains this function.
 *
 * The module is looked up by the address of this very function: GetModuleHandleExA() +
 * GetModuleFileNameA() on Windows, dladdr() elsewhere.
 * @throw std::runtime_error if the module can't be located or its path can't be retrieved.
 */
std::string get_ov_genai_library_path() {
#ifdef _WIN32
    CHAR genai_library_path[MAX_PATH];
    HMODULE hm = NULL;
    if (!GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
                            reinterpret_cast<LPSTR>(get_ov_genai_library_path),
                            &hm)) {
        std::stringstream ss;
        ss << "GetModuleHandle returned " << GetLastError();
        throw std::runtime_error(ss.str());
    }
    // GetModuleFileNameA returns 0 on failure and the buffer size when the path was truncated.
    const DWORD length = GetModuleFileNameA(hm, (LPSTR)genai_library_path, sizeof(genai_library_path));
    if (length == 0 || length >= sizeof(genai_library_path)) {
        std::stringstream ss;
        ss << "GetModuleFileName returned " << GetLastError();
        throw std::runtime_error(ss.str());
    }
    return std::string(genai_library_path, length);
#elif defined(__APPLE__) || defined(__linux__) || defined(__EMSCRIPTEN__)
    Dl_info info;
    // dladdr returns 0 if the address doesn't belong to any loaded object; `info` is unusable then.
    if (!dladdr(reinterpret_cast<void*>(get_ov_genai_library_path), &info) || info.dli_fname == nullptr) {
        throw std::runtime_error("dladdr failed to find the library containing get_ov_genai_library_path");
    }
    return get_absolute_file_path(info.dli_fname);
#else
#    error "Unsupported OS"
#endif  // _WIN32
}

}

namespace ov {
Expand Down Expand Up @@ -161,7 +221,11 @@ ov::genai::LLMPipeline::LLMPipelineImpl::LLMPipelineImpl(
const std::string& ov_tokenizers_path
):
m_model_runner{ov::Core{}.compile_model(path + "/openvino_model.xml", device, config).create_infer_request()},
m_tokenizer{Tokenizer(path, device, ov_tokenizers_path)},
m_tokenizer{
ov_tokenizers_path.empty()
? Tokenizer(path, device, with_openvino_tokenizers(get_ov_genai_library_path()).string())
: Tokenizer(path, device, ov_tokenizers_path)
},
m_generation_config{from_config_json_if_exists(path)},
m_chat_template{from_tokenizer_json_if_exists(path)}
{}
Expand Down
7 changes: 1 addition & 6 deletions src/cpp/src/tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,7 @@ class Tokenizer::TokenizerImpl {
if (ov::genai::utils::is_xml(tokenizers_path))
OPENVINO_THROW("tokenizers_path should be a path to a dir not a xml file");

if (ov_tokenizers_path.empty()) {
// OPENVINO_TOKENIZERS_PATH is defined in CMakeLists.txt
core.add_extension(OPENVINO_TOKENIZERS_PATH);
} else {
core.add_extension(ov_tokenizers_path + "/libopenvino_tokenizers.so");
}
core.add_extension(ov_tokenizers_path);
std::shared_ptr<ov::Model> tokenizer_model, detokenizer_model;
try {
tokenizer_model = core.read_model(tokenizers_path + "/openvino_tokenizer.xml");
Expand Down
27 changes: 14 additions & 13 deletions src/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,18 @@ endif()

pybind11_add_module(py_generate_pipeline py_generate_pipeline.cpp)
target_link_libraries(py_generate_pipeline PRIVATE openvino::genai)
set_target_properties(py_generate_pipeline PROPERTIES
LIBRARY_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
)
file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/__init__.py" DESTINATION "${CMAKE_BINARY_DIR}/openvino_genai/")
write_file("${CMAKE_BINARY_DIR}/openvino_genai/__version__.py" "__version__ = \"${CMAKE_PROJECT_VERSION}\"")

# setting RPATH / LC_RPATH depending on platform
if(LINUX)
# to find libgenai.so in the same folder
# to find libopenvino_genai.so in the same folder
set(rpaths "$ORIGIN")
elseif(APPLE)
# to find libgenai.dylib in the same folder
# to find libopenvino_genai.dylib in the same folder
set(rpaths "@loader_path")
if(DEFINED SKBUILD)
# in case we build pip package, we need to refer to libopenvino.dylib from 'openvino' package
Expand All @@ -35,17 +40,13 @@ if(rpaths)
set_target_properties(py_generate_pipeline PROPERTIES INSTALL_RPATH "${rpaths}")
endif()

# Copy the library to python to allow skipping wheel installation
add_custom_command(TARGET py_generate_pipeline POST_BUILD
COMMAND "${CMAKE_COMMAND}" -E copy
"$<TARGET_FILE:py_generate_pipeline>"
"${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/$<TARGET_FILE_NAME:py_generate_pipeline>"
COMMENT "Copy py_generate_pipeline to src/python/openvino_genai/")

find_package(Python3 REQUIRED COMPONENTS Interpreter Development)
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/ DESTINATION python/openvino_genai/ COMPONENT pygenai_${Python_VERSION_MAJOR}_${Python_VERSION_MINOR})
install(TARGETS py_generate_pipeline LIBRARY DESTINATION python/openvino_genai/ COMPONENT pygenai_${Python_VERSION_MAJOR}_${Python_VERSION_MINOR})
install(FILES "${CMAKE_BINARY_DIR}/openvino_genai/__init__.py" "${CMAKE_BINARY_DIR}/openvino_genai/__version__.py" DESTINATION python/openvino_genai/ COMPONENT pygenai_${Python_VERSION_MAJOR}_${Python_VERSION_MINOR})
install(TARGETS genai py_generate_pipeline LIBRARY DESTINATION python/openvino_genai/ COMPONENT pygenai_${Python_VERSION_MAJOR}_${Python_VERSION_MINOR})

# wheel_genai component is used for wheel generation in pyproject.toml.
# Exclude wheel_genai from normal packaging process.
install(TARGETS genai py_generate_pipeline LIBRARY DESTINATION . COMPONENT wheel_genai RUNTIME DESTINATION . COMPONENT wheel_genai EXCLUDE_FROM_ALL)
# Exclude wheel_genai from normal packaging because there's pygenai_X_Y component for that.
install(TARGETS genai py_generate_pipeline
LIBRARY DESTINATION . COMPONENT wheel_genai
RUNTIME DESTINATION . COMPONENT wheel_genai
EXCLUDE_FROM_ALL)
2 changes: 1 addition & 1 deletion src/python/openvino_genai/__version__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# this property will be overwritten by value from pyproject.toml
# Will be overwritten by pyproject.toml or cmake.
__version__ = "0.0.0.0"
Loading

0 comments on commit 88c44fe

Please sign in to comment.