From 18ca12b18f87f1987037dc63b0a0dc2522203551 Mon Sep 17 00:00:00 2001 From: mzegla Date: Fri, 8 Nov 2024 10:54:31 +0100 Subject: [PATCH 01/10] drop sorting and shrinking outputs --- src/llm/apis/openai_completions.cpp | 7 ++----- src/llm/http_llm_calculator.cc | 4 ---- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/llm/apis/openai_completions.cpp b/src/llm/apis/openai_completions.cpp index 446fb4dc7b..474c1db06a 100644 --- a/src/llm/apis/openai_completions.cpp +++ b/src/llm/apis/openai_completions.cpp @@ -416,13 +416,10 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const std::vect // choices: array of size N, where N is related to n request parameter writer.String("choices"); writer.StartArray(); // [ - int i = 0; + int index = 0; int n = request.numReturnSequences.value_or(1); usage.completionTokens = 0; for (const ov::genai::GenerationOutput& generationOutput : generationOutputs) { - if (i >= n) - break; - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Generated tokens: {}", generationOutput.generated_ids); usage.completionTokens += generationOutput.generated_ids.size(); if (request.echo) @@ -445,7 +442,7 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const std::vect } // index: integer; Choice index, only n=1 supported anyway writer.String("index"); - writer.Int(i++); + writer.Int(index++); // logprobs: object/null; Log probability information for the choice. TODO writer.String("logprobs"); if (this->request.logprobschat || this->request.logprobs > 0) { diff --git a/src/llm/http_llm_calculator.cc b/src/llm/http_llm_calculator.cc index 0e144385ff..ece7359090 100644 --- a/src/llm/http_llm_calculator.cc +++ b/src/llm/http_llm_calculator.cc @@ -189,10 +189,6 @@ class HttpLLMCalculator : public CalculatorBase { return absl::CancelledError(); } RET_CHECK(generationOutputs.size() >= 1); - std::sort(generationOutputs.begin(), generationOutputs.end(), [](ov::genai::GenerationOutput& r1, ov::genai::GenerationOutput& r2) { - return r1.score > r2.score; - }); - std::string response = this->apiHandler->serializeUnaryResponse(generationOutputs); SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Complete unary response: {}", response); cc->Outputs().Tag(OUTPUT_TAG_NAME).Add(new OutputDataType{std::move(response)}, timestamp); From bc4092ed7cc5b6bd7f2e893da8494ee0bb93085a Mon Sep 17 00:00:00 2001 From: mzegla Date: Mon, 18 Nov 2024 11:52:57 +0100 Subject: [PATCH 02/10] update versions --- Dockerfile.redhat | 14 +++++++------- Dockerfile.ubuntu | 10 +++++----- Makefile | 8 ++++---- create_package.sh | 4 ++-- src/llm/apis/openai_completions.cpp | 1 - third_party/llm_engine/llm_engine.bzl | 6 +++--- 6 files changed, 21 insertions(+), 22 deletions(-) diff --git a/Dockerfile.redhat b/Dockerfile.redhat index ac190607d6..487738adba 100644 --- a/Dockerfile.redhat +++ b/Dockerfile.redhat @@ -199,7 +199,7 @@ RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; \ ln -s /openvino/inference-engine/temp/opencv_*/opencv /opt/intel/openvino/extras && \ ln -s /usr/local/runtime /opt/intel/openvino && \ ln -s /openvino/scripts/setupvars/setupvars.sh /opt/intel/openvino/setupvars.sh && \ - ln -s /opt/intel/openvino /opt/intel/openvino_2024 + ln -s /opt/intel/openvino /opt/intel/openvino_2025 RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; mkdir -p /opt/intel/openvino && cp -r /openvino/bin/intel64/Release/python /opt/intel/openvino/ RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; cp -r /openvino/tools/ovc/* /opt/intel/openvino/python ################## END OF OPENVINO SOURCE BUILD ###################### @@ -212,7 +212,7 @@ RUN if [ "$ov_use_binary" = "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ; mkdir /opt/intel && \ tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \ ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \ - ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2024 + ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2025 # update oneTBB RUN wget -nv https://github.com/oneapi-src/oneTBB/releases/download/v2021.13.0/oneapi-tbb-2021.13.0-lin.tgz && \ @@ -300,13 +300,13 @@ WORKDIR /ovms/src/example/SampleCpuExtension/ RUN make RUN if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; \ - mkdir -p /opt/intel/openvino/python/openvino-2024.5.dist-info && \ - echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2024.5' > /opt/intel/openvino/python/openvino-2024.5.dist-info/METADATA + mkdir -p /opt/intel/openvino/python/openvino-2025.0.dist-info && \ + echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2025.0' > /opt/intel/openvino/python/openvino-2025.0.dist-info/METADATA ENV PYTHONPATH=/opt/intel/openvino/python:/ovms/bazel-bin/src/python/binding WORKDIR /ovms -ARG PROJECT_VERSION="2024.5" +ARG PROJECT_VERSION="2025.0" ARG PROJECT_NAME="OpenVINO Model Server" LABEL description=${PROJECT_NAME} ARG minitrace_flags @@ -398,8 +398,8 @@ RUN if [ -f /ovms_release/lib/libovms_shared.so ] ; then mv /ovms_release/lib/li FROM $RELEASE_BASE_IMAGE as release LABEL "name"="OVMS" LABEL "vendor"="Intel Corporation" -LABEL "version"="2024.5" -LABEL "release"="2024" +LABEL "version"="2025.0" +LABEL "release"="2025" LABEL "summary"="OpenVINO(TM) Model Server" LABEL "description"="OpenVINO(TM) Model Server is a solution for serving AI models" ARG INSTALL_RPMS_FROM_URL= diff --git a/Dockerfile.ubuntu b/Dockerfile.ubuntu index b9069ea10d..f37599f40b 100644 --- a/Dockerfile.ubuntu +++ b/Dockerfile.ubuntu @@ -167,7 +167,7 @@ RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; \ mkdir -p /opt/intel/openvino && \ ln -s /usr/local/runtime /opt/intel/openvino && \ ln -s /openvino/scripts/setupvars/setupvars.sh /opt/intel/openvino/setupvars.sh && \ - ln -s /opt/intel/openvino /opt/intel/openvino_2024 + ln -s /opt/intel/openvino /opt/intel/openvino_2025 RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; mkdir -p /opt/intel/openvino && cp -r /openvino/bin/intel64/${CMAKE_BUILD_TYPE}/python /opt/intel/openvino/ RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; cp -r /openvino/tools/ovc/* /opt/intel/openvino/python @@ -184,7 +184,7 @@ RUN if [ "$ov_use_binary" == "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ; mkdir /opt/intel && \ tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \ ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \ - ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2024 + ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2025 # update oneTBB RUN wget -nv https://github.com/oneapi-src/oneTBB/releases/download/v2021.13.0/oneapi-tbb-2021.13.0-lin.tgz && \ @@ -305,12 +305,12 @@ WORKDIR /ovms/src/example/SampleCpuExtension/ RUN make && cp libcustom_relu_cpu_extension.so /opt RUN if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; \ - mkdir -p /opt/intel/openvino/python/openvino-2024.5.dist-info && \ - echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2024.5' > /opt/intel/openvino/python/openvino-2024.5.dist-info/METADATA + mkdir -p /opt/intel/openvino/python/openvino-2025.0.dist-info && \ + echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2025.0' > /opt/intel/openvino/python/openvino-2025.0.dist-info/METADATA WORKDIR /ovms -ARG PROJECT_VERSION="2024.5" +ARG PROJECT_VERSION="2025.0" ARG PROJECT_NAME="OpenVINO Model Server" LABEL description=${PROJECT_NAME} ARG minitrace_flags diff --git a/Makefile b/Makefile index d5bbf6553e..e7dcaf68e3 100644 --- a/Makefile +++ b/Makefile @@ -76,7 +76,7 @@ FUZZER_BUILD ?= 0 # - adjust binary version path - version variable is not passed to WORKSPACE file! OV_SOURCE_BRANCH ?= db64e5c66a9fdede7ecb8473b399ac94210f5136 # releases/2024/5 2024-11-09 OV_CONTRIB_BRANCH ?= c39462ca8d7c550266dc70cdbfbe4fc8c5be0677 # releases/2024/5 2024-10-31 -OV_TOKENIZERS_BRANCH ?= 92bec551bdb1eed94aa375415c82b4068520c7b9 # releases/2024/5 2024-11-04 +OV_TOKENIZERS_BRANCH ?= 5ccd56de5a7da65ad7ea7c5c42cec55be97df312 # master / 2024-11-14 OV_SOURCE_ORG ?= openvinotoolkit OV_CONTRIB_ORG ?= openvinotoolkit @@ -166,11 +166,11 @@ ifeq ($(findstring ubuntu,$(BASE_OS)),ubuntu) ifeq ($(BASE_OS_TAG),20.04) OS=ubuntu20 INSTALL_DRIVER_VERSION ?= "22.43.24595" - DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu20_2024.5.0.17288.7975fa5da0c_x86_64.tgz + DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu20_2025.0.0.17377.6733cc32091_x86_64.tgz else ifeq ($(BASE_OS_TAG),22.04) OS=ubuntu22 INSTALL_DRIVER_VERSION ?= "24.26.30049" - DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu22_2024.5.0.17288.7975fa5da0c_x86_64.tgz + DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu22_2025.0.0.17377.6733cc32091_x86_64.tgz endif endif ifeq ($(BASE_OS),redhat) @@ -185,7 +185,7 @@ ifeq ($(BASE_OS),redhat) endif DIST_OS=redhat INSTALL_DRIVER_VERSION ?= "23.22.26516" - DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_rhel8_2024.5.0.17288.7975fa5da0c_x86_64.tgz + DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_rhel8_2025.0.0.17377.6733cc32091_x86_64.tgz endif OVMS_CPP_DOCKER_IMAGE ?= openvino/model_server diff --git a/create_package.sh b/create_package.sh index b957e6f973..206e6d9459 100755 --- a/create_package.sh +++ b/create_package.sh @@ -14,7 +14,7 @@ # limitations under the License. # -# This script should be used inside the build image to create a binary package based on the compiled artefacts +# This script should be used inside the build image to create a binary package based on the compiled artifacts env mkdir -vp /ovms_release/bin @@ -28,7 +28,7 @@ find /ovms/bazel-out/k8-*/bin -iname '*.so*' ! -type d ! -name "libgtest.so" ! - mv /ovms_release/lib/libcustom_node* /ovms_release/lib/custom_nodes/ cd /ovms_release/lib/ ; rm -f libazurestorage.so.* ; ln -s libazurestorage.so libazurestorage.so.7 ;ln -s libazurestorage.so libazurestorage.so.7.5 cd /ovms_release/lib/ ; rm -f libcpprest.so.2.10 ; ln -s libcpprest.so libcpprest.so.2.10 -if [ -f /ovms_release/lib/libopenvino_genai.so ]; then cd /ovms_release/lib/ ; rm -f libopenvino_genai.so.* ; ln -s libopenvino_genai.so libopenvino_genai.so.2450 ; ln -s libopenvino_genai.so.2024.5.0.0 libopenvino_genai.so.2450 ; fi +if [ -f /ovms_release/lib/libopenvino_genai.so ]; then cd /ovms_release/lib/ ; rm -f libopenvino_genai.so.* ; ln -s libopenvino_genai.so libopenvino_genai.so.2500 ; ln -s libopenvino_genai.so.2025.0.0.0 libopenvino_genai.so.2500 ; fi rm -f /ovms_release/lib/libssl.so rm -f /ovms_release/lib/libsampleloader* diff --git a/src/llm/apis/openai_completions.cpp b/src/llm/apis/openai_completions.cpp index 474c1db06a..5b92d5ed87 100644 --- a/src/llm/apis/openai_completions.cpp +++ b/src/llm/apis/openai_completions.cpp @@ -417,7 +417,6 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const std::vect writer.String("choices"); writer.StartArray(); // [ int index = 0; - int n = request.numReturnSequences.value_or(1); usage.completionTokens = 0; for (const ov::genai::GenerationOutput& generationOutput : generationOutputs) { SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Generated tokens: {}", generationOutput.generated_ids); diff --git a/third_party/llm_engine/llm_engine.bzl b/third_party/llm_engine/llm_engine.bzl index a337f325e9..9259b300d2 100644 --- a/third_party/llm_engine/llm_engine.bzl +++ b/third_party/llm_engine/llm_engine.bzl @@ -20,7 +20,7 @@ def llm_engine(): new_git_repository( name = "llm_engine", remote = "https://github.com/openvinotoolkit/openvino.genai", - commit = "366662bc19f7f52db0975549ffc4d403021e89d5", # releases/2024/5 + commit = "96bcffebddf724fb7c9e28f0bb6fdabf36e96814", # master / Nov 14 build_file = "@_llm_engine//:BUILD", init_submodules = True, recursive_init_submodules = True, @@ -98,11 +98,11 @@ cmake( out_include_dir = "runtime/include", # linking order out_shared_libs = [ - "libopenvino_genai.so.2450", + "libopenvino_genai.so.2500", ], tags = ["requires-network"], visibility = ["//visibility:public"], - lib_name = "libopenvino_genai.so.2450", + lib_name = "libopenvino_genai.so.2500", ) cc_library( From ae6a8a38f5ad3b1327859fbfc745af693cc424b4 Mon Sep 17 00:00:00 2001 From: mzegla Date: Mon, 18 Nov 2024 13:19:34 +0100 Subject: [PATCH 03/10] sample cpu extension update --- src/example/SampleCpuExtension/Dockerfile.redhat | 2 +- src/example/SampleCpuExtension/Dockerfile.ubuntu | 2 +- src/example/SampleCpuExtension/Makefile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/example/SampleCpuExtension/Dockerfile.redhat b/src/example/SampleCpuExtension/Dockerfile.redhat index f14a16a97d..5eecb5c4d7 100644 --- a/src/example/SampleCpuExtension/Dockerfile.redhat +++ b/src/example/SampleCpuExtension/Dockerfile.redhat @@ -31,7 +31,7 @@ RUN mkdir -p $TEMP_DIR && cd $TEMP_DIR/ && \ mkdir /opt/intel && \ tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \ ln -s /opt/intel/l_openvino_* /opt/intel/openvino && \ - ln -s /opt/intel/l_openvino_* /opt/intel/openvino_2024 + ln -s /opt/intel/l_openvino_* /opt/intel/openvino_2025 WORKDIR /workspace COPY Makefile ov_extension.cpp CustomReluOp.cpp CustomReluOp.hpp ./ diff --git a/src/example/SampleCpuExtension/Dockerfile.ubuntu b/src/example/SampleCpuExtension/Dockerfile.ubuntu index 6de6822438..b6e480e85e 100644 --- a/src/example/SampleCpuExtension/Dockerfile.ubuntu +++ b/src/example/SampleCpuExtension/Dockerfile.ubuntu @@ -32,7 +32,7 @@ RUN mkdir -p $TEMP_DIR && cd $TEMP_DIR/ && \ mkdir /opt/intel && \ tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \ ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \ - ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2024 + ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2025 WORKDIR /workspace COPY Makefile ov_extension.cpp CustomReluOp.cpp CustomReluOp.hpp ./ diff --git a/src/example/SampleCpuExtension/Makefile b/src/example/SampleCpuExtension/Makefile index bb09ff7218..93cd64f109 100644 --- a/src/example/SampleCpuExtension/Makefile +++ b/src/example/SampleCpuExtension/Makefile @@ -14,7 +14,7 @@ # limitations under the License. # -OPENVINO_PATH ?= /opt/intel/openvino_2024 +OPENVINO_PATH ?= /opt/intel/openvino_2025 all: $(eval SHELL:=/bin/bash) /usr/bin/g++ --version From 6d98f54985cbeaa44d286fc0f1a1c2aba5cd0074 Mon Sep 17 00:00:00 2001 From: mzegla Date: Mon, 18 Nov 2024 13:31:45 +0100 Subject: [PATCH 04/10] further updates --- Makefile | 2 +- demos/common/export_models/requirements.txt | 4 +- demos/embeddings/README.md | 12 ----- .../download_model_requirements.txt | 2 +- demos/rerank/README.md | 12 ----- src/llm/llm_calculator.proto | 17 +++--- src/llm/llmnoderesources.cpp | 5 -- tests/file_lists/lib_files.txt | 52 +++++++++--------- tests/file_lists/lib_files_python.txt | 54 +++++++++---------- 9 files changed, 64 insertions(+), 96 deletions(-) diff --git a/Makefile b/Makefile index e7dcaf68e3..b9aa44bdd1 100644 --- a/Makefile +++ b/Makefile @@ -201,7 +201,7 @@ endif OVMS_PYTHON_IMAGE_TAG ?= py PRODUCT_NAME = "OpenVINO Model Server" -PRODUCT_VERSION ?= "2024.5" +PRODUCT_VERSION ?= "2025.0" PROJECT_VER_PATCH = $(eval PROJECT_VER_PATCH:=`git rev-parse --short HEAD`) diff --git a/demos/common/export_models/requirements.txt b/demos/common/export_models/requirements.txt index ce9e5221ec..9c704e9a89 100644 --- a/demos/common/export_models/requirements.txt +++ b/demos/common/export_models/requirements.txt @@ -2,8 +2,8 @@ --extra-index-url "https://storage.openvinotoolkit.org/simple/wheels/nightly" --pre optimum-intel@git+https://github.com/huggingface/optimum-intel.git -openvino-tokenizers[transformers]==2024.5.* -openvino==2024.5.* +openvino-tokenizers[transformers]==2025.0.* +openvino==2025.0.* nncf>=2.11.0 sentence_transformers==3.1.1 openai diff --git a/demos/embeddings/README.md b/demos/embeddings/README.md index 32c9edcfa0..927420ff8f 100644 --- a/demos/embeddings/README.md +++ b/demos/embeddings/README.md @@ -2,18 +2,6 @@ This demo shows how to deploy embeddings models in the OpenVINO Model Server for text feature extractions. Text generation use case is exposed via OpenAI API `embeddings` endpoint. -## Get the docker image - -Build the image from source to try this new feature. It will be included in the public image in the coming version 2024.5. -```bash -git clone https://github.com/openvinotoolkit/model_server.git -cd model_server -make release_image GPU=1 -``` -It will create an image called `openvino/model_server:latest`. -> **Note:** This operation might take 40min or more depending on your build host. -> **Note:** `GPU` parameter in image build command is needed to include dependencies for GPU device. - ## Model preparation > **Note** Python 3.9 or higher is needed for that step > diff --git a/demos/python_demos/clip_image_classification/download_model_requirements.txt b/demos/python_demos/clip_image_classification/download_model_requirements.txt index 4b647a6501..60d5608cb1 100644 --- a/demos/python_demos/clip_image_classification/download_model_requirements.txt +++ b/demos/python_demos/clip_image_classification/download_model_requirements.txt @@ -1,7 +1,7 @@ --extra-index-url "https://download.pytorch.org/whl/cpu" --extra-index-url "https://storage.openvinotoolkit.org/simple/wheels/nightly" --pre -openvino==2024.5.* +openvino==2025.0.* numpy<2.0 transformers==4.40.2 pillow==10.3.0 diff --git a/demos/rerank/README.md b/demos/rerank/README.md index 92cd75b1f2..af39285ab8 100644 --- a/demos/rerank/README.md +++ b/demos/rerank/README.md @@ -1,17 +1,5 @@ # How to serve Rerank models via Cohere API {#ovms_demos_rerank} -## Get the docker image - -Build the image from source to try this new feature. It will be included in the public image in the coming version 2024.5. -```bash -git clone https://github.com/openvinotoolkit/model_server.git -cd model_server -make release_image GPU=1 -``` -It will create an image called `openvino/model_server:latest`. -> **Note:** This operation might take 40min or more depending on your build host. -> **Note:** `GPU` parameter in image build command is needed to include dependencies for GPU device. - ## Model preparation > **Note** Python 3.9 or higher is needed for that step Here, the original Pytorch LLM model and the tokenizer will be converted to IR format and optionally quantized. diff --git a/src/llm/llm_calculator.proto b/src/llm/llm_calculator.proto index e6d3b8c01c..43c7c031fb 100644 --- a/src/llm/llm_calculator.proto +++ b/src/llm/llm_calculator.proto @@ -32,20 +32,17 @@ message LLMCalculatorOptions { // model memory cache to allocate in GB optional uint64 cache_size = 3 [default = 8]; - // DEPRECATED - this option is not effective starting with 2024.5 and will be removed in 2025.0 release - optional uint64 block_size = 4 [default = 32]; + optional uint64 max_num_seqs = 4 [default = 256]; - optional uint64 max_num_seqs = 5 [default = 256]; + optional bool dynamic_split_fuse = 5 [default = true]; - optional bool dynamic_split_fuse = 7 [default = true]; + optional string device = 6 [default = "CPU"]; - optional string device = 8 [default = "CPU"]; + optional string plugin_config = 7 [default = ""]; - optional string plugin_config = 9 [default = ""]; + optional uint32 best_of_limit = 8 [default = 20]; - optional uint32 best_of_limit = 10 [default = 20]; + optional uint32 max_tokens_limit = 9 [default = 4096]; - optional uint32 max_tokens_limit = 11 [default = 4096]; - - optional bool enable_prefix_caching = 12 [default = false]; + optional bool enable_prefix_caching = 10 [default = false]; } \ No newline at end of file diff --git a/src/llm/llmnoderesources.cpp b/src/llm/llmnoderesources.cpp index 2e892b58a2..dbfab92b28 100644 --- a/src/llm/llmnoderesources.cpp +++ b/src/llm/llmnoderesources.cpp @@ -145,11 +145,6 @@ Status LLMNodeResources::initializeLLMNodeResources(LLMNodeResources& nodeResour return StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST; } - // TODO: Remove along with block_size option in the proto in 2025.x release - if (nodeOptions.has_block_size()) { - SPDLOG_LOGGER_WARN(modelmanager_logger, "Since 2024.5, block_size is selected automatically and setting it explicitly is ineffective. " - "Please remove it from the configuration as in 2025.0 it will cause error."); - } nodeResources.schedulerConfig = { .max_num_batched_tokens = nodeOptions.max_num_batched_tokens(), .cache_size = nodeOptions.cache_size(), diff --git a/tests/file_lists/lib_files.txt b/tests/file_lists/lib_files.txt index ebf7c3fafa..8492f5c369 100644 --- a/tests/file_lists/lib_files.txt +++ b/tests/file_lists/lib_files.txt @@ -43,38 +43,38 @@ libopencv_videoio.so.410->libopencv_videoio.so.4.10.0 libopencv_ximgproc.so->libopencv_ximgproc.so.410 libopencv_ximgproc.so.4.10.0 libopencv_ximgproc.so.410->libopencv_ximgproc.so.4.10.0 -libopenvino.so->libopenvino.so.2450 -libopenvino.so.2024.5.0 -libopenvino.so.2450->libopenvino.so.2024.5.0 +libopenvino.so->libopenvino.so.2500 +libopenvino.so.2025.0.0 +libopenvino.so.2500->libopenvino.so.2025.0.0 libopenvino_auto_batch_plugin.so libopenvino_auto_plugin.so -libopenvino_c.so->libopenvino_c.so.2450 -libopenvino_c.so.2024.5.0 -libopenvino_c.so.2450->libopenvino_c.so.2024.5.0 +libopenvino_c.so->libopenvino_c.so.2500 +libopenvino_c.so.2025.0.0 +libopenvino_c.so.2500->libopenvino_c.so.2025.0.0 libopenvino_hetero_plugin.so libopenvino_intel_cpu_plugin.so libopenvino_intel_gpu_plugin.so libopenvino_intel_npu_plugin.so -libopenvino_ir_frontend.so.2024.5.0 -libopenvino_ir_frontend.so.2450->libopenvino_ir_frontend.so.2024.5.0 -libopenvino_jax_frontend.so->libopenvino_jax_frontend.so.2450 -libopenvino_jax_frontend.so.2024.5.0 -libopenvino_jax_frontend.so.2450->libopenvino_jax_frontend.so.2024.5.0 -libopenvino_onnx_frontend.so->libopenvino_onnx_frontend.so.2450 -libopenvino_onnx_frontend.so.2024.5.0 -libopenvino_onnx_frontend.so.2450->libopenvino_onnx_frontend.so.2024.5.0 -libopenvino_paddle_frontend.so->libopenvino_paddle_frontend.so.2450 -libopenvino_paddle_frontend.so.2024.5.0 -libopenvino_paddle_frontend.so.2450->libopenvino_paddle_frontend.so.2024.5.0 -libopenvino_pytorch_frontend.so->libopenvino_pytorch_frontend.so.2450 -libopenvino_pytorch_frontend.so.2024.5.0 -libopenvino_pytorch_frontend.so.2450->libopenvino_pytorch_frontend.so.2024.5.0 -libopenvino_tensorflow_frontend.so->libopenvino_tensorflow_frontend.so.2450 -libopenvino_tensorflow_frontend.so.2024.5.0 -libopenvino_tensorflow_frontend.so.2450->libopenvino_tensorflow_frontend.so.2024.5.0 -libopenvino_tensorflow_lite_frontend.so->libopenvino_tensorflow_lite_frontend.so.2450 -libopenvino_tensorflow_lite_frontend.so.2024.5.0 -libopenvino_tensorflow_lite_frontend.so.2450->libopenvino_tensorflow_lite_frontend.so.2024.5.0 +libopenvino_ir_frontend.so.2025.0.0 +libopenvino_ir_frontend.so.2500->libopenvino_ir_frontend.so.2025.0.0 +libopenvino_jax_frontend.so->libopenvino_jax_frontend.so.2500 +libopenvino_jax_frontend.so.2025.0.0 +libopenvino_jax_frontend.so.2500->libopenvino_jax_frontend.so.2025.0.0 +libopenvino_onnx_frontend.so->libopenvino_onnx_frontend.so.2500 +libopenvino_onnx_frontend.so.2025.0.0 +libopenvino_onnx_frontend.so.2500->libopenvino_onnx_frontend.so.2025.0.0 +libopenvino_paddle_frontend.so->libopenvino_paddle_frontend.so.2500 +libopenvino_paddle_frontend.so.2025.0.0 +libopenvino_paddle_frontend.so.2500->libopenvino_paddle_frontend.so.2025.0.0 +libopenvino_pytorch_frontend.so->libopenvino_pytorch_frontend.so.2500 +libopenvino_pytorch_frontend.so.2025.0.0 +libopenvino_pytorch_frontend.so.2500->libopenvino_pytorch_frontend.so.2025.0.0 +libopenvino_tensorflow_frontend.so->libopenvino_tensorflow_frontend.so.2500 +libopenvino_tensorflow_frontend.so.2025.0.0 +libopenvino_tensorflow_frontend.so.2500->libopenvino_tensorflow_frontend.so.2025.0.0 +libopenvino_tensorflow_lite_frontend.so->libopenvino_tensorflow_lite_frontend.so.2500 +libopenvino_tensorflow_lite_frontend.so.2025.0.0 +libopenvino_tensorflow_lite_frontend.so.2500->libopenvino_tensorflow_lite_frontend.so.2025.0.0 libopenvino_tokenizers.so libtbb.so->libtbb.so.12 libtbb.so.12->libtbb.so.12.13 diff --git a/tests/file_lists/lib_files_python.txt b/tests/file_lists/lib_files_python.txt index b40a7de327..b4a34286f7 100644 --- a/tests/file_lists/lib_files_python.txt +++ b/tests/file_lists/lib_files_python.txt @@ -43,40 +43,40 @@ libopencv_videoio.so.410->libopencv_videoio.so.4.10.0 libopencv_ximgproc.so->libopencv_ximgproc.so.410 libopencv_ximgproc.so.4.10.0 libopencv_ximgproc.so.410->libopencv_ximgproc.so.4.10.0 -libopenvino.so->libopenvino.so.2450 -libopenvino.so.2024.5.0 -libopenvino.so.2450->libopenvino.so.2024.5.0 +libopenvino.so->libopenvino.so.2500 +libopenvino.so.2025.0.0 +libopenvino.so.2500->libopenvino.so.2025.0.0 libopenvino_auto_batch_plugin.so libopenvino_auto_plugin.so -libopenvino_c.so->libopenvino_c.so.2450 -libopenvino_c.so.2024.5.0 -libopenvino_c.so.2450->libopenvino_c.so.2024.5.0 +libopenvino_c.so->libopenvino_c.so.2500 +libopenvino_c.so.2025.0.0 +libopenvino_c.so.2500->libopenvino_c.so.2025.0.0 libopenvino_genai.so -libopenvino_genai.so.2450->libopenvino_genai.so +libopenvino_genai.so.2500->libopenvino_genai.so libopenvino_hetero_plugin.so libopenvino_intel_cpu_plugin.so libopenvino_intel_gpu_plugin.so libopenvino_intel_npu_plugin.so -libopenvino_ir_frontend.so.2024.5.0 -libopenvino_ir_frontend.so.2450->libopenvino_ir_frontend.so.2024.5.0 -libopenvino_jax_frontend.so->libopenvino_jax_frontend.so.2450 -libopenvino_jax_frontend.so.2024.5.0 -libopenvino_jax_frontend.so.2450->libopenvino_jax_frontend.so.2024.5.0 -libopenvino_onnx_frontend.so->libopenvino_onnx_frontend.so.2450 -libopenvino_onnx_frontend.so.2024.5.0 -libopenvino_onnx_frontend.so.2450->libopenvino_onnx_frontend.so.2024.5.0 -libopenvino_paddle_frontend.so->libopenvino_paddle_frontend.so.2450 -libopenvino_paddle_frontend.so.2024.5.0 -libopenvino_paddle_frontend.so.2450->libopenvino_paddle_frontend.so.2024.5.0 -libopenvino_pytorch_frontend.so->libopenvino_pytorch_frontend.so.2450 -libopenvino_pytorch_frontend.so.2024.5.0 -libopenvino_pytorch_frontend.so.2450->libopenvino_pytorch_frontend.so.2024.5.0 -libopenvino_tensorflow_frontend.so->libopenvino_tensorflow_frontend.so.2450 -libopenvino_tensorflow_frontend.so.2024.5.0 -libopenvino_tensorflow_frontend.so.2450->libopenvino_tensorflow_frontend.so.2024.5.0 -libopenvino_tensorflow_lite_frontend.so->libopenvino_tensorflow_lite_frontend.so.2450 -libopenvino_tensorflow_lite_frontend.so.2024.5.0 -libopenvino_tensorflow_lite_frontend.so.2450->libopenvino_tensorflow_lite_frontend.so.2024.5.0 +libopenvino_ir_frontend.so.2025.0.0 +libopenvino_ir_frontend.so.2500->libopenvino_ir_frontend.so.2025.0.0 +libopenvino_jax_frontend.so->libopenvino_jax_frontend.so.2500 +libopenvino_jax_frontend.so.2025.0.0 +libopenvino_jax_frontend.so.2500->libopenvino_jax_frontend.so.2025.0.0 +libopenvino_onnx_frontend.so->libopenvino_onnx_frontend.so.2500 +libopenvino_onnx_frontend.so.2025.0.0 +libopenvino_onnx_frontend.so.2500->libopenvino_onnx_frontend.so.2025.0.0 +libopenvino_paddle_frontend.so->libopenvino_paddle_frontend.so.2500 +libopenvino_paddle_frontend.so.2025.0.0 +libopenvino_paddle_frontend.so.2500->libopenvino_paddle_frontend.so.2025.0.0 +libopenvino_pytorch_frontend.so->libopenvino_pytorch_frontend.so.2500 +libopenvino_pytorch_frontend.so.2025.0.0 +libopenvino_pytorch_frontend.so.2500->libopenvino_pytorch_frontend.so.2025.0.0 +libopenvino_tensorflow_frontend.so->libopenvino_tensorflow_frontend.so.2500 +libopenvino_tensorflow_frontend.so.2025.0.0 +libopenvino_tensorflow_frontend.so.2500->libopenvino_tensorflow_frontend.so.2025.0.0 +libopenvino_tensorflow_lite_frontend.so->libopenvino_tensorflow_lite_frontend.so.2500 +libopenvino_tensorflow_lite_frontend.so.2025.0.0 +libopenvino_tensorflow_lite_frontend.so.2500->libopenvino_tensorflow_lite_frontend.so.2025.0.0 libopenvino_tokenizers.so libtbb.so->libtbb.so.12 libtbb.so.12->libtbb.so.12.13 From ec9f160eb101a4ecad1d148f9130341a5e1c86e1 Mon Sep 17 00:00:00 2001 From: mzegla Date: Mon, 18 Nov 2024 13:56:48 +0100 Subject: [PATCH 05/10] remove cb patch --- ci/lib_search.py | 1 - docs/developer_guide.md | 8 ++++---- external/BUILD | 1 - external/cb.patch | 18 ------------------ spelling-whitelist.txt | 1 - third_party/llm_engine/llm_engine.bzl | 2 -- 6 files changed, 4 insertions(+), 27 deletions(-) delete mode 100644 external/cb.patch diff --git a/ci/lib_search.py b/ci/lib_search.py index 05b1ca905a..27578e5028 100644 --- a/ci/lib_search.py +++ b/ci/lib_search.py @@ -83,7 +83,6 @@ def check_dir(start_dir): '__pycache__', 'add.xml', 'azure_sdk.patch', - 'cb.patch', 'bazel-', 'check_coverage.bat', 'genhtml', diff --git a/docs/developer_guide.md b/docs/developer_guide.md index fdd6576db3..e19a0bf7d3 100644 --- a/docs/developer_guide.md +++ b/docs/developer_guide.md @@ -61,15 +61,15 @@ In-case of problems, see [Debugging](#debugging). docker run -it -v ${PWD}:/ovms --entrypoint bash -p 9178:9178 openvino/model_server-build:latest ``` -4. In the docker container context compile the source code via : +4. In the docker container context compile the source code via (choose distro `ubuntu` or `redhat` depending on the image type): ```bash - bazel build --config=mp_on_py_on //src:ovms + bazel build --//:distro=ubuntu --config=mp_on_py_on //src:ovms > **NOTE**: There are several options that would disable specific parts of OVMS. For details check ovms bazel build files. ``` -5. From the container, run a single unit test : +5. From the container, run a single unit test (choose distro `ubuntu` or `redhat` depending on the image type): ```bash - bazel test --config=mp_on_py_on --test_summary=detailed --test_output=all --test_filter='ModelVersionStatus.*' //src:ovms_test + bazel test --//:distro=ubuntu --config=mp_on_py_on --test_summary=detailed --test_output=streamed --test_filter='*' //src:ovms_test ``` | Argument | Description | diff --git a/external/BUILD b/external/BUILD index c6ad55a0d8..0f9b9c8882 100644 --- a/external/BUILD +++ b/external/BUILD @@ -47,5 +47,4 @@ exports_files([ "listen.patch", "tf.patch", "net_http.patch", - "cb.patch", ]) \ No newline at end of file diff --git a/external/cb.patch b/external/cb.patch deleted file mode 100644 index 110765cc09..0000000000 --- a/external/cb.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index b08debb..4171092 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -62,9 +62,9 @@ endif() - - add_subdirectory(thirdparty) - add_subdirectory(src) --add_subdirectory(samples) --add_subdirectory(tools/continuous_batching) --add_subdirectory(tests/cpp) -+#add_subdirectory(samples) -+#add_subdirectory(tools/continuous_batching) -+#add_subdirectory(tests/cpp) - - install(FILES LICENSE DESTINATION docs/licensing COMPONENT licensing_genai RENAME LICENSE-GENAI) - install(FILES third-party-programs.txt DESTINATION docs/licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt) - diff --git a/spelling-whitelist.txt b/spelling-whitelist.txt index d979ae1767..361f842e28 100644 --- a/spelling-whitelist.txt +++ b/spelling-whitelist.txt @@ -1,7 +1,6 @@ client/common/resnet_labels.txt demos/common/python/classes.py demos/image_classification/go/labels.go -external/cb.patch extras/nginx-mtls-auth/model_server.conf.template release_files/thirdparty-licenses/boringssl.LICENSE.txt src/shape.cpp:436: strIn diff --git a/third_party/llm_engine/llm_engine.bzl b/third_party/llm_engine/llm_engine.bzl index 9259b300d2..f1236fbd8f 100644 --- a/third_party/llm_engine/llm_engine.bzl +++ b/third_party/llm_engine/llm_engine.bzl @@ -24,8 +24,6 @@ def llm_engine(): build_file = "@_llm_engine//:BUILD", init_submodules = True, recursive_init_submodules = True, - patch_args = ["-p1"], - patches = ["cb.patch"], ) # when using local repository manually run: git submodule update --recursive #native.new_local_repository( From 65538be9a8cd7e9695d9913a564777f31a3f83cd Mon Sep 17 00:00:00 2001 From: mzegla Date: Wed, 20 Nov 2024 11:40:35 +0100 Subject: [PATCH 06/10] Revert "remove cb patch" This reverts commit ec9f160eb101a4ecad1d148f9130341a5e1c86e1. --- ci/lib_search.py | 1 + docs/developer_guide.md | 8 ++++---- external/BUILD | 1 + external/cb.patch | 18 ++++++++++++++++++ spelling-whitelist.txt | 1 + third_party/llm_engine/llm_engine.bzl | 2 ++ 6 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 external/cb.patch diff --git a/ci/lib_search.py b/ci/lib_search.py index 27578e5028..05b1ca905a 100644 --- a/ci/lib_search.py +++ b/ci/lib_search.py @@ -83,6 +83,7 @@ def check_dir(start_dir): '__pycache__', 'add.xml', 'azure_sdk.patch', + 'cb.patch', 'bazel-', 'check_coverage.bat', 'genhtml', diff --git a/docs/developer_guide.md b/docs/developer_guide.md index e19a0bf7d3..fdd6576db3 100644 --- a/docs/developer_guide.md +++ b/docs/developer_guide.md @@ -61,15 +61,15 @@ In-case of problems, see [Debugging](#debugging). docker run -it -v ${PWD}:/ovms --entrypoint bash -p 9178:9178 openvino/model_server-build:latest ``` -4. In the docker container context compile the source code via (choose distro `ubuntu` or `redhat` depending on the image type): +4. In the docker container context compile the source code via : ```bash - bazel build --//:distro=ubuntu --config=mp_on_py_on //src:ovms + bazel build --config=mp_on_py_on //src:ovms > **NOTE**: There are several options that would disable specific parts of OVMS. For details check ovms bazel build files. ``` -5. From the container, run a single unit test (choose distro `ubuntu` or `redhat` depending on the image type): +5. From the container, run a single unit test : ```bash - bazel test --//:distro=ubuntu --config=mp_on_py_on --test_summary=detailed --test_output=streamed --test_filter='*' //src:ovms_test + bazel test --config=mp_on_py_on --test_summary=detailed --test_output=all --test_filter='ModelVersionStatus.*' //src:ovms_test ``` | Argument | Description | diff --git a/external/BUILD b/external/BUILD index 0f9b9c8882..c6ad55a0d8 100644 --- a/external/BUILD +++ b/external/BUILD @@ -47,4 +47,5 @@ exports_files([ "listen.patch", "tf.patch", "net_http.patch", + "cb.patch", ]) \ No newline at end of file diff --git a/external/cb.patch b/external/cb.patch new file mode 100644 index 0000000000..110765cc09 --- /dev/null +++ b/external/cb.patch @@ -0,0 +1,18 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index b08debb..4171092 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -62,9 +62,9 @@ endif() + + add_subdirectory(thirdparty) + add_subdirectory(src) +-add_subdirectory(samples) +-add_subdirectory(tools/continuous_batching) +-add_subdirectory(tests/cpp) ++#add_subdirectory(samples) ++#add_subdirectory(tools/continuous_batching) ++#add_subdirectory(tests/cpp) + + install(FILES LICENSE DESTINATION docs/licensing COMPONENT licensing_genai RENAME LICENSE-GENAI) + install(FILES third-party-programs.txt DESTINATION docs/licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt) + diff --git a/spelling-whitelist.txt b/spelling-whitelist.txt index 361f842e28..d979ae1767 100644 --- a/spelling-whitelist.txt +++ b/spelling-whitelist.txt @@ -1,6 +1,7 @@ client/common/resnet_labels.txt demos/common/python/classes.py demos/image_classification/go/labels.go +external/cb.patch extras/nginx-mtls-auth/model_server.conf.template release_files/thirdparty-licenses/boringssl.LICENSE.txt src/shape.cpp:436: strIn diff --git a/third_party/llm_engine/llm_engine.bzl b/third_party/llm_engine/llm_engine.bzl index f1236fbd8f..9259b300d2 100644 --- a/third_party/llm_engine/llm_engine.bzl +++ b/third_party/llm_engine/llm_engine.bzl @@ -24,6 +24,8 @@ def llm_engine(): build_file = "@_llm_engine//:BUILD", init_submodules = True, recursive_init_submodules = True, + patch_args = ["-p1"], + patches = ["cb.patch"], ) # when using local repository manually run: git submodule update --recursive #native.new_local_repository( From 9a38d73910b38d4c02a9e62754059e5b72d4d84f Mon Sep 17 00:00:00 2001 From: mzegla Date: Wed, 20 Nov 2024 11:44:43 +0100 Subject: [PATCH 07/10] adjust cb patch and add more info to dev guide --- docs/developer_guide.md | 8 ++++---- external/cb.patch | 28 ++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/docs/developer_guide.md b/docs/developer_guide.md index fdd6576db3..8a8bdbf2ec 100644 --- a/docs/developer_guide.md +++ b/docs/developer_guide.md @@ -61,15 +61,15 @@ In-case of problems, see [Debugging](#debugging). docker run -it -v ${PWD}:/ovms --entrypoint bash -p 9178:9178 openvino/model_server-build:latest ``` -4. In the docker container context compile the source code via : +4. In the docker container context compile the source code via (choose distro `ubuntu` or `redhat` depending on the image type): ```bash - bazel build --config=mp_on_py_on //src:ovms + bazel build --//:distro=ubuntu --config=mp_on_py_on //src:ovms > **NOTE**: There are several options that would disable specific parts of OVMS. For details check ovms bazel build files. ``` -5. From the container, run a single unit test : +5. From the container, run a single unit test (choose distro `ubuntu` or `redhat` depending on the image type): ```bash - bazel test --config=mp_on_py_on --test_summary=detailed --test_output=all --test_filter='ModelVersionStatus.*' //src:ovms_test + bazel test --//:distro=ubuntu --config=mp_on_py_on --test_summary=detailed --test_output=all --test_filter='ModelVersionStatus.*' //src:ovms_test ``` | Argument | Description | diff --git a/external/cb.patch b/external/cb.patch index 110765cc09..f33322a49c 100644 --- a/external/cb.patch +++ b/external/cb.patch @@ -1,17 +1,29 @@ diff --git a/CMakeLists.txt b/CMakeLists.txt -index b08debb..4171092 100644 +index da39b5c..d3fa72b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -62,9 +62,9 @@ endif() +@@ -81,15 +81,15 @@ endif() add_subdirectory(thirdparty) add_subdirectory(src) --add_subdirectory(samples) --add_subdirectory(tools/continuous_batching) --add_subdirectory(tests/cpp) -+#add_subdirectory(samples) -+#add_subdirectory(tools/continuous_batching) -+#add_subdirectory(tests/cpp) +-if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/samples") +- add_subdirectory(samples) +-endif() +-if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tools/continuous_batching") +- add_subdirectory(tools/continuous_batching) +-endif() +-if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tests/cpp") +- add_subdirectory(tests/cpp) +-endif() ++#if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/samples") ++# add_subdirectory(samples) ++#endif() ++#if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tools/continuous_batching") ++# add_subdirectory(tools/continuous_batching) ++#endif() ++#if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tests/cpp") ++# add_subdirectory(tests/cpp) ++#endif() install(FILES LICENSE DESTINATION docs/licensing COMPONENT licensing_genai RENAME LICENSE-GENAI) install(FILES third-party-programs.txt DESTINATION docs/licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt) From 99077f3906f60a6106dcd844b9d343304de45a28 Mon Sep 17 00:00:00 2001 From: mzegla Date: Wed, 20 Nov 2024 11:45:54 +0100 Subject: [PATCH 08/10] adjust test --- src/test/llmnode_test.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/test/llmnode_test.cpp b/src/test/llmnode_test.cpp index ad7d14b4c7..9c73308966 100644 --- a/src/test/llmnode_test.cpp +++ b/src/test/llmnode_test.cpp @@ -2734,7 +2734,6 @@ TEST_F(LLMOptionsHttpTest, LLMNodeOptionsCheckNonDefault) { models_path: "/ovms/src/test/llm_testing/facebook/opt-125m" max_num_batched_tokens: 1024 cache_size: 1 - block_size: 8 max_num_seqs: 95 dynamic_split_fuse: false enable_prefix_caching: true @@ -2762,9 +2761,6 @@ TEST_F(LLMOptionsHttpTest, LLMNodeOptionsCheckNonDefault) { ASSERT_EQ(nodeResources.schedulerConfig.max_num_batched_tokens, 1024); ASSERT_EQ(nodeResources.schedulerConfig.cache_size, 1); - // We create graph with block_size set in graph config to make sure setting it does not result in error - // TODO: Remove below commented assertion as well as block_size from the testPbtxt when block_size is removed from options proto. - // ASSERT_EQ(nodeResources.schedulerConfig.block_size, 8); ASSERT_EQ(nodeResources.schedulerConfig.dynamic_split_fuse, false); ASSERT_EQ(nodeResources.schedulerConfig.max_num_seqs, 95); ASSERT_EQ(nodeResources.schedulerConfig.enable_prefix_caching, true); From 62b3b4855359cddeefb8927e5373e81d3359c984 Mon Sep 17 00:00:00 2001 From: mzegla Date: Wed, 20 Nov 2024 11:56:20 +0100 Subject: [PATCH 09/10] update genai --- third_party/llm_engine/llm_engine.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/llm_engine/llm_engine.bzl b/third_party/llm_engine/llm_engine.bzl index 9259b300d2..8c66288280 100644 --- a/third_party/llm_engine/llm_engine.bzl +++ b/third_party/llm_engine/llm_engine.bzl @@ -20,7 +20,7 @@ def llm_engine(): new_git_repository( name = "llm_engine", remote = "https://github.com/openvinotoolkit/openvino.genai", - commit = "96bcffebddf724fb7c9e28f0bb6fdabf36e96814", # master / Nov 14 + commit = "17536724b9f798bea871c8775fb1a97f69714d35", # master / Nov 20 build_file = "@_llm_engine//:BUILD", init_submodules = True, recursive_init_submodules = True, From 3fe7bf0b2e297dd49c70e835b6e2619d25717784 Mon Sep 17 00:00:00 2001 From: mzegla Date: Thu, 21 Nov 2024 11:38:39 +0100 Subject: [PATCH 10/10] update ov source commit --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index b9aa44bdd1..b14acdd158 100644 --- a/Makefile +++ b/Makefile @@ -74,8 +74,8 @@ FUZZER_BUILD ?= 0 # NOTE: when changing any value below, you'll need to adjust WORKSPACE file by hand: # - uncomment source build section, comment binary section # - adjust binary version path - version variable is not passed to WORKSPACE file! -OV_SOURCE_BRANCH ?= db64e5c66a9fdede7ecb8473b399ac94210f5136 # releases/2024/5 2024-11-09 -OV_CONTRIB_BRANCH ?= c39462ca8d7c550266dc70cdbfbe4fc8c5be0677 # releases/2024/5 2024-10-31 +OV_SOURCE_BRANCH ?= 6733cc320915ca6bfad9036940bf5ca244b41a8b # master / 2024-11-18 +OV_CONTRIB_BRANCH ?= c39462ca8d7c550266dc70cdbfbe4fc8c5be0677 # master / 2024-10-31 OV_TOKENIZERS_BRANCH ?= 5ccd56de5a7da65ad7ea7c5c42cec55be97df312 # master / 2024-11-14 OV_SOURCE_ORG ?= openvinotoolkit