Skip to content

Commit

Permalink
Updates for 2025.0 (#2807)
Browse files · Browse the repository at this point in the history
  • Loading branch information
mzegla authored Nov 21, 2024
1 parent f0579aa commit b7b57e7
Show file tree
Hide file tree
Showing 21 changed files with 116 additions and 148 deletions.
14 changes: 7 additions & 7 deletions Dockerfile.redhat
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; \
ln -s /openvino/inference-engine/temp/opencv_*/opencv /opt/intel/openvino/extras && \
ln -s /usr/local/runtime /opt/intel/openvino && \
ln -s /openvino/scripts/setupvars/setupvars.sh /opt/intel/openvino/setupvars.sh && \
ln -s /opt/intel/openvino /opt/intel/openvino_2024
ln -s /opt/intel/openvino /opt/intel/openvino_2025
RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; mkdir -p /opt/intel/openvino && cp -r /openvino/bin/intel64/Release/python /opt/intel/openvino/
RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; cp -r /openvino/tools/ovc/* /opt/intel/openvino/python
################## END OF OPENVINO SOURCE BUILD ######################
Expand All @@ -212,7 +212,7 @@ RUN if [ "$ov_use_binary" = "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ;
mkdir /opt/intel && \
tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \
ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2024
ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2025

# update oneTBB
RUN wget -nv https://github.com/oneapi-src/oneTBB/releases/download/v2021.13.0/oneapi-tbb-2021.13.0-lin.tgz && \
Expand Down Expand Up @@ -300,13 +300,13 @@ WORKDIR /ovms/src/example/SampleCpuExtension/
RUN make

RUN if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; \
mkdir -p /opt/intel/openvino/python/openvino-2024.5.dist-info && \
echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2024.5' > /opt/intel/openvino/python/openvino-2024.5.dist-info/METADATA
mkdir -p /opt/intel/openvino/python/openvino-2025.0.dist-info && \
echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2025.0' > /opt/intel/openvino/python/openvino-2025.0.dist-info/METADATA
ENV PYTHONPATH=/opt/intel/openvino/python:/ovms/bazel-bin/src/python/binding

WORKDIR /ovms

ARG PROJECT_VERSION="2024.5"
ARG PROJECT_VERSION="2025.0"
ARG PROJECT_NAME="OpenVINO Model Server"
LABEL description=${PROJECT_NAME}
ARG minitrace_flags
Expand Down Expand Up @@ -398,8 +398,8 @@ RUN if [ -f /ovms_release/lib/libovms_shared.so ] ; then mv /ovms_release/lib/li
FROM $RELEASE_BASE_IMAGE as release
LABEL "name"="OVMS"
LABEL "vendor"="Intel Corporation"
LABEL "version"="2024.5"
LABEL "release"="2024"
LABEL "version"="2025.0"
LABEL "release"="2025"
LABEL "summary"="OpenVINO(TM) Model Server"
LABEL "description"="OpenVINO(TM) Model Server is a solution for serving AI models"
ARG INSTALL_RPMS_FROM_URL=
Expand Down
10 changes: 5 additions & 5 deletions Dockerfile.ubuntu
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; \
mkdir -p /opt/intel/openvino && \
ln -s /usr/local/runtime /opt/intel/openvino && \
ln -s /openvino/scripts/setupvars/setupvars.sh /opt/intel/openvino/setupvars.sh && \
ln -s /opt/intel/openvino /opt/intel/openvino_2024
ln -s /opt/intel/openvino /opt/intel/openvino_2025
RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; mkdir -p /opt/intel/openvino && cp -r /openvino/bin/intel64/${CMAKE_BUILD_TYPE}/python /opt/intel/openvino/
RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; cp -r /openvino/tools/ovc/* /opt/intel/openvino/python

Expand All @@ -184,7 +184,7 @@ RUN if [ "$ov_use_binary" == "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ;
mkdir /opt/intel && \
tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \
ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2024
ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2025

# update oneTBB
RUN wget -nv https://github.com/oneapi-src/oneTBB/releases/download/v2021.13.0/oneapi-tbb-2021.13.0-lin.tgz && \
Expand Down Expand Up @@ -305,12 +305,12 @@ WORKDIR /ovms/src/example/SampleCpuExtension/
RUN make && cp libcustom_relu_cpu_extension.so /opt

RUN if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; \
mkdir -p /opt/intel/openvino/python/openvino-2024.5.dist-info && \
echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2024.5' > /opt/intel/openvino/python/openvino-2024.5.dist-info/METADATA
mkdir -p /opt/intel/openvino/python/openvino-2025.0.dist-info && \
echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2025.0' > /opt/intel/openvino/python/openvino-2025.0.dist-info/METADATA

WORKDIR /ovms

ARG PROJECT_VERSION="2024.5"
ARG PROJECT_VERSION="2025.0"
ARG PROJECT_NAME="OpenVINO Model Server"
LABEL description=${PROJECT_NAME}
ARG minitrace_flags
Expand Down
14 changes: 7 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ FUZZER_BUILD ?= 0
# NOTE: when changing any value below, you'll need to adjust WORKSPACE file by hand:
# - uncomment source build section, comment binary section
# - adjust binary version path - version variable is not passed to WORKSPACE file!
OV_SOURCE_BRANCH ?= db64e5c66a9fdede7ecb8473b399ac94210f5136 # releases/2024/5 2024-11-09
OV_CONTRIB_BRANCH ?= c39462ca8d7c550266dc70cdbfbe4fc8c5be0677 # releases/2024/5 2024-10-31
OV_TOKENIZERS_BRANCH ?= 92bec551bdb1eed94aa375415c82b4068520c7b9 # releases/2024/5 2024-11-04
OV_SOURCE_BRANCH ?= 6733cc320915ca6bfad9036940bf5ca244b41a8b # master / 2024-11-18
OV_CONTRIB_BRANCH ?= c39462ca8d7c550266dc70cdbfbe4fc8c5be0677 # master / 2024-10-31
OV_TOKENIZERS_BRANCH ?= 5ccd56de5a7da65ad7ea7c5c42cec55be97df312 # master / 2024-11-14

OV_SOURCE_ORG ?= openvinotoolkit
OV_CONTRIB_ORG ?= openvinotoolkit
Expand Down Expand Up @@ -166,11 +166,11 @@ ifeq ($(findstring ubuntu,$(BASE_OS)),ubuntu)
ifeq ($(BASE_OS_TAG),20.04)
OS=ubuntu20
INSTALL_DRIVER_VERSION ?= "22.43.24595"
DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu20_2024.5.0.17288.7975fa5da0c_x86_64.tgz
DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu20_2025.0.0.17377.6733cc32091_x86_64.tgz
else ifeq ($(BASE_OS_TAG),22.04)
OS=ubuntu22
INSTALL_DRIVER_VERSION ?= "24.26.30049"
DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu22_2024.5.0.17288.7975fa5da0c_x86_64.tgz
DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu22_2025.0.0.17377.6733cc32091_x86_64.tgz
endif
endif
ifeq ($(BASE_OS),redhat)
Expand All @@ -185,7 +185,7 @@ ifeq ($(BASE_OS),redhat)
endif
DIST_OS=redhat
INSTALL_DRIVER_VERSION ?= "23.22.26516"
DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_rhel8_2024.5.0.17288.7975fa5da0c_x86_64.tgz
DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_rhel8_2025.0.0.17377.6733cc32091_x86_64.tgz
endif

OVMS_CPP_DOCKER_IMAGE ?= openvino/model_server
Expand All @@ -201,7 +201,7 @@ endif
OVMS_PYTHON_IMAGE_TAG ?= py

PRODUCT_NAME = "OpenVINO Model Server"
PRODUCT_VERSION ?= "2024.5"
PRODUCT_VERSION ?= "2025.0"
PROJECT_VER_PATCH =

$(eval PROJECT_VER_PATCH:=`git rev-parse --short HEAD`)
Expand Down
4 changes: 2 additions & 2 deletions create_package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# limitations under the License.
#

# This script should be used inside the build image to create a binary package based on the compiled artefacts
# This script should be used inside the build image to create a binary package based on the compiled artifacts

env
mkdir -vp /ovms_release/bin
Expand All @@ -28,7 +28,7 @@ find /ovms/bazel-out/k8-*/bin -iname '*.so*' ! -type d ! -name "libgtest.so" ! -
mv /ovms_release/lib/libcustom_node* /ovms_release/lib/custom_nodes/
cd /ovms_release/lib/ ; rm -f libazurestorage.so.* ; ln -s libazurestorage.so libazurestorage.so.7 ;ln -s libazurestorage.so libazurestorage.so.7.5
cd /ovms_release/lib/ ; rm -f libcpprest.so.2.10 ; ln -s libcpprest.so libcpprest.so.2.10
if [ -f /ovms_release/lib/libopenvino_genai.so ]; then cd /ovms_release/lib/ ; rm -f libopenvino_genai.so.* ; ln -s libopenvino_genai.so libopenvino_genai.so.2450 ; ln -s libopenvino_genai.so.2024.5.0.0 libopenvino_genai.so.2450 ; fi
if [ -f /ovms_release/lib/libopenvino_genai.so ]; then cd /ovms_release/lib/ ; rm -f libopenvino_genai.so.* ; ln -s libopenvino_genai.so libopenvino_genai.so.2500 ; ln -s libopenvino_genai.so.2025.0.0.0 libopenvino_genai.so.2500 ; fi
rm -f /ovms_release/lib/libssl.so
rm -f /ovms_release/lib/libsampleloader*

Expand Down
4 changes: 2 additions & 2 deletions demos/common/export_models/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
--extra-index-url "https://storage.openvinotoolkit.org/simple/wheels/nightly"
--pre
optimum-intel@git+https://github.com/huggingface/optimum-intel.git
openvino-tokenizers[transformers]==2024.5.*
openvino==2024.5.*
openvino-tokenizers[transformers]==2025.0.*
openvino==2025.0.*
nncf>=2.11.0
sentence_transformers==3.1.1
openai
Expand Down
12 changes: 0 additions & 12 deletions demos/embeddings/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,6 @@
This demo shows how to deploy embeddings models in the OpenVINO Model Server for text feature extractions.
Text generation use case is exposed via OpenAI API `embeddings` endpoint.

## Get the docker image

Build the image from source to try this new feature. It will be included in the public image in the coming version 2024.5.
```bash
git clone https://github.com/openvinotoolkit/model_server.git
cd model_server
make release_image GPU=1
```
It will create an image called `openvino/model_server:latest`.
> **Note:** This operation might take 40min or more depending on your build host.
> **Note:** `GPU` parameter in image build command is needed to include dependencies for GPU device.
## Model preparation
> **Note** Python 3.9 or higher is needed for that step
>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
--extra-index-url "https://download.pytorch.org/whl/cpu"
--extra-index-url "https://storage.openvinotoolkit.org/simple/wheels/nightly"
--pre
openvino==2024.5.*
openvino==2025.0.*
numpy<2.0
transformers==4.40.2
pillow==10.3.0
Expand Down
12 changes: 0 additions & 12 deletions demos/rerank/README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,5 @@
# How to serve Rerank models via Cohere API {#ovms_demos_rerank}

## Get the docker image

Build the image from source to try this new feature. It will be included in the public image in the coming version 2024.5.
```bash
git clone https://github.com/openvinotoolkit/model_server.git
cd model_server
make release_image GPU=1
```
It will create an image called `openvino/model_server:latest`.
> **Note:** This operation might take 40min or more depending on your build host.
> **Note:** `GPU` parameter in image build command is needed to include dependencies for GPU device.
## Model preparation
> **Note** Python 3.9 or higher is needed for that step
Here, the original Pytorch LLM model and the tokenizer will be converted to IR format and optionally quantized.
Expand Down
8 changes: 4 additions & 4 deletions docs/developer_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,15 @@ In-case of problems, see [Debugging](#debugging).
docker run -it -v ${PWD}:/ovms --entrypoint bash -p 9178:9178 openvino/model_server-build:latest
```

4. In the docker container context compile the source code via :
4. In the docker container context compile the source code via (choose distro `ubuntu` or `redhat` depending on the image type):
```bash
bazel build --config=mp_on_py_on //src:ovms
bazel build --//:distro=ubuntu --config=mp_on_py_on //src:ovms
> **NOTE**: There are several options that would disable specific parts of OVMS. For details check ovms bazel build files.
```

5. From the container, run a single unit test :
5. From the container, run a single unit test (choose distro `ubuntu` or `redhat` depending on the image type):
```bash
bazel test --config=mp_on_py_on --test_summary=detailed --test_output=all --test_filter='ModelVersionStatus.*' //src:ovms_test
bazel test --//:distro=ubuntu --config=mp_on_py_on --test_summary=detailed --test_output=all --test_filter='ModelVersionStatus.*' //src:ovms_test
```

| Argument | Description |
Expand Down
28 changes: 20 additions & 8 deletions external/cb.patch
Original file line number Diff line number Diff line change
@@ -1,17 +1,29 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b08debb..4171092 100644
index da39b5c..d3fa72b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -62,9 +62,9 @@ endif()
@@ -81,15 +81,15 @@ endif()

add_subdirectory(thirdparty)
add_subdirectory(src)
-add_subdirectory(samples)
-add_subdirectory(tools/continuous_batching)
-add_subdirectory(tests/cpp)
+#add_subdirectory(samples)
+#add_subdirectory(tools/continuous_batching)
+#add_subdirectory(tests/cpp)
-if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/samples")
- add_subdirectory(samples)
-endif()
-if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tools/continuous_batching")
- add_subdirectory(tools/continuous_batching)
-endif()
-if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tests/cpp")
- add_subdirectory(tests/cpp)
-endif()
+#if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/samples")
+# add_subdirectory(samples)
+#endif()
+#if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tools/continuous_batching")
+# add_subdirectory(tools/continuous_batching)
+#endif()
+#if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tests/cpp")
+# add_subdirectory(tests/cpp)
+#endif()

install(FILES LICENSE DESTINATION docs/licensing COMPONENT licensing_genai RENAME LICENSE-GENAI)
install(FILES third-party-programs.txt DESTINATION docs/licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt)
Expand Down
2 changes: 1 addition & 1 deletion src/example/SampleCpuExtension/Dockerfile.redhat
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ RUN mkdir -p $TEMP_DIR && cd $TEMP_DIR/ && \
mkdir /opt/intel && \
tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
ln -s /opt/intel/l_openvino_* /opt/intel/openvino && \
ln -s /opt/intel/l_openvino_* /opt/intel/openvino_2024
ln -s /opt/intel/l_openvino_* /opt/intel/openvino_2025

WORKDIR /workspace
COPY Makefile ov_extension.cpp CustomReluOp.cpp CustomReluOp.hpp ./
Expand Down
2 changes: 1 addition & 1 deletion src/example/SampleCpuExtension/Dockerfile.ubuntu
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ RUN mkdir -p $TEMP_DIR && cd $TEMP_DIR/ && \
mkdir /opt/intel && \
tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \
ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2024
ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2025
WORKDIR /workspace
COPY Makefile ov_extension.cpp CustomReluOp.cpp CustomReluOp.hpp ./

Expand Down
2 changes: 1 addition & 1 deletion src/example/SampleCpuExtension/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# limitations under the License.
#

OPENVINO_PATH ?= /opt/intel/openvino_2024
OPENVINO_PATH ?= /opt/intel/openvino_2025

all: $(eval SHELL:=/bin/bash)
/usr/bin/g++ --version
Expand Down
8 changes: 2 additions & 6 deletions src/llm/apis/openai_completions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,13 +416,9 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const std::vect
// choices: array of size N, where N is related to n request parameter
writer.String("choices");
writer.StartArray(); // [
int i = 0;
int n = request.numReturnSequences.value_or(1);
int index = 0;
usage.completionTokens = 0;
for (const ov::genai::GenerationOutput& generationOutput : generationOutputs) {
if (i >= n)
break;

SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Generated tokens: {}", generationOutput.generated_ids);
usage.completionTokens += generationOutput.generated_ids.size();
if (request.echo)
Expand All @@ -445,7 +441,7 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const std::vect
}
// index: integer; Choice index, only n=1 supported anyway
writer.String("index");
writer.Int(i++);
writer.Int(index++);
// logprobs: object/null; Log probability information for the choice. TODO
writer.String("logprobs");
if (this->request.logprobschat || this->request.logprobs > 0) {
Expand Down
4 changes: 0 additions & 4 deletions src/llm/http_llm_calculator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,6 @@ class HttpLLMCalculator : public CalculatorBase {
return absl::CancelledError();
}
RET_CHECK(generationOutputs.size() >= 1);
std::sort(generationOutputs.begin(), generationOutputs.end(), [](ov::genai::GenerationOutput& r1, ov::genai::GenerationOutput& r2) {
return r1.score > r2.score;
});

std::string response = this->apiHandler->serializeUnaryResponse(generationOutputs);
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Complete unary response: {}", response);
cc->Outputs().Tag(OUTPUT_TAG_NAME).Add(new OutputDataType{std::move(response)}, timestamp);
Expand Down
17 changes: 7 additions & 10 deletions src/llm/llm_calculator.proto
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,17 @@ message LLMCalculatorOptions {
// model memory cache to allocate in GB
optional uint64 cache_size = 3 [default = 8];

// DEPRECATED - this option is not effective starting with 2024.5 and will be removed in 2025.0 release
optional uint64 block_size = 4 [default = 32];
optional uint64 max_num_seqs = 4 [default = 256];

optional uint64 max_num_seqs = 5 [default = 256];
optional bool dynamic_split_fuse = 5 [default = true];

optional bool dynamic_split_fuse = 7 [default = true];
optional string device = 6 [default = "CPU"];

optional string device = 8 [default = "CPU"];
optional string plugin_config = 7 [default = ""];

optional string plugin_config = 9 [default = ""];
optional uint32 best_of_limit = 8 [default = 20];

optional uint32 best_of_limit = 10 [default = 20];
optional uint32 max_tokens_limit = 9 [default = 4096];

optional uint32 max_tokens_limit = 11 [default = 4096];

optional bool enable_prefix_caching = 12 [default = false];
optional bool enable_prefix_caching = 10 [default = false];
}
5 changes: 0 additions & 5 deletions src/llm/llmnoderesources.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,6 @@ Status LLMNodeResources::initializeLLMNodeResources(LLMNodeResources& nodeResour
return StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST;
}

// TODO: Remove along with block_size option in the proto in 2025.x release
if (nodeOptions.has_block_size()) {
SPDLOG_LOGGER_WARN(modelmanager_logger, "Since 2024.5, block_size is selected automatically and setting it explicitly is ineffective. "
"Please remove it from the configuration as in 2025.0 it will cause error.");
}
nodeResources.schedulerConfig = {
.max_num_batched_tokens = nodeOptions.max_num_batched_tokens(),
.cache_size = nodeOptions.cache_size(),
Expand Down
4 changes: 0 additions & 4 deletions src/test/llmnode_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2734,7 +2734,6 @@ TEST_F(LLMOptionsHttpTest, LLMNodeOptionsCheckNonDefault) {
models_path: "/ovms/src/test/llm_testing/facebook/opt-125m"
max_num_batched_tokens: 1024
cache_size: 1
block_size: 8
max_num_seqs: 95
dynamic_split_fuse: false
enable_prefix_caching: true
Expand Down Expand Up @@ -2762,9 +2761,6 @@ TEST_F(LLMOptionsHttpTest, LLMNodeOptionsCheckNonDefault) {

ASSERT_EQ(nodeResources.schedulerConfig.max_num_batched_tokens, 1024);
ASSERT_EQ(nodeResources.schedulerConfig.cache_size, 1);
// We create graph with block_size set in graph config to make sure setting it does not result in error
// TODO: Remove below commented assertion as well as block_size from the testPbtxt when block_size is removed from options proto.
// ASSERT_EQ(nodeResources.schedulerConfig.block_size, 8);
ASSERT_EQ(nodeResources.schedulerConfig.dynamic_split_fuse, false);
ASSERT_EQ(nodeResources.schedulerConfig.max_num_seqs, 95);
ASSERT_EQ(nodeResources.schedulerConfig.enable_prefix_caching, true);
Expand Down
Loading

0 comments on commit b7b57e7

Please sign in to comment.