openvinotoolkit · mzegla · Nov 21, 2024 · Nov 8, 2024 · Nov 18, 2024 · Nov 18, 2024
diff --git a/Dockerfile.redhat b/Dockerfile.redhat
@@ -199,7 +199,7 @@ RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; \
     ln -s /openvino/inference-engine/temp/opencv_*/opencv /opt/intel/openvino/extras && \
     ln -s /usr/local/runtime /opt/intel/openvino && \
     ln -s /openvino/scripts/setupvars/setupvars.sh /opt/intel/openvino/setupvars.sh && \
-    ln -s /opt/intel/openvino /opt/intel/openvino_2024
+    ln -s /opt/intel/openvino /opt/intel/openvino_2025
 RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; mkdir -p /opt/intel/openvino && cp -r /openvino/bin/intel64/Release/python /opt/intel/openvino/
 RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; cp -r /openvino/tools/ovc/* /opt/intel/openvino/python
 ################## END OF OPENVINO SOURCE BUILD ######################
@@ -212,7 +212,7 @@ RUN if [ "$ov_use_binary" = "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ;
     mkdir /opt/intel && \
     tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
     ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \
-    ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2024
+    ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2025
 
 # update oneTBB
 RUN wget -nv https://github.com/oneapi-src/oneTBB/releases/download/v2021.13.0/oneapi-tbb-2021.13.0-lin.tgz && \
@@ -300,13 +300,13 @@ WORKDIR /ovms/src/example/SampleCpuExtension/
 RUN make
 
 RUN if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; \
-    mkdir -p /opt/intel/openvino/python/openvino-2024.5.dist-info && \
-    echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2024.5' > /opt/intel/openvino/python/openvino-2024.5.dist-info/METADATA
+    mkdir -p /opt/intel/openvino/python/openvino-2025.0.dist-info && \
+    echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2025.0' > /opt/intel/openvino/python/openvino-2025.0.dist-info/METADATA
 ENV PYTHONPATH=/opt/intel/openvino/python:/ovms/bazel-bin/src/python/binding
 
 WORKDIR /ovms
 
-ARG PROJECT_VERSION="2024.5"
+ARG PROJECT_VERSION="2025.0"
 ARG PROJECT_NAME="OpenVINO Model Server"
 LABEL description=${PROJECT_NAME}
 ARG minitrace_flags
@@ -398,8 +398,8 @@ RUN if [ -f /ovms_release/lib/libovms_shared.so ] ; then mv /ovms_release/lib/li
 FROM $RELEASE_BASE_IMAGE as release
 LABEL "name"="OVMS"
 LABEL "vendor"="Intel Corporation"
-LABEL "version"="2024.5"
-LABEL "release"="2024"
+LABEL "version"="2025.0"
+LABEL "release"="2025"
 LABEL "summary"="OpenVINO(TM) Model Server"
 LABEL "description"="OpenVINO(TM) Model Server is a solution for serving AI models"
 ARG INSTALL_RPMS_FROM_URL=

diff --git a/Dockerfile.ubuntu b/Dockerfile.ubuntu
@@ -167,7 +167,7 @@ RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; \
     mkdir -p /opt/intel/openvino && \
     ln -s /usr/local/runtime /opt/intel/openvino && \
     ln -s /openvino/scripts/setupvars/setupvars.sh /opt/intel/openvino/setupvars.sh && \
-    ln -s /opt/intel/openvino /opt/intel/openvino_2024
+    ln -s /opt/intel/openvino /opt/intel/openvino_2025
 RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; mkdir -p /opt/intel/openvino && cp -r /openvino/bin/intel64/${CMAKE_BUILD_TYPE}/python /opt/intel/openvino/
 RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; cp -r /openvino/tools/ovc/* /opt/intel/openvino/python
 
@@ -184,7 +184,7 @@ RUN if [ "$ov_use_binary" == "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ;
     mkdir /opt/intel && \
     tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
     ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \
-    ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2024
+    ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2025
 
 # update oneTBB
 RUN wget -nv https://github.com/oneapi-src/oneTBB/releases/download/v2021.13.0/oneapi-tbb-2021.13.0-lin.tgz && \
@@ -305,12 +305,12 @@ WORKDIR /ovms/src/example/SampleCpuExtension/
 RUN make && cp libcustom_relu_cpu_extension.so /opt
 
 RUN if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; \
-    mkdir -p /opt/intel/openvino/python/openvino-2024.5.dist-info && \
-    echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2024.5' > /opt/intel/openvino/python/openvino-2024.5.dist-info/METADATA
+    mkdir -p /opt/intel/openvino/python/openvino-2025.0.dist-info && \
+    echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2025.0' > /opt/intel/openvino/python/openvino-2025.0.dist-info/METADATA
 
 WORKDIR /ovms
 
-ARG PROJECT_VERSION="2024.5"
+ARG PROJECT_VERSION="2025.0"
 ARG PROJECT_NAME="OpenVINO Model Server"
 LABEL description=${PROJECT_NAME}
 ARG minitrace_flags

diff --git a/Makefile b/Makefile
@@ -74,9 +74,9 @@ FUZZER_BUILD ?= 0
 # NOTE: when changing any value below, you'll need to adjust WORKSPACE file by hand:
 #         - uncomment source build section, comment binary section
 #         - adjust binary version path - version variable is not passed to WORKSPACE file!
-OV_SOURCE_BRANCH ?= db64e5c66a9fdede7ecb8473b399ac94210f5136  # releases/2024/5 2024-11-09
-OV_CONTRIB_BRANCH ?= c39462ca8d7c550266dc70cdbfbe4fc8c5be0677  # releases/2024/5 2024-10-31
-OV_TOKENIZERS_BRANCH ?= 92bec551bdb1eed94aa375415c82b4068520c7b9 # releases/2024/5 2024-11-04
+OV_SOURCE_BRANCH ?= 6733cc320915ca6bfad9036940bf5ca244b41a8b  # master / 2024-11-18
+OV_CONTRIB_BRANCH ?= c39462ca8d7c550266dc70cdbfbe4fc8c5be0677  # master / 2024-10-31
+OV_TOKENIZERS_BRANCH ?= 5ccd56de5a7da65ad7ea7c5c42cec55be97df312 # master / 2024-11-14
 
 OV_SOURCE_ORG ?= openvinotoolkit
 OV_CONTRIB_ORG ?= openvinotoolkit
@@ -166,11 +166,11 @@ ifeq ($(findstring ubuntu,$(BASE_OS)),ubuntu)
   ifeq ($(BASE_OS_TAG),20.04)
         OS=ubuntu20
 	INSTALL_DRIVER_VERSION ?= "22.43.24595"
-	DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu20_2024.5.0.17288.7975fa5da0c_x86_64.tgz
+	DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu20_2025.0.0.17377.6733cc32091_x86_64.tgz
   else ifeq  ($(BASE_OS_TAG),22.04)
         OS=ubuntu22
 	INSTALL_DRIVER_VERSION ?= "24.26.30049"
-	DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu22_2024.5.0.17288.7975fa5da0c_x86_64.tgz
+	DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu22_2025.0.0.17377.6733cc32091_x86_64.tgz
   endif
 endif
 ifeq ($(BASE_OS),redhat)
@@ -185,7 +185,7 @@ ifeq ($(BASE_OS),redhat)
   endif
   DIST_OS=redhat
   INSTALL_DRIVER_VERSION ?= "23.22.26516"
-  DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_rhel8_2024.5.0.17288.7975fa5da0c_x86_64.tgz
+  DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_rhel8_2025.0.0.17377.6733cc32091_x86_64.tgz
 endif
 
 OVMS_CPP_DOCKER_IMAGE ?= openvino/model_server
@@ -201,7 +201,7 @@ endif
 OVMS_PYTHON_IMAGE_TAG ?= py
 
 PRODUCT_NAME = "OpenVINO Model Server"
-PRODUCT_VERSION ?= "2024.5"
+PRODUCT_VERSION ?= "2025.0"
 PROJECT_VER_PATCH =
 
 $(eval PROJECT_VER_PATCH:=`git rev-parse --short HEAD`)

diff --git a/create_package.sh b/create_package.sh
@@ -14,7 +14,7 @@
 # limitations under the License.
 #
 
-# This script should be used inside the build image to create a binary package based on the compiled artefacts
+# This script should be used inside the build image to create a binary package based on the compiled artifacts
 
 env
 mkdir -vp /ovms_release/bin
@@ -28,7 +28,8 @@ find /ovms/bazel-out/k8-*/bin -iname '*.so*' ! -type d ! -name "libgtest.so" ! -
 mv /ovms_release/lib/libcustom_node* /ovms_release/lib/custom_nodes/
 cd /ovms_release/lib/ ; rm -f libazurestorage.so.* ; ln -s libazurestorage.so libazurestorage.so.7 ;ln -s libazurestorage.so libazurestorage.so.7.5
 cd /ovms_release/lib/ ; rm -f libcpprest.so.2.10 ; ln -s libcpprest.so libcpprest.so.2.10
-if [ -f /ovms_release/lib/libopenvino_genai.so ]; then cd /ovms_release/lib/ ; rm -f libopenvino_genai.so.* ; ln -s libopenvino_genai.so libopenvino_genai.so.2450 ; ln -s libopenvino_genai.so.2024.5.0.0 libopenvino_genai.so.2450 ; fi
+# Remove versioned GenAI library files and re-create libopenvino_genai.so.2500 as a symlink to the unversioned library.
+if [ -f /ovms_release/lib/libopenvino_genai.so ]; then cd /ovms_release/lib/ ; rm -f libopenvino_genai.so.* ; ln -s libopenvino_genai.so libopenvino_genai.so.2500 ; fi
 rm -f /ovms_release/lib/libssl.so
 rm -f /ovms_release/lib/libsampleloader*
 

diff --git a/demos/common/export_models/requirements.txt b/demos/common/export_models/requirements.txt
@@ -2,8 +2,8 @@
 --extra-index-url "https://storage.openvinotoolkit.org/simple/wheels/nightly"
 --pre
 optimum-intel@git+https://github.com/huggingface/optimum-intel.git  
-openvino-tokenizers[transformers]==2024.5.* 
-openvino==2024.5.* 
+openvino-tokenizers[transformers]==2025.0.* 
+openvino==2025.0.* 
 nncf>=2.11.0 
 sentence_transformers==3.1.1 
 openai 

diff --git a/demos/embeddings/README.md b/demos/embeddings/README.md
@@ -2,18 +2,6 @@
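-This demo shows how to deploy embeddings models in the OpenVINO Model Server for text feature extractions.
-Text generation use case is exposed via OpenAI API `embeddings` endpoint.
+This demo shows how to deploy embeddings models in the OpenVINO Model Server for text feature extraction.
+The embeddings use case is exposed via the OpenAI API `embeddings` endpoint.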
 
-## Get the docker image
-
-Build the image from source to try this new feature. It will be included in the public image in the coming version 2024.5.
-```bash
-git clone https://github.com/openvinotoolkit/model_server.git
-cd model_server
-make release_image GPU=1
-```
-It will create an image called `openvino/model_server:latest`.
-> **Note:** This operation might take 40min or more depending on your build host.
-> **Note:** `GPU` parameter in image build command is needed to include dependencies for GPU device.
-
 ## Model preparation
 > **Note** Python 3.9 or higher is needed for that step
 > 

diff --git a/demos/python_demos/clip_image_classification/download_model_requirements.txt b/demos/python_demos/clip_image_classification/download_model_requirements.txt
@@ -1,7 +1,7 @@
 --extra-index-url "https://download.pytorch.org/whl/cpu"
 --extra-index-url "https://storage.openvinotoolkit.org/simple/wheels/nightly"
 --pre
-openvino==2024.5.*
+openvino==2025.0.*
 numpy<2.0
 transformers==4.40.2
 pillow==10.3.0

diff --git a/demos/rerank/README.md b/demos/rerank/README.md
@@ -1,17 +1,5 @@
 # How to serve Rerank models via Cohere API {#ovms_demos_rerank}
 
-## Get the docker image
-
-Build the image from source to try this new feature. It will be included in the public image in the coming version 2024.5.
-```bash
-git clone https://github.com/openvinotoolkit/model_server.git
-cd model_server
-make release_image GPU=1
-```
-It will create an image called `openvino/model_server:latest`.
-> **Note:** This operation might take 40min or more depending on your build host.
-> **Note:** `GPU` parameter in image build command is needed to include dependencies for GPU device.
-
 ## Model preparation
 > **Note** Python 3.9 or higher is needed for that step
 Here, the original Pytorch LLM model and the tokenizer will be converted to IR format and optionally quantized.

diff --git a/docs/developer_guide.md b/docs/developer_guide.md
@@ -61,15 +61,15 @@ In-case of problems, see [Debugging](#debugging).
 	docker run -it -v ${PWD}:/ovms --entrypoint bash -p 9178:9178 openvino/model_server-build:latest
 	```
 
-4. In the docker container context compile the source code via :
+4. In the docker container context compile the source code via (choose distro `ubuntu` or `redhat` depending on the image type):
 	```bash
-	bazel build --config=mp_on_py_on //src:ovms
+	bazel build --//:distro=ubuntu --config=mp_on_py_on //src:ovms
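-> **NOTE**: There are several options that would disable specific parts of OVMS. For details check ovms bazel build files.
 	```
+	> **NOTE**: There are several options that would disable specific parts of OVMS. For details check ovms bazel build files.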
 
-5. From the container, run a single unit test :
+5. From the container, run a single unit test (choose distro `ubuntu` or `redhat` depending on the image type):
 	```bash
-	bazel test --config=mp_on_py_on --test_summary=detailed --test_output=all --test_filter='ModelVersionStatus.*' //src:ovms_test
+	bazel test --//:distro=ubuntu --config=mp_on_py_on --test_summary=detailed --test_output=all --test_filter='ModelVersionStatus.*' //src:ovms_test
 	```
 
 | Argument      | Description |

diff --git a/external/cb.patch b/external/cb.patch
@@ -1,17 +1,29 @@
 diff --git a/CMakeLists.txt b/CMakeLists.txt
-index b08debb..4171092 100644
+index da39b5c..d3fa72b 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
-@@ -62,9 +62,9 @@ endif()
+@@ -81,15 +81,15 @@ endif()
 
  add_subdirectory(thirdparty)
  add_subdirectory(src)
--add_subdirectory(samples)
--add_subdirectory(tools/continuous_batching)
--add_subdirectory(tests/cpp)
-+#add_subdirectory(samples)
-+#add_subdirectory(tools/continuous_batching)
-+#add_subdirectory(tests/cpp)
+-if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/samples")
+-    add_subdirectory(samples)
+-endif()
+-if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tools/continuous_batching")
+-    add_subdirectory(tools/continuous_batching)
+-endif()
+-if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tests/cpp")
+-    add_subdirectory(tests/cpp)
+-endif()
++#if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/samples")
++#    add_subdirectory(samples)
++#endif()
++#if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tools/continuous_batching")
++#    add_subdirectory(tools/continuous_batching)
++#endif()
++#if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tests/cpp")
++#    add_subdirectory(tests/cpp)
++#endif()
 
  install(FILES LICENSE DESTINATION docs/licensing COMPONENT licensing_genai RENAME LICENSE-GENAI)
  install(FILES third-party-programs.txt DESTINATION docs/licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt)

diff --git a/src/example/SampleCpuExtension/Dockerfile.redhat b/src/example/SampleCpuExtension/Dockerfile.redhat
@@ -31,7 +31,7 @@ RUN mkdir -p $TEMP_DIR && cd $TEMP_DIR/ && \
     mkdir /opt/intel && \
     tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
     ln -s /opt/intel/l_openvino_* /opt/intel/openvino && \
-    ln -s /opt/intel/l_openvino_* /opt/intel/openvino_2024
+    ln -s /opt/intel/l_openvino_* /opt/intel/openvino_2025
 
 WORKDIR /workspace
 COPY Makefile ov_extension.cpp CustomReluOp.cpp CustomReluOp.hpp ./

diff --git a/src/example/SampleCpuExtension/Dockerfile.ubuntu b/src/example/SampleCpuExtension/Dockerfile.ubuntu
@@ -32,7 +32,7 @@ RUN mkdir -p $TEMP_DIR && cd $TEMP_DIR/ && \
     mkdir /opt/intel && \
     tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
     ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \
-    ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2024
+    ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2025
 WORKDIR /workspace
 COPY Makefile ov_extension.cpp CustomReluOp.cpp CustomReluOp.hpp ./
 

diff --git a/src/example/SampleCpuExtension/Makefile b/src/example/SampleCpuExtension/Makefile
@@ -14,7 +14,7 @@
 # limitations under the License.
 #
 
-OPENVINO_PATH ?= /opt/intel/openvino_2024
+OPENVINO_PATH ?= /opt/intel/openvino_2025
 
 all: $(eval SHELL:=/bin/bash)
 	/usr/bin/g++ --version

diff --git a/src/llm/apis/openai_completions.cpp b/src/llm/apis/openai_completions.cpp
@@ -416,13 +416,9 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const std::vect
     // choices: array of size N, where N is related to n request parameter
     writer.String("choices");
     writer.StartArray();  // [
-    int i = 0;
-    int n = request.numReturnSequences.value_or(1);
+    int index = 0;
     usage.completionTokens = 0;
     for (const ov::genai::GenerationOutput& generationOutput : generationOutputs) {
-        if (i >= n)
-            break;
-
         SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Generated tokens: {}", generationOutput.generated_ids);
         usage.completionTokens += generationOutput.generated_ids.size();
         if (request.echo)
@@ -445,7 +441,7 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const std::vect
         }
-        // index: integer; Choice index, only n=1 supported anyway
+        // index: integer; position of this choice in the choices array
         writer.String("index");
-        writer.Int(i++);
+        writer.Int(index++);
         // logprobs: object/null; Log probability information for the choice. TODO
         writer.String("logprobs");
         if (this->request.logprobschat || this->request.logprobs > 0) {

diff --git a/src/llm/http_llm_calculator.cc b/src/llm/http_llm_calculator.cc
@@ -189,10 +189,6 @@ class HttpLLMCalculator : public CalculatorBase {
                     return absl::CancelledError();
                 }
                 RET_CHECK(generationOutputs.size() >= 1);
-                std::sort(generationOutputs.begin(), generationOutputs.end(), [](ov::genai::GenerationOutput& r1, ov::genai::GenerationOutput& r2) {
-                    return r1.score > r2.score;
-                });
-
                 std::string response = this->apiHandler->serializeUnaryResponse(generationOutputs);
                 SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Complete unary response: {}", response);
                 cc->Outputs().Tag(OUTPUT_TAG_NAME).Add(new OutputDataType{std::move(response)}, timestamp);

diff --git a/src/llm/llm_calculator.proto b/src/llm/llm_calculator.proto
@@ -32,8 +32,8 @@ message LLMCalculatorOptions {
     // model memory cache to allocate in GB
     optional uint64 cache_size  = 3 [default = 8];
 
-    // DEPRECATED - this option is not effective starting with 2024.5 and will be removed in 2025.0 release
-    optional uint64 block_size = 4 [default = 32];
+    // Field number 4 was used by the removed block_size option; it stays reserved so it is never reused.
+    reserved 4;
 
     optional uint64 max_num_seqs = 5 [default = 256];
 
diff --git a/src/llm/llmnoderesources.cpp b/src/llm/llmnoderesources.cpp
@@ -145,11 +145,6 @@ Status LLMNodeResources::initializeLLMNodeResources(LLMNodeResources& nodeResour
         return StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST;
     }
 
-    // TODO: Remove along with block_size option in the proto in 2025.x release
-    if (nodeOptions.has_block_size()) {
-        SPDLOG_LOGGER_WARN(modelmanager_logger, "Since 2024.5, block_size is selected automatically and setting it explicitly is ineffective. "
-                                                "Please remove it from the configuration as in 2025.0 it will cause error.");
-    }
     nodeResources.schedulerConfig = {
         .max_num_batched_tokens = nodeOptions.max_num_batched_tokens(),
         .cache_size = nodeOptions.cache_size(),

diff --git a/src/test/llmnode_test.cpp b/src/test/llmnode_test.cpp
@@ -2734,7 +2734,6 @@ TEST_F(LLMOptionsHttpTest, LLMNodeOptionsCheckNonDefault) {
                 models_path: "/ovms/src/test/llm_testing/facebook/opt-125m"
                 max_num_batched_tokens: 1024
                 cache_size: 1
-                block_size: 8
                 max_num_seqs: 95
                 dynamic_split_fuse: false
                 enable_prefix_caching: true
@@ -2762,9 +2761,6 @@ TEST_F(LLMOptionsHttpTest, LLMNodeOptionsCheckNonDefault) {
 
     ASSERT_EQ(nodeResources.schedulerConfig.max_num_batched_tokens, 1024);
     ASSERT_EQ(nodeResources.schedulerConfig.cache_size, 1);
-    // We create graph with block_size set in graph config to make sure setting it does not result in error
-    // TODO: Remove below commented assertion as well as block_size from the testPbtxt when block_size is removed from options proto.
-    // ASSERT_EQ(nodeResources.schedulerConfig.block_size, 8);
     ASSERT_EQ(nodeResources.schedulerConfig.dynamic_split_fuse, false);
     ASSERT_EQ(nodeResources.schedulerConfig.max_num_seqs, 95);
     ASSERT_EQ(nodeResources.schedulerConfig.enable_prefix_caching, true);