From 247441fb311b9072f63b8f1dc2333de8b220c2de Mon Sep 17 00:00:00 2001
From: Zhang Yi3
Date: Tue, 26 Nov 2024 18:35:00 -0800
Subject: [PATCH 1/6] [CPU] Check runtime_options from IR model

---
 src/inference/src/dev/core_impl.cpp       | 23 +++++++++++++++++--
 src/inference/src/dev/core_impl.hpp       |  6 +++++
 .../ov_executable_network/properties.cpp  | 14 +++++++++++
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp
index 244d27b5eebb67..a0360b8f9c4c41 100644
--- a/src/inference/src/dev/core_impl.cpp
+++ b/src/inference/src/dev/core_impl.cpp
@@ -736,7 +736,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
     ov::AnyMap config_with_batch = config;
     // if auto-batching is applicable, the below function will patch the device name and config accordingly:
     auto model = apply_auto_batching(model_, deviceName, config_with_batch);
-
+    apply_rt_info(model_, config_with_batch);
     auto parsed = parseDeviceNameIntoConfig(deviceName, config_with_batch, is_proxy_device(device_name));
     auto plugin = get_plugin(parsed._deviceName);
     ov::SoPtr<ov::ICompiledModel> res;
@@ -769,7 +769,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
     ov::AnyMap config_with_batch = config;
     // if auto-batching is applicable, the below function will patch the device name and config accordingly:
     auto model = apply_auto_batching(model_, deviceName, config_with_batch);
-
+    apply_rt_info(model_, config_with_batch);
     auto parsed = parseDeviceNameIntoConfig(deviceName, config_with_batch, is_proxy_device(deviceName));
     auto plugin = get_plugin(parsed._deviceName);
     ov::SoPtr<ov::ICompiledModel> res;
@@ -1098,6 +1098,25 @@ std::shared_ptr<const ov::Model> ov::CoreImpl::apply_auto_batching(const std::sh
     return ov::details::apply_batch_affinity(model, deviceNameWithoutBatch);
 }
 
+void ov::CoreImpl::apply_rt_info(const std::shared_ptr<const ov::Model>& model,
+                                 ov::AnyMap& config) const {
+    if (model->has_rt_info({"runtime_options", "KV_CACHE_PRECISION"})) {
+        if (config.find("KV_CACHE_PRECISION") == config.end()) {
+            const auto kv_cache_precision =
+                model->get_rt_info<ov::element::Type>({"runtime_options", "KV_CACHE_PRECISION"});
+            config.insert(ov::hint::kv_cache_precision(kv_cache_precision));
+        }
+    }
+    if (model->has_rt_info({"runtime_options", "DYNAMIC_QUANTIZATION_GROUP_SIZE"})) {
+        if (config.find("DYNAMIC_QUANTIZATION_GROUP_SIZE") == config.end()) {
+            const auto dyn_quant_group_size =
+                model->get_rt_info<uint64_t>({"runtime_options", "DYNAMIC_QUANTIZATION_GROUP_SIZE"});
+            config.insert(ov::hint::dynamic_quantization_group_size(dyn_quant_group_size));
+        }
+    }
+}
+
+
 void ov::CoreImpl::set_property(const std::string& device_name, const AnyMap& properties) {
     OPENVINO_ASSERT(device_name.find("HETERO:") != 0,
                     "set_property is supported only for HETERO itself (without devices). "
" diff --git a/src/inference/src/dev/core_impl.hpp b/src/inference/src/dev/core_impl.hpp index 7cf12f3ba3280c..6fb63f2ef4e522 100644 --- a/src/inference/src/dev/core_impl.hpp +++ b/src/inference/src/dev/core_impl.hpp @@ -200,6 +200,12 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this& model, + ov::AnyMap& config) const; + /* * @brief Register plugins according to the build configuration */ diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp index 8ec0900bc7d176..37845422195a95 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp @@ -327,4 +327,18 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkCheckCPUExecutionDevice) { ASSERT_EQ(value.as(), "CPU"); } +TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkCheckCPURuntimOptions) { + ov::Core ie; + ov::Any type; + ov::Any size; + ov::CompiledModel compiledModel; + model->set_rt_info("f16", "runtime_options", "KV_CACHE_PRECISION"); + model->set_rt_info("0", "runtime_options", "DYNAMIC_QUANTIZATION_GROUP_SIZE"); + OV_ASSERT_NO_THROW(compiledModel = ie.compile_model(model, deviceName)); + OV_ASSERT_NO_THROW(type = compiledModel.get_property(ov::hint::kv_cache_precision)); + OV_ASSERT_NO_THROW(size = compiledModel.get_property(ov::hint::dynamic_quantization_group_size)); + ASSERT_EQ(type.as(), ov::element::f16); + ASSERT_EQ(size.as(), 0); +} + } // namespace From c6eeb4db818a89b3fcd3089c93bc7c5f39c0dda1 Mon Sep 17 00:00:00 2001 From: Zhang Yi3 Date: Tue, 26 Nov 2024 18:51:04 -0800 Subject: [PATCH 2/6] fix code style --- src/inference/src/dev/core_impl.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index a0360b8f9c4c41..d5227eeab8cbef 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -1098,8 +1098,7 @@ std::shared_ptr ov::CoreImpl::apply_auto_batching(const std::sh return ov::details::apply_batch_affinity(model, deviceNameWithoutBatch); } -void ov::CoreImpl::apply_rt_info(const std::shared_ptr& model, - ov::AnyMap& config) const { +void ov::CoreImpl::apply_rt_info(const std::shared_ptr& model, ov::AnyMap& config) const { if (model->has_rt_info({"runtime_options", "KV_CACHE_PRECISION"})) { if (config.find("KV_CACHE_PRECISION") == config.end()) { const auto kv_cache_precision = @@ -1116,7 +1115,6 @@ void ov::CoreImpl::apply_rt_info(const std::shared_ptr& model, } } - void ov::CoreImpl::set_property(const std::string& device_name, const AnyMap& properties) { OPENVINO_ASSERT(device_name.find("HETERO:") != 0, "set_property is supported only for HETERO itself (without devices). " From 0d954808e3c60cdfbe7ef30fc1a130b6b7db4c99 Mon Sep 17 00:00:00 2001 From: Zhang Yi3 Date: Wed, 27 Nov 2024 17:00:59 -0800 Subject: [PATCH 3/6] Revert "[CPU]Check runtime_options from IR model" This reverts commit 247441fb311b9072f63b8f1dc2333de8b220c2de. 
---
 src/inference/src/dev/core_impl.cpp | 21 ++-------------------
 src/inference/src/dev/core_impl.hpp |  6 ------
 2 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp
index d5227eeab8cbef..244d27b5eebb67 100644
--- a/src/inference/src/dev/core_impl.cpp
+++ b/src/inference/src/dev/core_impl.cpp
@@ -736,7 +736,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
     ov::AnyMap config_with_batch = config;
     // if auto-batching is applicable, the below function will patch the device name and config accordingly:
     auto model = apply_auto_batching(model_, deviceName, config_with_batch);
-    apply_rt_info(model_, config_with_batch);
+
     auto parsed = parseDeviceNameIntoConfig(deviceName, config_with_batch, is_proxy_device(device_name));
     auto plugin = get_plugin(parsed._deviceName);
     ov::SoPtr<ov::ICompiledModel> res;
@@ -769,7 +769,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
     ov::AnyMap config_with_batch = config;
     // if auto-batching is applicable, the below function will patch the device name and config accordingly:
     auto model = apply_auto_batching(model_, deviceName, config_with_batch);
-    apply_rt_info(model_, config_with_batch);
+
     auto parsed = parseDeviceNameIntoConfig(deviceName, config_with_batch, is_proxy_device(deviceName));
     auto plugin = get_plugin(parsed._deviceName);
     ov::SoPtr<ov::ICompiledModel> res;
@@ -1098,23 +1098,6 @@ std::shared_ptr<const ov::Model> ov::CoreImpl::apply_auto_batching(const std::sh
     return ov::details::apply_batch_affinity(model, deviceNameWithoutBatch);
 }
 
-void ov::CoreImpl::apply_rt_info(const std::shared_ptr<const ov::Model>& model, ov::AnyMap& config) const {
-    if (model->has_rt_info({"runtime_options", "KV_CACHE_PRECISION"})) {
-        if (config.find("KV_CACHE_PRECISION") == config.end()) {
-            const auto kv_cache_precision =
-                model->get_rt_info<ov::element::Type>({"runtime_options", "KV_CACHE_PRECISION"});
-            config.insert(ov::hint::kv_cache_precision(kv_cache_precision));
-        }
-    }
-    if (model->has_rt_info({"runtime_options", "DYNAMIC_QUANTIZATION_GROUP_SIZE"})) {
-        if (config.find("DYNAMIC_QUANTIZATION_GROUP_SIZE") == config.end()) {
-            const auto dyn_quant_group_size =
-                model->get_rt_info<uint64_t>({"runtime_options", "DYNAMIC_QUANTIZATION_GROUP_SIZE"});
-            config.insert(ov::hint::dynamic_quantization_group_size(dyn_quant_group_size));
-        }
-    }
-}
-
 void ov::CoreImpl::set_property(const std::string& device_name, const AnyMap& properties) {
     OPENVINO_ASSERT(device_name.find("HETERO:") != 0,
                     "set_property is supported only for HETERO itself (without devices). "
" diff --git a/src/inference/src/dev/core_impl.hpp b/src/inference/src/dev/core_impl.hpp index 6fb63f2ef4e522..7cf12f3ba3280c 100644 --- a/src/inference/src/dev/core_impl.hpp +++ b/src/inference/src/dev/core_impl.hpp @@ -200,12 +200,6 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this& model, - ov::AnyMap& config) const; - /* * @brief Register plugins according to the build configuration */ From b64d31b258b358677dcb62c07bd45f8ad2b757b6 Mon Sep 17 00:00:00 2001 From: Zhang Yi3 Date: Wed, 27 Nov 2024 17:26:46 -0800 Subject: [PATCH 4/6] [CPU]move runtim_option check to cpu plugin --- src/plugins/intel_cpu/src/plugin.cpp | 14 +++++++++++++- src/plugins/intel_cpu/src/plugin.h | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index b74d4f7c8acbbb..7dcde369bf9ff6 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -206,6 +206,16 @@ static Config::ModelType getModelType(const std::shared_ptr& model) return Config::ModelType::Unknown; } +void Plugin::apply_rt_info(const std::shared_ptr& model, ov::intel_cpu::Config& config) const { + if (model->has_rt_info({"runtime_options", "KV_CACHE_PRECISION"})) { + config.kvCachePrecision = model->get_rt_info({"runtime_options", "KV_CACHE_PRECISION"}); + } + if (model->has_rt_info({"runtime_options", "DYNAMIC_QUANTIZATION_GROUP_SIZE"})) { + config.fcDynamicQuantizationGroupSize = + model->get_rt_info({"runtime_options", "DYNAMIC_QUANTIZATION_GROUP_SIZE"}); + } +} + std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, const ov::AnyMap& orig_config) const { OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Plugin::compile_model"); @@ -247,6 +257,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // update the props after the perf mode translated to configs // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not? Config conf = engConfig; + apply_rt_info(cloned_model, conf); conf.readProperties(config, modelType); Transformations transformations(cloned_model, conf); @@ -520,6 +531,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& Config conf = engConfig; Config::ModelType modelType = getModelType(model); + apply_rt_info(model, conf); conf.readProperties(config, modelType); auto context = std::make_shared(conf, fake_w_cache, false); @@ -575,7 +587,7 @@ std::shared_ptr Plugin::import_model(std::istream& model_str Config conf = engConfig; Config::ModelType modelType = getModelType(model); - + apply_rt_info(model, conf); // check ov::loaded_from_cache property and erase it to avoid exception in readProperties. auto _config = config; const auto& it = _config.find(ov::loaded_from_cache.name()); diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index 2548ba2c1cc8af..414811a2a2a5b7 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -50,7 +50,7 @@ class Plugin : public ov::IPlugin { void get_performance_streams(Config& config, const std::shared_ptr& model) const; void calculate_streams(Config& conf, const std::shared_ptr& model, bool imported = false) const; - + void apply_rt_info(const std::shared_ptr& model, ov::intel_cpu::Config& config) const; Config engConfig; /* Explicily configured streams have higher priority than performance hints. 
        So track if streams is set explicitly (not auto-configured) */

From 88fec83af06035a0d95336cba4bb20343f5517a8 Mon Sep 17 00:00:00 2001
From: Zhang Yi3
Date: Thu, 28 Nov 2024 03:03:03 -0800
Subject: [PATCH 5/6] apply review comments

---
 src/plugins/intel_cpu/src/plugin.cpp                     | 8 ++++----
 .../custom/behavior/ov_executable_network/properties.cpp | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index 7dcde369bf9ff6..2b9253d0e41f2c 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -207,12 +207,12 @@ static Config::ModelType getModelType(const std::shared_ptr<ov::Model>& model)
 }
 
 void Plugin::apply_rt_info(const std::shared_ptr<const ov::Model>& model, ov::intel_cpu::Config& config) const {
-    if (model->has_rt_info({"runtime_options", "KV_CACHE_PRECISION"})) {
-        config.kvCachePrecision = model->get_rt_info<ov::element::Type>({"runtime_options", "KV_CACHE_PRECISION"});
+    if (model->has_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()})) {
+        config.kvCachePrecision = model->get_rt_info<ov::element::Type>({"runtime_options", ov::hint::kv_cache_precision.name()});
     }
-    if (model->has_rt_info({"runtime_options", "DYNAMIC_QUANTIZATION_GROUP_SIZE"})) {
+    if (model->has_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()})) {
         config.fcDynamicQuantizationGroupSize =
-            model->get_rt_info<uint64_t>({"runtime_options", "DYNAMIC_QUANTIZATION_GROUP_SIZE"});
+            model->get_rt_info<uint64_t>({"runtime_options", ov::hint::dynamic_quantization_group_size.name()});
     }
 }

diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
index 37845422195a95..e0a3f7e30f10f0 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
@@ -332,8 +332,8 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkCheckCPURuntimOptions) {
     ov::Core ie;
     ov::Any type;
     ov::Any size;
     ov::CompiledModel compiledModel;
-    model->set_rt_info("f16", "runtime_options", "KV_CACHE_PRECISION");
-    model->set_rt_info("0", "runtime_options", "DYNAMIC_QUANTIZATION_GROUP_SIZE");
+    model->set_rt_info("f16", "runtime_options", ov::hint::kv_cache_precision.name());
+    model->set_rt_info("0", "runtime_options", ov::hint::dynamic_quantization_group_size.name());
     OV_ASSERT_NO_THROW(compiledModel = ie.compile_model(model, deviceName));
     OV_ASSERT_NO_THROW(type = compiledModel.get_property(ov::hint::kv_cache_precision));
     OV_ASSERT_NO_THROW(size = compiledModel.get_property(ov::hint::dynamic_quantization_group_size));

From 54500577aeb77e4eefcc0d8c35540903aa99936f Mon Sep 17 00:00:00 2001
From: Zhang Yi3
Date: Thu, 28 Nov 2024 22:20:18 -0800
Subject: [PATCH 6/6] [CPU] Make Config check runtime_options

---
 src/plugins/intel_cpu/src/config.cpp      | 10 ++++++++++
 src/plugins/intel_cpu/src/config.h        |  2 ++
 src/plugins/intel_cpu/src/plugin.cpp      | 16 +++-------------
 src/plugins/intel_cpu/src/plugin.h        |  1 -
 .../ov_executable_network/properties.cpp  | 17 +++++++++++++++++
 5 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index 92470ca063a4c0..8c90c5aeb11ad3 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -457,5 +457,15 @@ void Config::updateProperties() {
     _config.insert({ov::hint::num_requests.name(), std::to_string(hintNumRequests)});
 }
 
+void Config::applyRtInfo(const std::shared_ptr<const ov::Model>& model) {
+    if (model->has_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()})) {
+        this->kvCachePrecision = model->get_rt_info<ov::element::Type>({"runtime_options", ov::hint::kv_cache_precision.name()});
+    }
+    if (model->has_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()})) {
+        this->fcDynamicQuantizationGroupSize =
+            model->get_rt_info<uint64_t>({"runtime_options", ov::hint::dynamic_quantization_group_size.name()});
+    }
+}
+
 } // namespace intel_cpu
 } // namespace ov

diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
index 5f4bb25ede350e..a8439d87803fd4 100644
--- a/src/plugins/intel_cpu/src/config.h
+++ b/src/plugins/intel_cpu/src/config.h
@@ -106,6 +106,8 @@ struct Config {
     void updateProperties();
 
+    void applyRtInfo(const std::shared_ptr<const ov::Model>& model);
+
     std::map<std::string, std::string> _config;
 
     int modelPreferThreads = -1;

diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index 2b9253d0e41f2c..6fdbf7a4ea4dee 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -206,16 +206,6 @@ static Config::ModelType getModelType(const std::shared_ptr<ov::Model>& model) {
     return Config::ModelType::Unknown;
 }
 
-void Plugin::apply_rt_info(const std::shared_ptr<const ov::Model>& model, ov::intel_cpu::Config& config) const {
-    if (model->has_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()})) {
-        config.kvCachePrecision = model->get_rt_info<ov::element::Type>({"runtime_options", ov::hint::kv_cache_precision.name()});
-    }
-    if (model->has_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()})) {
-        config.fcDynamicQuantizationGroupSize =
-            model->get_rt_info<uint64_t>({"runtime_options", ov::hint::dynamic_quantization_group_size.name()});
-    }
-}
-
 std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<const ov::Model>& model,
                                                           const ov::AnyMap& orig_config) const {
     OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Plugin::compile_model");
@@ -247,7 +247,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
     // update the props after the perf mode translated to configs
     // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
     Config conf = engConfig;
-    apply_rt_info(cloned_model, conf);
+    conf.applyRtInfo(cloned_model);
     conf.readProperties(config, modelType);
 
     Transformations transformations(cloned_model, conf);
@@ -531,7 +521,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&
 
     Config conf = engConfig;
     Config::ModelType modelType = getModelType(model);
-    apply_rt_info(model, conf);
+    conf.applyRtInfo(model);
     conf.readProperties(config, modelType);
 
     auto context = std::make_shared<GraphContext>(conf, fake_w_cache, false);
@@ -587,7 +577,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_str
 
     Config conf = engConfig;
     Config::ModelType modelType = getModelType(model);
-    apply_rt_info(model, conf);
+    conf.applyRtInfo(model);
     // check ov::loaded_from_cache property and erase it to avoid exception in readProperties.
     auto _config = config;
     const auto& it = _config.find(ov::loaded_from_cache.name());

diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h
index 414811a2a2a5b7..8973478d30403f 100644
--- a/src/plugins/intel_cpu/src/plugin.h
+++ b/src/plugins/intel_cpu/src/plugin.h
@@ -50,7 +50,6 @@ class Plugin : public ov::IPlugin {
     void get_performance_streams(Config& config, const std::shared_ptr<ov::Model>& model) const;
     void calculate_streams(Config& conf, const std::shared_ptr<ov::Model>& model, bool imported = false) const;
-    void apply_rt_info(const std::shared_ptr<const ov::Model>& model, ov::intel_cpu::Config& config) const;
     Config engConfig;
     /* Explicily configured streams have higher priority than performance hints.
        So track if streams is set explicitly (not auto-configured) */

diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
index e0a3f7e30f10f0..a014eeb2cecdac 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
@@ -341,4 +341,21 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkCheckCPURuntimOptions) {
     ASSERT_EQ(size.as<uint64_t>(), 0);
 }
 
+TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkCheckCPURuntimOptionsWithCompileConfig) {
+    ov::Core ie;
+    ov::Any type;
+    ov::Any size;
+    ov::CompiledModel compiledModel;
+    model->set_rt_info("f16", "runtime_options", ov::hint::kv_cache_precision.name());
+    model->set_rt_info("0", "runtime_options", ov::hint::dynamic_quantization_group_size.name());
+    ov::AnyMap config;
+    config[ov::hint::kv_cache_precision.name()] = "u8";
+    config[ov::hint::dynamic_quantization_group_size.name()] = "16";
+    OV_ASSERT_NO_THROW(compiledModel = ie.compile_model(model, deviceName, config));
+    OV_ASSERT_NO_THROW(type = compiledModel.get_property(ov::hint::kv_cache_precision));
+    OV_ASSERT_NO_THROW(size = compiledModel.get_property(ov::hint::dynamic_quantization_group_size));
+    ASSERT_EQ(type.as<ov::element::Type>(), ov::element::u8);
+    ASSERT_EQ(size.as<uint64_t>(), 16);
+}
+
 } // namespace
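
Usage note (illustration only, not part of the patch series): the two tests above pin down the intended behavior: "runtime_options" entries embedded in a model's rt_info act as defaults, and properties passed explicitly at compile time override them, since Config::applyRtInfo() runs before Config::readProperties(). The sketch below shows that flow from the user's side; the model file name is hypothetical, while the API calls mirror those used in the tests.

    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        // Hypothetical IR file; any model the CPU plugin accepts would do.
        std::shared_ptr<ov::Model> model = core.read_model("model.xml");

        // Embed defaults in the model itself, as an IR's rt_info section would.
        model->set_rt_info("f16", "runtime_options", ov::hint::kv_cache_precision.name());
        model->set_rt_info("0", "runtime_options", ov::hint::dynamic_quantization_group_size.name());

        // No explicit properties: the rt_info defaults take effect (f16 / 0).
        auto from_rt_info = core.compile_model(model, "CPU");
        auto kv_default = from_rt_info.get_property(ov::hint::kv_cache_precision);

        // Explicit properties win over the embedded defaults (u8 / 16).
        auto from_config = core.compile_model(model,
                                              "CPU",
                                              ov::hint::kv_cache_precision(ov::element::u8),
                                              ov::hint::dynamic_quantization_group_size(16));
        auto kv_override = from_config.get_property(ov::hint::kv_cache_precision);
        return 0;
    }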