diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 0ffde116f4efc..a0bcf953938d9 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -70,7 +70,10 @@ BackendManager::BackendManager(const GlobalContext& global_context, i++; } subgraph_context_.subgraph_name = fused_node.Name(); - auto model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger); + std::unique_ptr model_proto; + if (!ep_ctx_handle_.IsValidOVEPCtxGraph()) { + model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger); + } std::string device_type = openvino_ep::BackendManager::GetGlobalContext().device_type; if (ModelHasSymbolicInputDims(subgraph)) { diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc index b0a7f46521cce..1409df2394d2f 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc @@ -98,7 +98,9 @@ Status EPCtxHandler::ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer, b auto node = graph_viewer.GetNode(0); auto& attrs = node->GetAttributes(); ORT_ENFORCE(attrs.count(EP_CACHE_CONTEXT) > 0); - model_stream_ = std::make_shared(attrs.at(EP_CACHE_CONTEXT).s()); + + ep_cache_context_attribute_ = &attrs.at(EP_CACHE_CONTEXT); + ep_context_embed_mode = static_cast(attrs.at(EMBED_MODE).i()); LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node"; @@ -106,6 +108,15 @@ Status EPCtxHandler::ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer, b return Status::OK(); } +const std::string &EPCtxHandler::GetModelBlobStream() const { + static std::string empty; + if (ep_cache_context_attribute_ != nullptr) { + return ep_cache_context_attribute_->s(); + } else { + return empty; + } +} + bool EPCtxHandler::CheckForOVEPCtxNode(const GraphViewer& graph_viewer, std::string openvino_sdk_version) const { for (int i = 0; i < graph_viewer.MaxNodeIndex(); ++i) { auto node = graph_viewer.GetNode(i); diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h index c7ee943dff761..cbbbe74cd056f 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h @@ -23,7 +23,7 @@ static const char SOURCE[] = "source"; class EPCtxHandler { public: EPCtxHandler() = default; - EPCtxHandler(const EPCtxHandler&) = default; + EPCtxHandler(const EPCtxHandler&) = delete; Status ExportEPCtxModel(const GraphViewer& graph_viewer, const std::string& graph_name, const logging::Logger& logger, @@ -33,11 +33,11 @@ class EPCtxHandler { Status ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer, bool& ep_context_embed_mode); bool CheckForOVEPCtxNode(const GraphViewer& graph_viewer, std::string openvino_sdk_version) const; bool IsValidOVEPCtxGraph() const { return is_valid_ep_ctx_graph_; } - [[nodiscard]] const std::shared_ptr GetModelBlobStream() const { return model_stream_; } + const std::string &GetModelBlobStream() const; private: bool is_valid_ep_ctx_graph_{false}; - std::shared_ptr model_stream_; + const onnx::AttributeProto* ep_cache_context_attribute_; }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index 7e8681d304abf..12ab7ecede031 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -109,7 +109,7 @@ OVExeNetwork OVCore::CompileModel(const std::string& onnx_model, } } -OVExeNetwork OVCore::ImportModel(std::shared_ptr model_stream, +OVExeNetwork OVCore::ImportModel(const std::string& model_string, std::string hw_target, const ov::AnyMap& device_config, bool embed_mode, @@ -117,10 +117,10 @@ OVExeNetwork OVCore::ImportModel(std::shared_ptr model_strea try { ov::CompiledModel obj; if (embed_mode) { - obj = oe.import_model(*model_stream, hw_target, device_config); + std::istringstream model_stream(model_string); + obj = oe.import_model(model_stream, hw_target, device_config); } else { - std::string blob_file_path = (*model_stream).str(); - std::ifstream modelStream(blob_file_path, std::ios_base::binary | std::ios_base::in); + std::ifstream modelStream(model_string, std::ios_base::binary | std::ios_base::in); obj = oe.import_model(modelStream, hw_target, {}); diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h index f4da4ea3e3244..0110a0246ddd3 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.h +++ b/onnxruntime/core/providers/openvino/ov_interface.h @@ -54,7 +54,7 @@ class OVCore { ov::AnyMap& device_config, const std::string& name); // OV Interface for Import model Stream - OVExeNetwork ImportModel(std::shared_ptr model_stream, + OVExeNetwork ImportModel(const std::string &model_string, std::string hw_target, const ov::AnyMap& device_config, bool embed_mode,