Add method for checking memory address changes in the zero tensor
Signed-off-by: Bogdan Pereanu <[email protected]>
pereanub committed Jan 7, 2025
1 parent ac35818 commit 9b66d23
Showing 7 changed files with 69 additions and 136 deletions.
18 changes: 4 additions & 14 deletions src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
@@ -15,14 +15,10 @@
#include "zero_pipeline.hpp"
#include "zero_profiling.hpp"
#include "zero_remote_tensor.hpp"
#include "zero_tensor.hpp"

namespace intel_npu {

struct TensorInfo {
bool tensorCreatedLocally;
uint64_t originalMemoryId;
};

class ZeroInferRequest final : public SyncInferRequest {
public:
explicit ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
@@ -67,12 +63,9 @@ class ZeroInferRequest final : public SyncInferRequest {
std::shared_ptr<ov::ITensor>& get_level_zero_input(size_t index, size_t tensorNo = 0) const;
std::vector<std::shared_ptr<ov::ITensor>>& get_level_zero_inputs(size_t index) const;

std::shared_ptr<ov::ITensor> allocate_tensor(
const IODescriptor& descriptor,
const size_t index,
const bool isInput,
const ov::Allocator& allocator = {},
const std::optional<std::size_t> batchSize = std::nullopt) const override;
std::shared_ptr<ov::ITensor> create_tensor(ov::element::Type type,
const ov::Shape& shape,
const ov::Allocator& allocator = {}) const override;

const std::shared_ptr<ZeroInitStructsHolder> _initStructs;
const std::shared_ptr<IGraph> _graph;
@@ -84,9 +77,6 @@ class ZeroInferRequest final : public SyncInferRequest {
mutable std::vector<std::vector<std::shared_ptr<ov::ITensor>>> _levelZeroInputTensors;
mutable std::vector<std::shared_ptr<ov::ITensor>> _levelZeroOutputTensors;

mutable std::vector<TensorInfo> _levelZeroInputTensorInfo;
mutable std::vector<TensorInfo> _levelZeroOutputTensorInfo;

ze_device_properties_t _properties = {};
std::shared_ptr<const zeroMemory::HostMemAllocator> _inputAllocator;
std::shared_ptr<const zeroMemory::HostMemAllocator> _outputAllocator;
src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp
@@ -7,9 +7,9 @@
#include "intel_npu/common/igraph.hpp"
#include "intel_npu/utils/zero/zero_utils.hpp"
#include "intel_npu/utils/zero/zero_wrappers.hpp"
#include "openvino/runtime/itensor.hpp"
#include "zero_memory.hpp"
#include "zero_profiling.hpp"
#include "zero_tensor.hpp"

namespace intel_npu {

6 changes: 5 additions & 1 deletion src/plugins/intel_npu/src/backend/include/zero_tensor.hpp
@@ -22,7 +22,7 @@ class ZeroTensor final : public ov::ITensor {
const ov::Shape& shape,
const ov::Allocator& allocator);

void* data(const ov::element::Type& element_type) const override;
void* data(const ov::element::Type& type = {}) const override;

const ov::element::Type& get_element_type() const override;

@@ -32,6 +32,9 @@

const ov::Strides& get_strides() const override;

bool memory_address_changed();
void reset_memory_flag();

~ZeroTensor();

private:
@@ -51,6 +54,7 @@ class ZeroTensor final : public ov::ITensor {
mutable std::once_flag _strides_once;
ov::Allocator _allocator;
void* _ptr = nullptr;
bool _reset_tensor_memory = false;
};

} // namespace intel_npu
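
For readers skimming the diff: the two new methods form a small dirty-flag protocol. set_shape() raises the flag when it has to reallocate, and the infer request polls and clears it before execution. Below is a minimal, self-contained sketch of that protocol; ToyTensor and its main() driver are illustrative stand-ins, not plugin code.

#include <cstddef>
#include <cstdlib>
#include <iostream>

// Illustrative stand-in for ZeroTensor's reallocation tracking: the flag is
// raised whenever the backing storage moves, and the consumer clears it once
// it has re-registered the new address.
class ToyTensor {
public:
    explicit ToyTensor(std::size_t bytes) : _ptr(std::malloc(bytes)), _capacity(bytes) {}
    ~ToyTensor() { std::free(_ptr); }

    void resize(std::size_t bytes) {
        if (bytes > _capacity) {
            std::free(_ptr);
            _ptr = std::malloc(bytes);    // storage moved to a new address
            _capacity = bytes;
            _reset_tensor_memory = true;  // mirrors ZeroTensor::set_shape
        }
    }

    bool memory_address_changed() const { return _reset_tensor_memory; }
    void reset_memory_flag() { _reset_tensor_memory = false; }
    void* data() const { return _ptr; }

private:
    void* _ptr = nullptr;
    std::size_t _capacity = 0;
    bool _reset_tensor_memory = false;
};

int main() {
    ToyTensor t(64);
    t.resize(256);  // forces a reallocation
    if (t.memory_address_changed()) {
        std::cout << "re-register " << t.data() << " with the command list\n";
        t.reset_memory_flag();  // acknowledge, as infer_async() does below
    }
}

Keeping the flag inside the tensor lets the infer request drop the per-inference driver query (the get_memory_id() helper built on zeMemGetAllocProperties) that the zero_infer_request.cpp changes below delete.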
140 changes: 33 additions & 107 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -13,7 +13,6 @@
#include "openvino/op/util/op_types.hpp"
#include "openvino/runtime/intel_npu/remote_properties.hpp"
#include "zero_memory.hpp"
#include "zero_tensor.hpp"

using namespace intel_npu;

@@ -92,17 +91,6 @@ bool memory_was_allocated_in_the_same_l0_context(ze_context_handle_t hContext, const void* ptr) {
return false;
}

uint64_t get_memory_id(ze_context_handle_t hContext, const void* ptr) {
ze_memory_allocation_properties_t desc = {};
desc.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES;
auto res = intel_npu::zeMemGetAllocProperties(hContext, ptr, &desc, nullptr);
if (res != ZE_RESULT_SUCCESS) {
return 0;
}

return desc.id;
}

} // namespace

//------------------------------------------------------------------------------
@@ -116,8 +104,6 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
_logger("ZeroInferRequest", config.get<LOG_LEVEL>()),
_levelZeroInputTensors(_metadata.inputs.size(), std::vector<std::shared_ptr<ov::ITensor>>(1, nullptr)),
_levelZeroOutputTensors(_metadata.outputs.size(), nullptr),
_levelZeroInputTensorInfo(_metadata.inputs.size(), TensorInfo{false, 0}),
_levelZeroOutputTensorInfo(_metadata.outputs.size(), TensorInfo{false, 0}),
_profilingPool(_initStructs, _graph, zeroProfiling::POOL_SIZE),
_profilingQuery(_initStructs, 0) {
_logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest");
@@ -196,7 +182,6 @@ void ZeroInferRequest::create_pipeline() {
INPUT,
*_inputAllocator,
_graph->get_batch_size());
_levelZeroInputTensorInfo.at(inputIndex).tensorCreatedLocally = true;
}

for (size_t outputIndex = 0; outputIndex < _metadata.outputs.size(); ++outputIndex) {
@@ -212,7 +197,6 @@
OUTPUT,
*_outputAllocator,
_graph->get_batch_size());
_levelZeroOutputTensorInfo.at(outputIndex).tensorCreatedLocally = true;
}

if (_initStructs->getMutableCommandListVersion()) {
@@ -229,9 +213,6 @@
if (std::dynamic_pointer_cast<ZeroRemoteTensor>(get_level_zero_input(inputIndex)) != nullptr) {
continue;
}

_levelZeroInputTensorInfo.at(inputIndex).originalMemoryId =
get_memory_id(_initStructs->getContext(), get_level_zero_input(inputIndex)->data());
}

for (size_t outputIndex = 0; outputIndex < _metadata.outputs.size(); ++outputIndex) {
Expand All @@ -243,9 +224,6 @@ void ZeroInferRequest::create_pipeline() {
if (std::dynamic_pointer_cast<ZeroRemoteTensor>(_levelZeroOutputTensors.at(outputIndex)) != nullptr) {
continue;
}

_levelZeroOutputTensorInfo.at(outputIndex).originalMemoryId =
get_memory_id(_initStructs->getContext(), _levelZeroOutputTensors.at(outputIndex)->data());
}
}

@@ -275,24 +253,15 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor>& tensor,
const bool isInput) {
OV_ITT_TASK_CHAIN(ZERO_SET_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_tensor_data");
auto& levelZeroTensors = isInput ? get_level_zero_input(index) : _levelZeroOutputTensors.at(index);
auto& tensorCreatedLocally = isInput ? _levelZeroInputTensorInfo.at(index).tensorCreatedLocally
: _levelZeroOutputTensorInfo.at(index).tensorCreatedLocally;

bool setTensorData = false;
bool levelZeroTensorCreatedLocally = true;

OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "check_data_allocation");
if (memory_was_allocated_in_the_same_l0_context(_initStructs->getContext(), tensor->data())) {
_logger.debug("ZeroInferRequest::set_tensor_data - tensor was created in the same L0 context");
levelZeroTensors = tensor;
levelZeroTensorCreatedLocally = false;
setTensorData = true;
}

if (!setTensorData) {
// make sure that the L0 tensor was allocated locally and is not received from the user when receiving
// random tensor
if (!tensorCreatedLocally) {
const auto& zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(tensor);

if (zeroTensor == nullptr) {
OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "check_data_allocation");
if (memory_was_allocated_in_the_same_l0_context(_initStructs->getContext(), tensor->data())) {
_logger.debug("ZeroInferRequest::set_tensor_data - tensor was created in the same L0 context");
levelZeroTensors = tensor;
} else {
_logger.debug("ZeroInferRequest::set_tensor_data - create locally L0 tensor");
OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "allocate tensor");

Expand All @@ -301,28 +270,16 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor>& tenso
isInput,
isInput ? *_inputAllocator : *_outputAllocator,
_graph->get_batch_size());

setTensorData = true;
levelZeroTensorCreatedLocally = true;
}
}

if (setTensorData) {
tensorCreatedLocally = levelZeroTensorCreatedLocally;

if (_pipelineIsCreated) {
_logger.debug("ZeroInferRequest::infer_async - update command list");

auto& updateOriginalAddress = isInput ? _levelZeroInputTensorInfo.at(index).originalMemoryId
: _levelZeroOutputTensorInfo.at(index).originalMemoryId;

OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList");
_pipeline->updateCommandList(levelZeroTensors->data(),
levelZeroTensors->get_byte_size(),
isInput ? _graph->get_input_descriptors().at(index).idx
: _graph->get_output_descriptors().at(index).idx);

updateOriginalAddress = get_memory_id(_initStructs->getContext(), levelZeroTensors->data());
}
}
}
@@ -344,11 +301,7 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr<ZeroRemoteTensor>& tensor,
}

auto& levelZeroTensors = isInput ? get_level_zero_input(index) : _levelZeroOutputTensors.at(index);
auto& tensorCreatedLocally = isInput ? _levelZeroInputTensorInfo.at(index).tensorCreatedLocally
: _levelZeroOutputTensorInfo.at(index).tensorCreatedLocally;

levelZeroTensors = tensor;
tensorCreatedLocally = false;

if (_pipelineIsCreated) {
_logger.debug("ZeroInferRequest::infer_async - update command list");
@@ -500,17 +453,13 @@ ov::SoPtr<ov::ITensor> ZeroInferRequest::get_tensor(const ov::Output<const ov::Node>& port) const {
metadata.nodeFriendlyName.c_str());

auto& levelZeroTensors = isInput ? get_level_zero_input(ioIndex) : _levelZeroOutputTensors.at(ioIndex);
auto& tensorCreatedLocally = isInput ? _levelZeroInputTensorInfo.at(ioIndex).tensorCreatedLocally
: _levelZeroOutputTensorInfo.at(ioIndex).tensorCreatedLocally;

levelZeroTensors = allocate_tensor(metadata,
ioIndex,
isInput,
isInput ? *_inputAllocator : *_outputAllocator,
_graph->get_batch_size());

tensorCreatedLocally = true;

return levelZeroTensors;
}

@@ -646,15 +595,20 @@ void ZeroInferRequest::infer_async() {
continue;
}

auto memoryId = get_memory_id(_initStructs->getContext(), levelZeroTensor.at(SINGLE_TENSOR)->data());
auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensor.at(SINGLE_TENSOR));

if (_levelZeroInputTensorInfo.at(inputIndex).originalMemoryId != memoryId) {
if (zeroTensor == nullptr) {
++inputIndex;
continue;
}

if (zeroTensor->memory_address_changed()) {
_logger.debug("Update input graph descriptor with the new tensor");
_pipeline->updateCommandList(levelZeroTensor.at(SINGLE_TENSOR)->data(),
levelZeroTensor.at(SINGLE_TENSOR)->get_byte_size(),
_pipeline->updateCommandList(zeroTensor->data(),
zeroTensor->get_byte_size(),
_graph->get_input_descriptors().at(inputIndex).idx);

_levelZeroInputTensorInfo.at(inputIndex).originalMemoryId = memoryId;
zeroTensor->reset_memory_flag();
}

++inputIndex;
@@ -674,15 +628,20 @@
continue;
}

auto memoryId = get_memory_id(_initStructs->getContext(), levelZeroTensor->data());
auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensor);

if (zeroTensor == nullptr) {
++outputIndex;
continue;
}

if (_levelZeroOutputTensorInfo.at(outputIndex).originalMemoryId != memoryId) {
if (zeroTensor->memory_address_changed()) {
_logger.debug("Update output graph descriptor with the new tensor");
_pipeline->updateCommandList(levelZeroTensor->data(),
levelZeroTensor->get_byte_size(),
_pipeline->updateCommandList(zeroTensor->data(),
zeroTensor->get_byte_size(),
_graph->get_output_descriptors().at(outputIndex).idx);

_levelZeroOutputTensorInfo.at(outputIndex).originalMemoryId = memoryId;
zeroTensor->reset_memory_flag();
}

++outputIndex;
@@ -810,45 +769,12 @@ std::vector<ov::ProfilingInfo> ZeroInferRequest::get_profiling_info() const {
}
}

std::shared_ptr<ov::ITensor> ZeroInferRequest::allocate_tensor(const IODescriptor& descriptor,
const size_t index,
const bool isInput,
const ov::Allocator& allocator,
const std::optional<std::size_t> batchSize) const {
check_network_precision(descriptor.precision);

std::shared_ptr<ov::ITensor> tensor;
ov::Shape allocatedTensorShape = descriptor.shapeFromCompiler.get_max_shape();

if (batchSize.has_value()) {
allocatedTensorShape[BATCH_AXIS] = *batchSize;
}

if (descriptor.isStateOutput) {
// Only one buffer is required for each (state input, state output) pair, acting as an input before running the
// inference and as an output after performing it. Thus both the "state input" and "state output" entries shall
// point to the same buffer.
OPENVINO_ASSERT(descriptor.relatedDescriptorIndex.has_value(),
"The link between state descriptors is missing, state name: ",
descriptor.nameFromCompiler);
tensor = get_user_input(*descriptor.relatedDescriptorIndex)._ptr;
} else {
tensor = std::make_shared<ZeroTensor>(_initStructs, descriptor.precision, allocatedTensorShape, allocator);
}

if (isInput) {
if (get_user_input(index) == nullptr) {
get_user_input(index) = tensor;
}

if (descriptor.isStateInput) {
_variableStates.push_back(std::make_shared<VariableState>(descriptor.nameFromCompiler, tensor));
}
} else if (_userOutputTensors.at(index) == nullptr) {
_userOutputTensors.at(index) = tensor;
}
std::shared_ptr<ov::ITensor> ZeroInferRequest::create_tensor(ov::element::Type type,
const ov::Shape& shape,
const ov::Allocator& allocator) const {
OPENVINO_ASSERT(allocator, "Allocator must be provided when creating a zero tensor!");

return tensor;
return std::make_shared<ZeroTensor>(_initStructs, type, shape, allocator);
}

std::vector<uint8_t> ZeroInferRequest::get_raw_profiling_data() const {
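The consumer side of the protocol, condensed: infer_async() now downcasts each level-zero tensor, skips anything that is not a ZeroTensor (user-provided or remote memory), and re-registers the pointer with the command list only while the flag is up. A hypothetical, self-contained sketch of that control flow; the types and the update_command_list() helper are simplified stand-ins, not the plugin's API.

#include <cstddef>
#include <iostream>
#include <memory>
#include <vector>

struct ITensor { virtual ~ITensor() = default; };

struct ZeroTensorLike : ITensor {
    bool dirty = true;  // pretend the buffer moved since the pipeline was built
    bool memory_address_changed() const { return dirty; }
    void reset_memory_flag() { dirty = false; }
};

void update_command_list(std::size_t idx) {
    std::cout << "update graph argument " << idx << '\n';
}

void refresh_bindings(const std::vector<std::shared_ptr<ITensor>>& tensors) {
    for (std::size_t i = 0; i < tensors.size(); ++i) {
        auto zero = std::dynamic_pointer_cast<ZeroTensorLike>(tensors[i]);
        if (zero == nullptr) {
            continue;  // non-ZeroTensor entries are skipped, as in the diff
        }
        if (zero->memory_address_changed()) {
            update_command_list(i);    // re-register the new address once
            zero->reset_memory_flag();
        }
    }
}

int main() {
    std::vector<std::shared_ptr<ITensor>> tensors = {std::make_shared<ZeroTensorLike>(),
                                                     std::make_shared<ITensor>()};
    refresh_bindings(tensors);  // updates argument 0 only
    refresh_bindings(tensors);  // flag already acknowledged: nothing to do
}
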
14 changes: 10 additions & 4 deletions src/plugins/intel_npu/src/backend/src/zero_tensor.cpp
@@ -116,10 +116,6 @@ void ZeroTensor::set_shape(ov::Shape new_shape) {
_shape = std::move(new_shape);

if (get_size() > get_capacity()) {
#ifdef __linux__
OPENVINO_THROW("Re-shaping the tensor with a larger shape is not available.");
#endif

if (!_init_structs->getMutableCommandListVersion()) {
OPENVINO_THROW("Re-shaping the tensor with a larger shape is not available using this driver version. "
"Please update the driver.");
Expand All @@ -131,12 +127,22 @@ void ZeroTensor::set_shape(ov::Shape new_shape) {
_capacity = _shape;
_ptr = _allocator.allocate(get_bytes_capacity());
initialize_elements(_ptr, _element_type, _shape);

_reset_tensor_memory = true;
}

_strides.clear();
update_strides();
}

bool ZeroTensor::memory_address_changed() {
return _reset_tensor_memory;
}

void ZeroTensor::reset_memory_flag() {
_reset_tensor_memory = false;
}

ZeroTensor::~ZeroTensor() {
destroy_memory();
}
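
The grow-only rule in set_shape() is worth spelling out: reallocation, and therefore the flag, happens only when the requested element count exceeds the current capacity, so shrinking a tensor never invalidates an address already recorded in the command list. Note the diff also drops the Linux-only OPENVINO_THROW, so growing a tensor is no longer rejected outright on Linux when the driver supports mutable command lists. A small sketch under that assumption; GrowOnlyBuffer is illustrative, not plugin code.

#include <cassert>
#include <cstddef>
#include <numeric>
#include <vector>

// Mirrors the rule in ZeroTensor::set_shape: reallocate (and raise the flag)
// only when the new element count exceeds the current capacity.
struct GrowOnlyBuffer {
    std::vector<std::size_t> capacity_shape;
    bool reset_tensor_memory = false;

    static std::size_t count(const std::vector<std::size_t>& shape) {
        return std::accumulate(shape.begin(), shape.end(), std::size_t{1},
                               [](std::size_t a, std::size_t b) { return a * b; });
    }

    void set_shape(const std::vector<std::size_t>& new_shape) {
        if (count(new_shape) > count(capacity_shape)) {
            capacity_shape = new_shape;  // stands in for free + reallocate
            reset_tensor_memory = true;  // consumers must re-register the pointer
        }
    }
};

int main() {
    GrowOnlyBuffer buf{{1, 3, 224, 224}};
    buf.set_shape({1, 3, 112, 112});  // smaller: allocation is reused
    assert(!buf.reset_tensor_memory);
    buf.set_shape({2, 3, 224, 224});  // larger: reallocation, flag raised
    assert(buf.reset_tensor_memory);
    return 0;
}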