
NPUW: Deref (#27799)
Mirror of #27767.
Prerequisite to be merged first: #27798

---------

Co-authored-by: Dmitry Matveev <[email protected]>
smirnov-alexey and dmatveev authored Nov 30, 2024
1 parent 74d229c commit aa87a78
Showing 9 changed files with 167 additions and 83 deletions.
18 changes: 18 additions & 0 deletions src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -434,6 +434,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,

// Finalize memory in closures and weight banks
finalize_weights_bank();
detach_memory();

// Print stats report when possible
{
@@ -499,6 +500,23 @@ void ov::npuw::CompiledModel::finalize_weights_bank() {
LOG_INFO("Done.");
}

void ov::npuw::CompiledModel::detach_memory() {
LOG_INFO("Detaching model & weight memory...");
LOG_BLOCK();
for (size_t idx = 0; idx < m_compiled_submodels.size(); ++idx) {
auto& comp_model_desc = m_compiled_submodels[idx];
auto& proto_comp_model_desc = m_compiled_submodels[comp_model_desc.replaced_by.value_or(idx)];
if (!proto_comp_model_desc.model || !proto_comp_model_desc.compiled_model) {
continue; // optimized-out OR already cleared - skip
}
if (proto_comp_model_desc.device_it + 1 == m_dev_list.end()) {
LOG_INFO("No fallback expected - clear the OV model for Subgraph[" << idx << "]");
proto_comp_model_desc.model.reset();
}
}
LOG_INFO("Done");
}

std::string ov::npuw::CompiledModel::global_mem_device() const {
// Force globally set device if set
const std::string device_alloc = m_cfg.get<::intel_npu::NPUW_WEIGHTS_BANK_ALLOC>();
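The new detach_memory() pass frees the source ov::Model for subgraphs that can no longer fall back to another device, once the weights bank has been finalized. A simplified stand-alone sketch of that release-after-finalize idea, with placeholder types rather than the plugin's CompiledModelDesc:

```cpp
#include <memory>
#include <vector>

// Hypothetical stand-in for the plugin's per-subgraph descriptor.
struct SubgraphDesc {
    std::shared_ptr<int> model;           // placeholder for std::shared_ptr<ov::Model>
    std::shared_ptr<int> compiled_model;  // placeholder for the compiled blob
    bool last_device = false;             // true if no further fallback device exists
};

void detach_memory(std::vector<SubgraphDesc>& subgraphs) {
    for (auto& desc : subgraphs) {
        if (!desc.model || !desc.compiled_model) {
            continue;  // optimized-out or already cleared - skip
        }
        if (desc.last_device) {
            desc.model.reset();  // no fallback expected - the source model can be freed
        }
    }
}

int main() {
    std::vector<SubgraphDesc> subgraphs(2);
    subgraphs[0] = {std::make_shared<int>(0), std::make_shared<int>(0), true};
    detach_memory(subgraphs);  // subgraphs[0].model is released, subgraphs[1] untouched
}
```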
2 changes: 2 additions & 0 deletions src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
@@ -78,6 +78,8 @@ class CompiledModel : public ov::ICompiledModel {
void implement_properties();

void finalize_weights_bank();
void detach_memory();

std::string global_mem_device() const;
std::string funcall_mem_device(const std::size_t idx) const;

115 changes: 74 additions & 41 deletions src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
@@ -19,23 +19,34 @@ namespace npuw {
namespace weights {
namespace op {
struct Const {
std::shared_ptr<ov::op::v0::Constant> node;

std::shared_ptr<ov::op::v0::Constant> m_node;
ov::element::Type m_cached_type;
ov::Shape m_cached_shape;
const void* m_cached_ptr = nullptr;

explicit Const(std::shared_ptr<ov::op::v0::Constant> n) : m_node(n) {
m_cached_type = m_node->get_element_type();
m_cached_shape = m_node->get_shape();
m_cached_ptr = m_node->get_data_ptr();
}
std::size_t hash() const {
std::size_t seed = std::hash<const void*>()(node->get_data_ptr()) + 0x9e3779b9;
seed ^= node->get_element_type().hash() + 0x9e3779b9;
for (const auto& dim : node->get_shape()) {
std::size_t seed = std::hash<const void*>()(m_cached_ptr) + 0x9e3779b9;
seed ^= m_cached_type.hash() + 0x9e3779b9;
for (const auto& dim : m_cached_shape) {
seed ^= std::hash<std::size_t>()(dim) + 0x9e3779b9;
}
return seed;
}
bool operator==(const Const& other) const {
return (node->get_shape() == other.node->get_shape() &&
node->get_element_type() == other.node->get_element_type() &&
node->get_data_ptr() == other.node->get_data_ptr());
return (m_cached_type == other.m_cached_type && m_cached_shape == other.m_cached_shape &&
m_cached_ptr == other.m_cached_ptr);
}
ov::Tensor eval() const {
return ov::npuw::util::tensor_from_const(node);
NPUW_ASSERT(m_node && "Const::eval() can only happen before detach");
return ov::npuw::util::tensor_from_const(m_node);
}
void detach() {
m_node.reset();
}
};
struct Concat {
@@ -59,6 +70,11 @@ struct Concat {
}
return ov::npuw::util::concat(to_concat, axis);
}
void detach() {
for (auto&& lt : tensors) {
lt.detach();
}
}
};

struct Unpack {
@@ -95,6 +111,11 @@ struct Unpack {
}
return dst;
}
void detach() {
w.detach();
z.detach();
s.detach();
}
};
struct Permute {
LazyTensor tensor;
@@ -113,6 +134,9 @@ struct Permute {
ov::Tensor eval() const {
return ov::npuw::util::permute(tensor.eval(), axes);
}
void detach() {
tensor.detach();
}
};
struct Convert {
LazyTensor tensor;
@@ -130,23 +154,26 @@ struct Convert {
NPUW_ASSERT(ov::element::f16 == type);
return ov::npuw::util::to_f16(tensor.eval());
}
void detach() {
tensor.detach();
}
};
} // namespace op

using Transform = std::variant<op::Const, op::Concat, op::Unpack, op::Permute, op::Convert>;

struct LazyTensorImpl {
public:
LazyTensorImpl() = default;
explicit LazyTensorImpl(Transform&& t);
bool operator==(const LazyTensorImpl& other) const;

ov::Tensor eval() const;

bool operator==(const LazyTensorImpl& other) const;
std::size_t get_hash() const;

void detach();

Transform m_transform;
std::size_t m_hash = 0;
const std::size_t m_hash = 0;
};

} // namespace weights
@@ -165,26 +192,12 @@ struct overloaded : Ts... {
template <class... Ts>
overloaded(Ts...) -> overloaded<Ts...>;

std::size_t LazyTensorImpl::get_hash() const {
// Already calculated
if (m_hash != 0) {
return m_hash;
}

// Get hash
std::size_t seed = 0;
std::visit(overloaded{[&seed](const auto& op) {
seed ^= op.hash();
}},
m_transform);

return seed;
}

LazyTensorImpl::LazyTensorImpl(Transform&& t) {
m_transform = std::move(t);
m_hash = get_hash();
}
LazyTensorImpl::LazyTensorImpl(Transform&& t)
: m_transform(std::move(t)),
m_hash(std::visit(overloaded{[](const auto& op) {
return op.hash();
}},
m_transform)) {}

bool LazyTensorImpl::operator==(const LazyTensorImpl& other) const {
return m_hash == other.m_hash && m_transform == other.m_transform;
@@ -200,17 +213,25 @@ ov::Tensor LazyTensorImpl::eval() const {
some kind of indicator that the only difference is concat and we should look for an existing ov::Tensor.
Perhaps it should be done after model compilation and not handled here.
*/
return std::visit(overloaded{[](const auto& op) {
return op.eval();
}},
m_transform);
}

std::size_t LazyTensorImpl::get_hash() const {
return m_hash;
}

ov::Tensor result = std::visit(overloaded{[](const auto& op) {
return op.eval();
}},
m_transform);
NPUW_ASSERT(result);
return result;
void LazyTensorImpl::detach() {
std::visit(overloaded{[](auto& op) {
op.detach();
}},
m_transform);
}
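The detach() just added, like eval() and the hash computed in the constructor, is dispatched over the Transform variant via std::visit with the small overloaded helper defined earlier in this file. A minimal, self-contained sketch of that pattern, using illustrative op types rather than the plugin's:

```cpp
#include <iostream>
#include <variant>

// The "overloaded" idiom: inherit the call operators of every lambda passed
// in, so a single visitor object covers all variant alternatives.
template <class... Ts>
struct overloaded : Ts... {
    using Ts::operator()...;
};
template <class... Ts>
overloaded(Ts...) -> overloaded<Ts...>;

struct ConstOp {
    void detach() { std::cout << "Const::detach\n"; }
};
struct ConcatOp {
    void detach() { std::cout << "Concat::detach\n"; }
};

using Transform = std::variant<ConstOp, ConcatOp>;

void detach(Transform& t) {
    // One generic lambda handles every alternative that exposes detach().
    std::visit(overloaded{[](auto& op) {
                   op.detach();
               }},
               t);
}

int main() {
    Transform t = ConcatOp{};
    detach(t);  // prints "Concat::detach"
}
```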

LazyTensor::LazyTensor(const std::shared_ptr<ov::op::v0::Constant>& const_ptr)
: m_impl(std::make_shared<LazyTensorImpl>(op::Const{const_ptr})) {}
: m_impl(std::make_shared<LazyTensorImpl>(op::Const(const_ptr))) {}
LazyTensor::LazyTensor(const std::vector<LazyTensor>& to_concat, const std::size_t axis)
: m_impl(std::make_shared<LazyTensorImpl>(op::Concat{to_concat, axis})) {}
LazyTensor::LazyTensor(const LazyTensor& cw,
@@ -233,11 +254,17 @@ LazyTensor LazyTensor::convert(const ov::element::Type& type) {
}

bool LazyTensor::operator==(const LazyTensor& other) const {
if (!m_impl && !other.m_impl) {
return true;
}
if ((!m_impl && other.m_impl) || (m_impl && !other.m_impl)) {
return false;
}
return *m_impl.get() == *other.m_impl.get();
}

bool LazyTensor::operator!=(const LazyTensor& other) const {
return !(*m_impl.get() == *other.m_impl.get());
return !(*this == other);
}

ov::Tensor LazyTensor::eval() const {
@@ -254,6 +281,12 @@ std::size_t LazyTensor::get_hash() const {
return m_impl->get_hash();
}

void LazyTensor::detach() {
if (m_impl) {
m_impl->detach();
}
}

std::size_t LazyTensor::Hash::operator()(const LazyTensor& lt) const {
return lt.get_hash();
}
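The refactored op::Const above caches the element type, shape, and data pointer at construction so that hash() and operator== keep working after detach() drops the strong reference to the Constant node. A minimal sketch of the same idea with placeholder types, not the plugin's classes:

```cpp
#include <cstddef>
#include <functional>
#include <memory>
#include <vector>

// Hypothetical stand-in for op::Const: the ov::op::v0::Constant is replaced
// by a plain vector, but the caching idea is the same.
struct ConstLike {
    std::shared_ptr<std::vector<float>> node;  // strong reference, dropped by detach()
    const void* cached_ptr = nullptr;          // identity captured at construction
    std::size_t cached_size = 0;

    explicit ConstLike(std::shared_ptr<std::vector<float>> n)
        : node(std::move(n)),
          cached_ptr(node->data()),
          cached_size(node->size()) {}

    std::size_t hash() const {
        // Reads only the cached fields, so it stays valid after detach().
        return std::hash<const void*>()(cached_ptr) ^ (cached_size + 0x9e3779b9);
    }

    bool operator==(const ConstLike& other) const {
        return cached_ptr == other.cached_ptr && cached_size == other.cached_size;
    }

    void detach() {
        node.reset();  // release the data; hash()/operator== keep working
    }
};

int main() {
    ConstLike c(std::make_shared<std::vector<float>>(16, 0.f));
    const auto h = c.hash();
    c.detach();
    return c.hash() == h ? 0 : 1;  // hash is stable across detach()
}
```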
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
@@ -39,8 +39,8 @@ class LazyTensor {
bool operator!=(const LazyTensor& other) const;

ov::Tensor eval() const;

std::size_t get_hash() const;
void detach();

private:
std::shared_ptr<LazyTensorImpl> m_impl = nullptr;
@@ -23,7 +23,7 @@ using ov::npuw::online::detail::isOp;
Group::Group(const std::shared_ptr<ov::Node>& node,
size_t gid,
own::ade::NodeHandle nh,
const std::shared_ptr<own::ade::Graph>& g,
const std::weak_ptr<own::ade::Graph>& g,
const std::weak_ptr<Snapshot>& snapshot)
: m_nh(std::move(nh)),
m_id(gid),
Expand All @@ -36,7 +36,7 @@ Group::Group(const std::shared_ptr<ov::Node>& node,

Group::Group(size_t gid,
own::ade::NodeHandle nh,
const std::shared_ptr<own::ade::Graph>& g,
const std::weak_ptr<own::ade::Graph>& g,
const std::weak_ptr<Snapshot>& snapshot)
: m_nh(std::move(nh)),
m_id(gid),
@@ -214,23 +214,25 @@ void Group::relinkGraph(const Group::GPtr& gptr_other) {
auto consumers = gptr_other->dstNodes();

// Remove the gptr_other node from the graph. Note: this also removes all its edges
m_graph->remove(gptr_other->getHandle());
auto&& graph = m_graph.lock();
NPUW_ASSERT(graph);
graph->remove(gptr_other->getHandle());
for (const auto& nh : producers) {
if (m_nh == nh) {
continue;
}
// relink the graph
if (!m_graph->linked(nh, m_nh)) {
m_graph->link(nh, m_nh);
if (!graph->linked(nh, m_nh)) {
graph->link(nh, m_nh);
}
}
for (const auto& nh : consumers) {
if (m_nh == nh) {
continue;
}
// relink the graph
if (!m_graph->linked(m_nh, nh)) {
m_graph->link(m_nh, nh);
if (!graph->linked(m_nh, nh)) {
graph->link(m_nh, nh);
}
}
}
@@ -33,11 +33,11 @@ class Group : public std::enable_shared_from_this<Group> {
Group(const std::shared_ptr<ov::Node>& node,
size_t gid,
own::ade::NodeHandle nh,
const std::shared_ptr<own::ade::Graph>& g,
const std::weak_ptr<own::ade::Graph>& g,
const std::weak_ptr<Snapshot>& snapshot);
Group(size_t gid,
own::ade::NodeHandle nh,
const std::shared_ptr<own::ade::Graph>& g,
const std::weak_ptr<own::ade::Graph>& g,
const std::weak_ptr<Snapshot>& snapshot);

// After we formed a final structure of partitioning,
@@ -100,7 +100,7 @@ class Group : public std::enable_shared_from_this<Group> {

own::ade::NodeHandle m_nh;
size_t m_id; // used for utility prints only
std::shared_ptr<own::ade::Graph> m_graph;
std::weak_ptr<own::ade::Graph> m_graph;
std::weak_ptr<Snapshot> m_snapshot;
bool m_frozen = false;
bool m_nofold = false;
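The shared_ptr-to-weak_ptr switch for m_graph is presumably about ownership: Group objects are reachable from the graph/snapshot side, so a strong back-reference could keep the graph alive in a cycle. With a weak_ptr, each use site locks the handle and asserts it is still valid, as relinkGraph() does above. A small stand-alone sketch of that pattern with hypothetical types:

```cpp
#include <cassert>
#include <memory>

struct Graph {};  // hypothetical graph type; node storage would live here

struct GroupLike {
    std::weak_ptr<Graph> graph;  // non-owning back-reference to the owning graph

    explicit GroupLike(const std::shared_ptr<Graph>& g) : graph(g) {}

    void relink() {
        auto locked = graph.lock();  // promote only for the duration of the call
        assert(locked && "graph must outlive the groups that reference it");
        // ... mutate *locked here ...
        (void)locked;
    }
};

int main() {
    auto graph = std::make_shared<Graph>();
    GroupLike group(graph);  // the group holds no shared_ptr back, so no cycle
    group.relink();
}
```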
8 changes: 8 additions & 0 deletions src/plugins/intel_npu/src/plugin/npuw/util.hpp
@@ -127,6 +127,14 @@ Impl<M> _(std::shared_ptr<M> pM) {

} // namespace at

// Defined here as a drop-in replacement for ov::parallel_for, for debugging purposes
template <typename F>
void non_parallel_for(std::size_t count, F&& f) {
for (std::size_t idx = 0u; idx < count; idx++) {
f(idx);
}
}

} // namespace util
} // namespace npuw
} // namespace ov
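Since non_parallel_for takes the same (count, callable) arguments as ov::parallel_for, swapping one for the other while chasing a threading issue is a one-line change. A hypothetical usage sketch (the helper is re-declared locally to keep the example self-contained; the ov::parallel_for call is shown only as a comment):

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Same shape as the helper added to util.hpp: runs the body sequentially.
template <typename F>
void non_parallel_for(std::size_t count, F&& f) {
    for (std::size_t idx = 0u; idx < count; idx++) {
        f(idx);
    }
}

int main() {
    std::vector<int> data(8, 0);
    // ov::parallel_for(data.size(), [&](std::size_t i) { data[i] = static_cast<int>(i); });
    non_parallel_for(data.size(), [&](std::size_t i) { data[i] = static_cast<int>(i); });
    for (int v : data) {
        std::cout << v << ' ';
    }
    std::cout << '\n';
}
```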
