Skip to content

Commit

Permalink
Merge branch 'master' into almilosz/model-context
Browse files Browse the repository at this point in the history
  • Loading branch information
almilosz authored Nov 29, 2024
2 parents 19d64b2 + 90aacf2 commit 120ac1a
Show file tree
Hide file tree
Showing 44 changed files with 590 additions and 161 deletions.
2 changes: 1 addition & 1 deletion docs/articles_en/about-openvino/performance-benchmarks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ For a listing of all platforms and configurations used for testing, refer to the
2024.5, as of November 20, 2024.

* OpenVINO Model Server performance results are based on release
2024.4, as of Sept. 30, 2024.
2024.5, as of November 20, 2024.

The results may not reflect all publicly available updates. Intel technologies' features and
benefits depend on system configuration and may require enabled hardware, software, or service
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ offer a limited set of supported OpenVINO features.
ov::intel_npu::turbo
ov::intel_npu::tiles
ov::intel_npu::max_tiles
ov::intel_npu::bypass_umd_caching
ov::intel_npu::defer_weights_load
.. tab-item:: Read-only properties

Expand All @@ -168,7 +170,6 @@ offer a limited set of supported OpenVINO features.
ov::intel_npu::device_alloc_mem_size
ov::intel_npu::device_total_mem_size
ov::intel_npu::driver_version
ov::intel_npu::bypass_umd_caching
.. note::
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"whats_new_model": false,
"PlatformType": "Server Platforms (Intel® Xeon®)",
"Parameters": {
"OpenVINO Model Server": {
"Ovms": {
"Precisions": [
{
"Throughput": {
Expand All @@ -30,7 +30,7 @@
}
]
},
"vLLM with OpenVINO backend": {
"Vllm": {
"Precisions": [
{
"Throughput": {
Expand Down Expand Up @@ -63,7 +63,7 @@
"whats_new_model": false,
"PlatformType": "Server Platforms (Intel® Xeon®)",
"Parameters": {
"OpenVINO Model Server": {
"Ovms": {
"Precisions": [
{
"Throughput": {
Expand All @@ -87,7 +87,7 @@
}
]
},
"vLLM with OpenVINO backend": {
"Vllm": {
"Precisions": [
{
"Throughput": {
Expand Down Expand Up @@ -120,7 +120,7 @@
"whats_new_model": false,
"PlatformType": "Server Platforms (Intel® Xeon®)",
"Parameters": {
"OpenVINO Model Server": {
"Ovms": {
"Precisions": [
{
"Throughput": {
Expand All @@ -144,7 +144,7 @@
}
]
},
"vLLM with OpenVINO backend": {
"Vllm": {
"Precisions": [
{
"Throughput": {
Expand Down Expand Up @@ -177,7 +177,7 @@
"whats_new_model": true,
"PlatformType": "Server Platforms (Intel® Xeon®)",
"Parameters": {
"OpenVINO Model Server": {
"Ovms": {
"Precisions": [
{
"Throughput": {
Expand All @@ -201,7 +201,7 @@
}
]
},
"vLLM with OpenVINO backend": {
"Vllm": {
"Precisions": [
{
"Throughput": {
Expand All @@ -228,7 +228,7 @@
"whats_new_model": true,
"PlatformType": "Server Platforms (Intel® Xeon®)",
"Parameters": {
"OpenVINO Model Server": {
"Ovms": {
"Precisions": [
{
"Throughput": {
Expand All @@ -252,7 +252,7 @@
}
]
},
"vLLM with OpenVINO backend": {
"Vllm": {
"Precisions": [
{
"Throughput": {
Expand Down Expand Up @@ -283,7 +283,7 @@
"whats_new_model": true,
"PlatformType": "Server Platforms (Intel® Xeon®)",
"Parameters": {
"OpenVINO Model Server": {
"Ovms": {
"Precisions": [
{
"Throughput": {
Expand All @@ -307,7 +307,7 @@
}
]
},
"vLLM with OpenVINO backend": {
"Vllm": {
"Precisions": [
{
"Throughput": {
Expand Down Expand Up @@ -338,7 +338,7 @@
"whats_new_model": false,
"PlatformType": "Server Platforms (Intel® Xeon®)",
"Parameters": {
"OpenVINO Model Server": {
"Ovms": {
"Precisions": [
{
"Throughput": {
Expand All @@ -362,7 +362,7 @@
}
]
},
"vLLM with OpenVINO backend": {
"Vllm": {
"Precisions": [
{
"Throughput": {
Expand Down Expand Up @@ -393,7 +393,7 @@
"whats_new_model": false,
"PlatformType": "Server Platforms (Intel® Xeon®)",
"Parameters": {
"OpenVINO Model Server": {
"Ovms": {
"Precisions": [
{
"Throughput": {
Expand All @@ -417,7 +417,7 @@
}
]
},
"vLLM with OpenVINO backend": {
"Vllm": {
"Precisions": [
{
"Throughput": {
Expand Down Expand Up @@ -450,7 +450,7 @@
"whats_new_model": false,
"PlatformType": "Server Platforms (Intel® Xeon®)",
"Parameters": {
"OpenVINO Model Server": {
"Ovms": {
"Precisions": [
{
"Throughput": {
Expand All @@ -474,7 +474,7 @@
}
]
},
"vLLM with OpenVINO backend": {
"Vllm": {
"Precisions": [
{
"Throughput": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ ov::matcher_pass_callback ConvertReduceBase::convert_reduce_to_pooling() {
return [&](ov::pass::pattern::Matcher& m) {
auto reduce = std::dynamic_pointer_cast<T>(m.get_match_root());

if (!reduce || transformation_callback(reduce)) {
if (!reduce || transformation_callback(reduce) || ov::shape_size(reduce->input_value(0).get_shape()) == 0) {
return false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,30 @@
#include "openvino/op/tensor_iterator.hpp"
#include "openvino/op/util/multi_subgraph_base.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "openvino/util/common_util.hpp"
#include "transformations/utils/utils.hpp"

using namespace ov::op::util;

namespace {
/** @brief Value to mark that input idx has been removed (at least one removed so last idx will be always available) */
constexpr auto mark_removed = std::numeric_limits<uint64_t>::max();

constexpr bool is_not_removed_idx(const decltype(mark_removed) idx) {
return mark_removed != idx;
}

uint64_t get_updated_idx(uint64_t idx, uint64_t removed_idx) {
if (idx == removed_idx) {
return mark_removed;
} else if (is_not_removed_idx(idx) && idx > removed_idx) {
return idx - 1;
} else {
return idx;
}
};
} // namespace

bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const std::shared_ptr<ov::Model>& m) {
RUN_ON_MODEL_SCOPE(RemoveMultiSubGraphOpDanglingParamsResults);
bool is_changed = false;
Expand Down Expand Up @@ -117,7 +137,6 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st
// Remove inputs
bool pass_required = false;
std::set<uint64_t> required_inputs_indices;
auto op_inputs = multi_subgraph_op->input_values();
std::vector<std::vector<size_t>> to_remove_descriptors_indexes;
to_remove_descriptors_indexes.resize(subgraphs_size);
for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) {
Expand All @@ -142,64 +161,57 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st
using DescType = op::util::MultiSubGraphOp::MultiSubgraphInputDescriptionVector;
auto update_body_param_desc = [](DescType& descriptors, uint64_t removed_body_idx) {
for (auto& desc : descriptors) {
if (desc->m_body_parameter_index > removed_body_idx) {
desc->m_body_parameter_index--;
}
desc->m_body_parameter_index = get_updated_idx(desc->m_body_parameter_index, removed_body_idx);
}
};
auto update_op_inputs_desc = [&subgraphs_size](const std::shared_ptr<op::util::MultiSubGraphOp>& op,
std::set<uint64_t>& required_inputs_indices,
uint64_t removed_loop_idx) {
std::set<uint64_t> new_required_inputs_indices;
for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) {
auto& descriptors = op->get_input_descriptions(static_cast<int>(body_idx));
for (auto& desc : descriptors) {
if (desc->m_input_index > removed_loop_idx) {
desc->m_input_index--;
}
desc->m_input_index = get_updated_idx(desc->m_input_index, removed_loop_idx);
}
}
for (auto input_index : required_inputs_indices) {
if (input_index > removed_loop_idx) {
new_required_inputs_indices.insert(input_index - 1);
} else {
new_required_inputs_indices.insert(input_index);
}
};

const auto update_required_input_indicies = [](std::set<uint64_t>& required_inputs_indices,
uint64_t removed_input_idx) {
std::set<uint64_t> new_required_inputs_indices;
for (const auto& input_index : required_inputs_indices) {
new_required_inputs_indices.insert(input_index > removed_input_idx ? input_index - 1 : input_index);
}
required_inputs_indices = new_required_inputs_indices;
required_inputs_indices = std::move(new_required_inputs_indices);
};
// Remove dangling body params and input and update input descriptors
auto op_inputs = multi_subgraph_op->input_values();
for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) {
auto& body_in_descriptors = multi_subgraph_op->get_input_descriptions(static_cast<int>(body_idx));
auto& body_func = multi_subgraph_op->get_function(static_cast<int>(body_idx));
auto& body_params = body_func->get_parameters();
op::util::MultiSubGraphOp::MultiSubgraphInputDescriptionVector updated_body_in_descriptors;

for (size_t desc_idx = 0; desc_idx < body_in_descriptors.size(); ++desc_idx) {
if (std::count(std::begin(to_remove_descriptors_indexes[body_idx]),
std::end(to_remove_descriptors_indexes[body_idx]),
desc_idx) > 0) {
if (body_in_descriptors[desc_idx]->m_body_parameter_index < body_params.size()) {
auto& body_param = body_params[body_in_descriptors[desc_idx]->m_body_parameter_index];
body_func->remove_parameter(body_param);
// Move all body indexes which are after these indicated by to_remove_descriptors_indexes
update_body_param_desc(body_in_descriptors,
body_in_descriptors[desc_idx]->m_body_parameter_index);
}
// remove dangling input of MultiSubGraphOp which was not removed earlier
auto current_input_idx = body_in_descriptors[desc_idx]->m_input_index;
// the same input tensor can go to different input ports
if (current_input_idx < op_inputs.size() &&
std::count(std::begin(required_inputs_indices),
std::end(required_inputs_indices),
current_input_idx) == 0 &&
std::count(std::begin(op_inputs), std::end(op_inputs), op_inputs[current_input_idx]) > 0) {
op_inputs.erase(std::next(op_inputs.begin(), current_input_idx));
// Move all input indexes (in all bodies) which are after these indicated by
// to_remove_descriptors_indexes and are not used in any body
update_op_inputs_desc(multi_subgraph_op, required_inputs_indices, current_input_idx);
}
} else {
updated_body_in_descriptors.emplace_back(body_in_descriptors[desc_idx]);
auto& current_body_desc = body_in_descriptors[desc_idx];
const auto current_body_parameter_idx = current_body_desc->m_body_parameter_index;
if (!util::contains(to_remove_descriptors_indexes[body_idx], desc_idx)) {
updated_body_in_descriptors.emplace_back(current_body_desc);
} else if (is_not_removed_idx(current_body_parameter_idx)) {
auto& body_func = multi_subgraph_op->get_function(body_idx);
const auto& body_params = body_func->get_parameters();

body_func->remove_parameter(body_params[current_body_parameter_idx]);
// Move all body indexes which are after these indicated by to_remove_descriptors_indexes
update_body_param_desc(body_in_descriptors, current_body_parameter_idx);
}

const auto current_input_idx = current_body_desc->m_input_index;
// remove dangling input of MultiSubGraphOp which was not removed earlier
// the same input tensor can go to different input ports
if (!util::contains(required_inputs_indices, current_input_idx) &&
is_not_removed_idx(current_input_idx)) {
op_inputs.erase(op_inputs.begin() + current_input_idx);
// Move all input indexes (in all bodies) which are after these indicated by
// to_remove_descriptors_indexes and are not used in any body
update_op_inputs_desc(multi_subgraph_op, current_input_idx);
update_required_input_indicies(required_inputs_indices, current_input_idx);
}
}
multi_subgraph_op->set_input_descriptions(static_cast<int>(body_idx), updated_body_in_descriptors);
Expand Down
Loading

0 comments on commit 120ac1a

Please sign in to comment.