From 199ae11d60049cbc9856a2a8e34a597a880b2b8e Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Sat, 4 Jan 2025 08:07:18 +0900 Subject: [PATCH] Fix many unittest failures due to the zero byte allocation of ocl memory --- .../include/intel_gpu/plugin/common_utils.hpp | 21 ------------------- src/plugins/intel_gpu/src/graph/loop.cpp | 17 +++++++-------- .../intel_gpu/src/graph/primitive_inst.cpp | 4 ++-- .../intel_gpu/src/runtime/ocl/ocl_memory.cpp | 11 ++++++---- 4 files changed, 17 insertions(+), 36 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp index 792745193ed550..c958b840e65290 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp @@ -6,7 +6,6 @@ #include #include -#include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/runtime/layout.hpp" #include "intel_gpu/runtime/memory.hpp" #include "intel_gpu/runtime/optionals.hpp" @@ -104,26 +103,6 @@ inline ov::Shape predict_shape(const std::string& name, const cldnn::layout layo return layout.get_shape(); } -inline cldnn::memory::ptr allocate_memory_evenif_zero_bytes(cldnn::engine& _engine, - const cldnn::layout& layout, - cldnn::allocation_type type, - bool reset = true) { - if (layout.bytes_count() == 0) { - auto non_zero_layout = cldnn::layout({1}, layout.data_type, layout.format); - auto res = _engine.allocate_memory(non_zero_layout, type, false); - return _engine.reinterpret_buffer(*res, layout); - } else { - return _engine.allocate_memory(layout, type, reset); - } -} - -inline cldnn::memory::ptr allocate_memory_evenif_zero_bytes(cldnn::engine& _engine, - const cldnn::layout& layout, - bool reset = true) { - cldnn::allocation_type type = _engine.get_lockable_preferred_memory_allocation_type(layout.format.is_image_2d()); - return allocate_memory_evenif_zero_bytes(_engine, layout, type, reset); -} - /// WA: 
Force exit. Any opencl api call can be hang after CL_OUT_OF_RESOURCES. inline void ForceExit() { std::cerr << "[GPU] force exit.\n" diff --git a/src/plugins/intel_gpu/src/graph/loop.cpp b/src/plugins/intel_gpu/src/graph/loop.cpp index 5d842c2a863433..f381cf3638e022 100644 --- a/src/plugins/intel_gpu/src/graph/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/loop.cpp @@ -7,7 +7,6 @@ #include "mutable_data_inst.h" #include "json_object.h" #include "primitive_type_base.h" -#include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/primitives/data.hpp" #include "intel_gpu/primitives/mutable_data.hpp" #include @@ -319,7 +318,7 @@ void loop_inst::update_backedge_mapped_memory() { // generally, shouldn't go this way, but... auto output_prim = body_network->get_primitive(back_edge.from); layout output_layout = output_prim->output_memory().get_layout(); - backedge_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), output_layout, false); + backedge_mem = body_network->get_engine().allocate_memory(output_layout, 0); } } else { auto external_id = output_mapping.front()->external_id; @@ -397,7 +396,7 @@ loop_inst::concatenated_memory_mapping::ptr loop_inst::create_concat_memory_map( << sliced_layout.get_partial_shape().to_string() << " to " << updated_sliced_layout.to_string() << std::endl; sliced_layout.set_partial_shape(updated_sliced_layout); - inter_mem_ptr = ov::intel_gpu::allocate_memory_evenif_zero_bytes(engine, sliced_layout); + inter_mem_ptr = engine.allocate_memory(sliced_layout); intern_prim->set_output_layout(sliced_layout, internal_id.idx); } @@ -407,8 +406,8 @@ loop_inst::concatenated_memory_mapping::ptr loop_inst::create_concat_memory_map( } else { sliced_mems.reserve(num_iterations); sliced_mems.push_back(inter_mem_ptr); - for (int j=1; j < num_iterations; ++j) { - memory::ptr sliced_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(engine, sliced_layout); + for (int j = 1; j < num_iterations; ++j) { + memory::ptr 
sliced_mem = engine.allocate_memory(sliced_layout); sliced_mems.push_back(sliced_mem); } } @@ -499,7 +498,7 @@ void loop_inst::preprocess_input_memory(const int64_t num_iterations) { // if internal input memory is in backedge, allocate new memory. // Because internal input memory's data will be updated through backedge process. if (iter != _back_edges.end()) { - internal_input_memory = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), memory->get_layout(), false); + internal_input_memory = body_network->get_engine().allocate_memory(memory->get_layout(), false); internal_input_memory->copy_from(body_network->get_stream(), *memory); GPU_DEBUG_LOG << "Input memory of internal node(" << internal_id.to_string() << ") is set to new memory(" << internal_input_memory << ", " << internal_input_memory->get_layout().to_short_string() @@ -722,7 +721,7 @@ void loop_inst::postprocess_output_memory(bool is_dynamic, int64_t current_itera } else { if (!output_allocated || get_flag(ExecutionFlags::SHAPE_CHANGED)) { auto concat_layout = _impl_params->get_output_layout(external_id.idx); - auto concat_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(_network.get_engine(), concat_layout, false); + auto concat_mem = _network.get_engine().allocate_memory(concat_layout, false); external_outputs[external_id.idx] = concat_mem; auto iter = std::find_if(concatenated_output_mem_mappings.begin(), concatenated_output_mem_mappings.end(), @@ -1081,7 +1080,7 @@ std::vector loop_inst::handle_buffers_for_next_iteration(const loop_ // Check backedge_to shape needs to be updated by initial_mem OPENVINO_ASSERT(mapping.initial_mem != nullptr, "initial_mem should not be null"); if (!mapping.initial_mem->get_layout().identical(to_mem->get_layout())) { - to_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), mapping.initial_mem->get_layout(), false); + to_mem = body_network->get_engine().allocate_memory(mapping.initial_mem->get_layout(), false); 
body_network->set_input_data(to_id, to_mem); ev = to_mem->copy_from(body_network->get_stream(), *(mapping.initial_mem)); @@ -1104,7 +1103,7 @@ std::vector<event::ptr> loop_inst::handle_buffers_for_next_iteration(const loop_ // Check backedge_to shape needs to be updated by backedge_from if (!from_mem->get_layout().identical(to_mem->get_layout())) { - to_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), from_mem->get_layout(), false); + to_mem = body_network->get_engine().allocate_memory(from_mem->get_layout(), false); GPU_DEBUG_LOG << iter << ") [SINGLE] Backedge_to node(" << to_id << ") is set to new memory(" << to_mem << ", " << to_mem->get_layout().to_short_string() << ") because of shape update from backedge_from()" << from_id diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 0b8e4b739f0cf7..f986e73490a484 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -2411,11 +2411,11 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, if ((_node.is_output() && is_reorder_weights) || (!_node.is_output() && _node.is_type<data>())) reset = false; GPU_DEBUG_LOG << "[" << _node.id() << ": constant]" << std::endl; - return ov::intel_gpu::allocate_memory_evenif_zero_bytes(_engine, layout, alloc_type, reset); + return _engine.allocate_memory(layout, alloc_type, reset); } } else if (!_node.can_share_buffer() || impl_params.can_be_optimized() || _node.is_output()) { GPU_DEBUG_LOG << "[" << _node.id() << ": output]" << std::endl; - return ov::intel_gpu::allocate_memory_evenif_zero_bytes(_engine, layout, alloc_type, reset); + return _engine.allocate_memory(layout, alloc_type, reset); } else { return get_memory_from_pool(_engine, net_id, diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp index ee7e9a85d735b8..0c8fb2126a8aaa --- 
a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp @@ -454,22 +454,25 @@ gpu_usm::gpu_usm(ocl_engine* engine, const layout& layout, allocation_type type) , memory(engine, layout, type, nullptr) , _buffer(engine->get_usm_helper()) , _host_buffer(engine->get_usm_helper()) { + auto actual_bytes_count = _bytes_count; + if (actual_bytes_count == 0) + actual_bytes_count = 1; switch (get_allocation_type()) { case allocation_type::usm_host: - _buffer.allocateHost(_bytes_count); + _buffer.allocateHost(actual_bytes_count); break; case allocation_type::usm_shared: - _buffer.allocateShared(_bytes_count); + _buffer.allocateShared(actual_bytes_count); break; case allocation_type::usm_device: - _buffer.allocateDevice(_bytes_count); + _buffer.allocateDevice(actual_bytes_count); break; default: CLDNN_ERROR_MESSAGE("gpu_usm allocation type", "Unknown unified shared memory type!"); } - m_mem_tracker = std::make_shared<MemoryTracker>(engine, _buffer.get(), layout.bytes_count(), type); + m_mem_tracker = std::make_shared<MemoryTracker>(engine, _buffer.get(), actual_bytes_count, type); } void* gpu_usm::lock(const stream& stream, mem_lock_type type) {