Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] Fix many unit-test failures caused by zero-byte allocation of OCL memory #28270

Merged
merged 1 commit into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 0 additions & 21 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

#include <ostream>
#include <tuple>
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/layout.hpp"
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/optionals.hpp"
Expand Down Expand Up @@ -104,26 +103,6 @@ inline ov::Shape predict_shape(const std::string& name, const cldnn::layout layo
return layout.get_shape();
}

// Allocates memory for `layout` even when it describes zero bytes.
// OpenCL cannot allocate an empty buffer, so a zero-byte request is backed by
// a minimal one-element buffer which is then reinterpreted with the original
// (empty) layout; callers still observe the requested zero-byte layout.
inline cldnn::memory::ptr allocate_memory_evenif_zero_bytes(cldnn::engine& _engine,
                                                            const cldnn::layout& layout,
                                                            cldnn::allocation_type type,
                                                            bool reset = true) {
    // Common case: non-empty layout, allocate directly.
    if (layout.bytes_count() != 0)
        return _engine.allocate_memory(layout, type, reset);

    // Zero-byte workaround: allocate a single-element stub (no reset needed —
    // the logical view is empty) and view it through the requested layout.
    const cldnn::layout stub_layout({1}, layout.data_type, layout.format);
    auto stub_mem = _engine.allocate_memory(stub_layout, type, false);
    return _engine.reinterpret_buffer(*stub_mem, layout);
}

// Convenience overload: selects the engine's lockable preferred allocation
// type for this layout's format (image2d vs. buffer) and forwards to the
// main overload above.
inline cldnn::memory::ptr allocate_memory_evenif_zero_bytes(cldnn::engine& _engine,
                                                            const cldnn::layout& layout,
                                                            bool reset = true) {
    const auto alloc_type = _engine.get_lockable_preferred_memory_allocation_type(layout.format.is_image_2d());
    return allocate_memory_evenif_zero_bytes(_engine, layout, alloc_type, reset);
}

/// WA: Force exit. Any opencl api call can be hang after CL_OUT_OF_RESOURCES.
inline void ForceExit() {
std::cerr << "[GPU] force exit.\n"
Expand Down
17 changes: 8 additions & 9 deletions src/plugins/intel_gpu/src/graph/loop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include "mutable_data_inst.h"
#include "json_object.h"
#include "primitive_type_base.h"
#include "intel_gpu/plugin/common_utils.hpp"
#include "intel_gpu/primitives/data.hpp"
#include "intel_gpu/primitives/mutable_data.hpp"
#include <string>
Expand Down Expand Up @@ -319,7 +318,7 @@ void loop_inst::update_backedge_mapped_memory() {
// generally, shouldn't go this way, but...
auto output_prim = body_network->get_primitive(back_edge.from);
layout output_layout = output_prim->output_memory().get_layout();
backedge_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), output_layout, false);
backedge_mem = body_network->get_engine().allocate_memory(output_layout, 0);
}
} else {
auto external_id = output_mapping.front()->external_id;
Expand Down Expand Up @@ -397,7 +396,7 @@ loop_inst::concatenated_memory_mapping::ptr loop_inst::create_concat_memory_map(
<< sliced_layout.get_partial_shape().to_string()
<< " to " << updated_sliced_layout.to_string() << std::endl;
sliced_layout.set_partial_shape(updated_sliced_layout);
inter_mem_ptr = ov::intel_gpu::allocate_memory_evenif_zero_bytes(engine, sliced_layout);
inter_mem_ptr = engine.allocate_memory(sliced_layout);
intern_prim->set_output_layout(sliced_layout, internal_id.idx);
}

Expand All @@ -407,8 +406,8 @@ loop_inst::concatenated_memory_mapping::ptr loop_inst::create_concat_memory_map(
} else {
sliced_mems.reserve(num_iterations);
sliced_mems.push_back(inter_mem_ptr);
for (int j=1; j < num_iterations; ++j) {
memory::ptr sliced_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(engine, sliced_layout);
for (int j = 1; j < num_iterations; ++j) {
memory::ptr sliced_mem = engine.allocate_memory(sliced_layout);
sliced_mems.push_back(sliced_mem);
}
}
Expand Down Expand Up @@ -499,7 +498,7 @@ void loop_inst::preprocess_input_memory(const int64_t num_iterations) {
// if internal input memory is in backedge, allocate new memory.
// Because internal input memory's data will be updated through backedge process.
if (iter != _back_edges.end()) {
internal_input_memory = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), memory->get_layout(), false);
internal_input_memory = body_network->get_engine().allocate_memory(memory->get_layout(), false);
internal_input_memory->copy_from(body_network->get_stream(), *memory);
GPU_DEBUG_LOG << "Input memory of internal node(" << internal_id.to_string() << ") is set to new memory("
<< internal_input_memory << ", " << internal_input_memory->get_layout().to_short_string()
Expand Down Expand Up @@ -722,7 +721,7 @@ void loop_inst::postprocess_output_memory(bool is_dynamic, int64_t current_itera
} else {
if (!output_allocated || get_flag(ExecutionFlags::SHAPE_CHANGED)) {
auto concat_layout = _impl_params->get_output_layout(external_id.idx);
auto concat_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(_network.get_engine(), concat_layout, false);
auto concat_mem = _network.get_engine().allocate_memory(concat_layout, false);
external_outputs[external_id.idx] = concat_mem;
auto iter = std::find_if(concatenated_output_mem_mappings.begin(),
concatenated_output_mem_mappings.end(),
Expand Down Expand Up @@ -1081,7 +1080,7 @@ std::vector<event::ptr> loop_inst::handle_buffers_for_next_iteration(const loop_
// Check backedge_to shape needs to be updated by initial_mem
OPENVINO_ASSERT(mapping.initial_mem != nullptr, "initial_mem should not be null");
if (!mapping.initial_mem->get_layout().identical(to_mem->get_layout())) {
to_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), mapping.initial_mem->get_layout(), false);
to_mem = body_network->get_engine().allocate_memory(mapping.initial_mem->get_layout(), false);

body_network->set_input_data(to_id, to_mem);
ev = to_mem->copy_from(body_network->get_stream(), *(mapping.initial_mem));
Expand All @@ -1104,7 +1103,7 @@ std::vector<event::ptr> loop_inst::handle_buffers_for_next_iteration(const loop_

// Check backedge_to shape needs to be updated by backedge_from
if (!from_mem->get_layout().identical(to_mem->get_layout())) {
to_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), from_mem->get_layout(), false);
to_mem = body_network->get_engine().allocate_memory(from_mem->get_layout(), false);
GPU_DEBUG_LOG << iter << ") [SINGLE] Backedge_to node(" << to_id << ") is set to new memory("
<< to_mem << ", " << to_mem->get_layout().to_short_string()
<< ") because of shape update from backedge_from()" << from_id
Expand Down
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/src/graph/primitive_inst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2411,11 +2411,11 @@ memory::ptr primitive_inst::allocate_output(engine& _engine,
if ((_node.is_output() && is_reorder_weights) || (!_node.is_output() && _node.is_type<input_layout>()))
reset = false;
GPU_DEBUG_LOG << "[" << _node.id() << ": constant]" << std::endl;
return ov::intel_gpu::allocate_memory_evenif_zero_bytes(_engine, layout, alloc_type, reset);
return _engine.allocate_memory(layout, alloc_type, reset);
}
} else if (!_node.can_share_buffer() || impl_params.can_be_optimized() || _node.is_output()) {
GPU_DEBUG_LOG << "[" << _node.id() << ": output]" << std::endl;
return ov::intel_gpu::allocate_memory_evenif_zero_bytes(_engine, layout, alloc_type, reset);
return _engine.allocate_memory(layout, alloc_type, reset);
} else {
return get_memory_from_pool(_engine,
net_id,
Expand Down
11 changes: 7 additions & 4 deletions src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -454,22 +454,25 @@ gpu_usm::gpu_usm(ocl_engine* engine, const layout& layout, allocation_type type)
, memory(engine, layout, type, nullptr)
, _buffer(engine->get_usm_helper())
, _host_buffer(engine->get_usm_helper()) {
auto actual_bytes_count = _bytes_count;
if (actual_bytes_count == 0)
actual_bytes_count = 1;
switch (get_allocation_type()) {
case allocation_type::usm_host:
_buffer.allocateHost(_bytes_count);
_buffer.allocateHost(actual_bytes_count);
break;
case allocation_type::usm_shared:
_buffer.allocateShared(_bytes_count);
_buffer.allocateShared(actual_bytes_count);
break;
case allocation_type::usm_device:
_buffer.allocateDevice(_bytes_count);
_buffer.allocateDevice(actual_bytes_count);
break;
default:
CLDNN_ERROR_MESSAGE("gpu_usm allocation type",
"Unknown unified shared memory type!");
}

m_mem_tracker = std::make_shared<MemoryTracker>(engine, _buffer.get(), layout.bytes_count(), type);
m_mem_tracker = std::make_shared<MemoryTracker>(engine, _buffer.get(), actual_bytes_count, type);
}

void* gpu_usm::lock(const stream& stream, mem_lock_type type) {
Expand Down
Loading