From 2be7e5f945a2eb116181a670ebb338a8fa533f6a Mon Sep 17 00:00:00 2001 From: Wilson Seok Date: Sun, 20 Oct 2024 21:50:57 -0700 Subject: [PATCH] [GPU] Fix fused op macro for dynamic shape eltwise fusing into convolution, fix deconvolution attribute kernel param when 1d (#27010) ### Details: - fix fused op input load macro for convolution with dynamic shape eltwise fusing - fix deconvolution kernel stride, pad, dilation axis extension for 1d ### Tickets: - 152406 --- .../src/graph/impls/ocl/deconvolution.cpp | 16 +- .../intel_gpu/src/kernel_selector/jitter.cpp | 16 +- .../convolution_backprop_data.cpp | 36 +++ .../dynamic/convolution.cpp | 253 ++++++++++++++++++ .../dynamic/convolution_backprop_data.cpp | 49 +++- 5 files changed, 358 insertions(+), 12 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp index 5e3462a6256364..95bd66867c1b8f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp @@ -5,6 +5,7 @@ #include "primitive_base.hpp" #include "deconvolution_inst.h" +#include "intel_gpu/plugin/common_utils.hpp" #include "deconvolution/deconvolution_kernel_selector.h" #include "deconvolution/deconvolution_kernel_base.h" @@ -54,19 +55,16 @@ struct deconvolution_impl : typed_primitive_impl_ocl { params.filterSize = { kx, ky, kz }; - uint32_t pad_z = std::max(pad.size() >= 3 ? pad[pad.size() - 3] : 0, 0); - uint32_t pad_y = std::max(pad.size() >= 2 ? pad[pad.size() - 2] : 0, 0); - uint32_t pad_x = std::max(pad.size() >= 1 ? pad[pad.size() - 1] : 0, 0); + uint32_t pad_x, pad_y, pad_z; + std::tie(pad_x, pad_y, pad_z) = ov::intel_gpu::get_xyz(pad, 0); params.padding = {pad_x, pad_y, pad_z}; - uint32_t stride_z = stride.size() >= 3 ? static_cast(stride[stride.size() - 3]) : 1; - uint32_t stride_y = stride.size() >= 2 ? 
static_cast(stride[stride.size() - 2]) : 1; - uint32_t stride_x = stride.size() >= 1 ? static_cast(stride[stride.size() - 1]) : 1; + uint32_t stride_x, stride_y, stride_z; + std::tie(stride_x, stride_y, stride_z) = ov::intel_gpu::get_xyz(stride, 1); params.stride = {stride_x, stride_y, stride_z}; - uint32_t dilation_z = dilation.size() >= 3 ? static_cast(dilation[dilation.size() - 3]) : 1; - uint32_t dilation_y = dilation.size() >= 2 ? static_cast(dilation[dilation.size() - 2]) : 1; - uint32_t dilation_x = dilation.size() >= 1 ? static_cast(dilation[dilation.size() - 1]) : 1; + uint32_t dilation_x, dilation_y, dilation_z; + std::tie(dilation_x, dilation_y, dilation_z) = ov::intel_gpu::get_xyz(dilation, 1); params.dilation = {dilation_x, dilation_y, dilation_z}; return params; diff --git a/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp b/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp index 480282b6060f16..33d13429fdcf3f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp @@ -2197,7 +2197,21 @@ std::string FusedOpsCodeGenerator::GetJitLoad(const FusedOpsConfiguration& conf, if (vec_size > 1) { return block_read; - } else if (input_tensor.LogicalSize() > 1) { + } + + bool multiple_elements = false; + // For a dynamic-shape input tensor, check whether any static dimension has more than one element. 
+ if (input_tensor.is_dynamic()) { + for (auto dim : input_tensor.GetDims()) { + auto v = dim.v; + if (v > 1) { + multiple_elements = true; + break; + } + } + } + + if (input_tensor.LogicalSize() > 1 || multiple_elements) { // Currently we assume that in such scenario we can safely load sub_group_size elements from the pointer return Broadcast(block_read, input_dt, vec_size); } else { diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp index f379b29ce23389..489f4096795361 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp @@ -211,4 +211,40 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadding_OutputPaddi ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvolutionBackpropDataLayerTest::getTestCaseName); +const std::vector numOutChannels1d = {256}; + +/* ============= 1D ConvolutionBackpropData ============= */ +const std::vector netPrecisions1D = { + ov::element::f32 +}; + +const std::vector> inputShapes1D = {{{1, 512, 577}}}; +const std::vector> kernels1D = {{16}}; +const std::vector> strides1D = {{8}}; +const std::vector> padBegins1D = {{4}}; +const std::vector> padEnds1D = {{4}}; +const std::vector> dilations1D = {{1}}; + + +const std::vector> outputPadding1D = {{0}}; + +const auto conv1DParams_ExplicitPadding_output_padding = ::testing::Combine( + ::testing::ValuesIn(kernels1D), + ::testing::ValuesIn(strides1D), + ::testing::ValuesIn(padBegins1D), + ::testing::ValuesIn(padEnds1D), + ::testing::ValuesIn(dilations1D), + ::testing::ValuesIn(numOutChannels1d), + ::testing::Values(ov::op::PadType::EXPLICIT), + ::testing::ValuesIn(outputPadding1D) +); + 
+INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData1D_ExplicitPadding, ConvolutionBackpropDataLayerTest, + ::testing::Combine( + conv1DParams_ExplicitPadding_output_padding, + ::testing::ValuesIn(netPrecisions1D), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes1D)), + ::testing::ValuesIn(emptyOutputShape), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + ConvolutionBackpropDataLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp index 093fca68b482fa..216a1b397c90bc 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp @@ -4,6 +4,7 @@ #include "common_test_utils/ov_tensor_utils.hpp" #include "common_test_utils/node_builders/activation.hpp" #include "common_test_utils/node_builders/convolution.hpp" +#include "common_test_utils/node_builders/eltwise.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" #include "shared_test_classes/single_op/convolution.hpp" @@ -317,4 +318,256 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionLayerGPUTest_dynamic3DAsymPad, Convolu ::testing::Values(false)), ConvolutionLayerGPUTestDynamic::getTestCaseName); +typedef std::tuple< + convSpecificParams, + ov::element::Type, // Model type + std::vector, // Input shapes + std::string, // Device name + bool // activation fusing +> convLayerFusingTestParamsSet; + + +class ConvolutionLayerGPUTestDynamicEltwiseFusing : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + convSpecificParams convParams; + ov::element::Type model_type; + std::vector inputShapes; + std::string targetDevice; + bool activationFusing; + std::tie(convParams, model_type, 
inputShapes, targetDevice, activationFusing) = obj.param; + + ov::op::PadType padType; + std::vector kernel, stride, dilation; + std::vector padBegin, padEnd; + size_t convOutChannels; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType) = convParams; + + std::ostringstream result; + for (const auto& inputShape : inputShapes) { + result << "IS="; + result << ov::test::utils::partialShape2str({inputShape.first}) << "_"; + result << "TS=("; + for (const auto& shape : inputShape.second) { + result << ov::test::utils::vec2str(shape) << "_"; + } + } + result << ")_"; + result << "K" << ov::test::utils::vec2str(kernel) << "_"; + result << "S" << ov::test::utils::vec2str(stride) << "_"; + result << "PB" << ov::test::utils::vec2str(padBegin) << "_"; + result << "PE" << ov::test::utils::vec2str(padEnd) << "_"; + result << "D=" << ov::test::utils::vec2str(dilation) << "_"; + result << "O=" << convOutChannels << "_"; + result << "AP=" << padType << "_"; + result << "netPRC=" << model_type << "_"; + result << "trgDev=" << targetDevice << "_"; + result << "activationFusing=" << activationFusing; + + return result.str(); + } + +protected: + void SetUp() override { + convSpecificParams convParams; + std::vector inputShapes; + auto model_type = ov::element::undefined; + bool activationFusing; + std::tie(convParams, model_type, inputShapes, targetDevice, activationFusing) = this->GetParam(); + + init_input_shapes({inputShapes}); + + ov::op::PadType padType; + std::vector kernel, stride, dilation; + std::vector padBegin, padEnd; + size_t convOutChannels; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType) = convParams; + + ov::ParameterVector inputParams; + for (auto&& shape : inputDynamicShapes) + inputParams.push_back(std::make_shared(model_type, shape)); + + auto convolutionNode = ov::test::utils::make_convolution(inputParams.front(), model_type, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels); + if 
(activationFusing) { + auto activationNode = ov::test::utils::make_activation(convolutionNode, model_type, ov::test::utils::ActivationTypes::Relu); + auto eltwiseNode = ov::test::utils::make_eltwise(inputParams.back(), activationNode, ov::test::utils::EltwiseTypes::ADD); + + ov::ResultVector results; + for (size_t i = 0; i < eltwiseNode->get_output_size(); i++) + results.push_back(std::make_shared(eltwiseNode->output(i))); + + function = std::make_shared(results, inputParams, "Convolution"); + } else { + auto eltwiseNode = ov::test::utils::make_eltwise(inputParams.back(), convolutionNode, ov::test::utils::EltwiseTypes::ADD); + + ov::ResultVector results; + for (size_t i = 0; i < eltwiseNode->get_output_size(); i++) + results.push_back(std::make_shared(eltwiseNode->output(i))); + + function = std::make_shared(results, inputParams, "Convolution"); + } + } +}; + +TEST_P(ConvolutionLayerGPUTestDynamicEltwiseFusing, Inference) { + run(); +} +const std::vector> dynInputShapes1D_test = { + { + { + {1, 192, ov::Dimension::dynamic()}, + {{1, 192, 191}} + }, + { + {1, 192, ov::Dimension::dynamic()}, + {{1, 192, 1}} + } + }, + { + { + {ov::Dimension::dynamic(), 192, ov::Dimension::dynamic()}, + {{1, 192, 257}} + }, + { + {1, 1, ov::Dimension::dynamic()}, + {{1, 1, 257}} + } + }, + { + { + {ov::Dimension::dynamic(), 192, ov::Dimension::dynamic()}, + {{1, 192, 257}} + }, + { + {1, ov::Dimension::dynamic(), ov::Dimension::dynamic()}, + {{1, 1, 1}} + } + }, + { + { + {ov::Dimension::dynamic(), 192, ov::Dimension::dynamic()}, + {{1, 192, 1}} + }, + { + {1, ov::Dimension::dynamic(), ov::Dimension::dynamic()}, + {{1, 1, 1}} + } + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionLayerGPUTest_dynamic1D_test_0, ConvolutionLayerGPUTestDynamicEltwiseFusing, + ::testing::Combine( + ::testing::Combine( + ::testing::Values(std::vector{1}), + ::testing::Values(std::vector{1}), + ::testing::Values(std::vector{0}), + ::testing::Values(std::vector{0}), + ::testing::Values(std::vector{1}), + 
::testing::Values(192), + ::testing::Values(ov::op::PadType::EXPLICIT)), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn(dynInputShapes1D_test), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(false)), + ConvolutionLayerGPUTestDynamicEltwiseFusing::getTestCaseName); + +const std::vector> dynInputShapes1D_test1 = { + { + { + {1, 512, ov::Dimension::dynamic()}, + {{1, 512, 191}} + }, + { + {1, 512, ov::Dimension::dynamic()}, + {{1, 512, 1}} + } + }, + { + { + {ov::Dimension::dynamic(), 512, ov::Dimension::dynamic()}, + {{1, 512, 191}} + }, + { + {1, 1, ov::Dimension::dynamic()}, + {{1, 1, 191}} + } + }, + { + { + {ov::Dimension::dynamic(), 512, ov::Dimension::dynamic()}, + {{1, 512, 191}} + }, + { + {1, 1, ov::Dimension::dynamic()}, + {{1, 1, 1}} + } + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionLayerGPUTest_dynamic1D_test_1, ConvolutionLayerGPUTestDynamicEltwiseFusing, + ::testing::Combine( + ::testing::Combine( + ::testing::Values(std::vector{1}), + ::testing::Values(std::vector{1}), + ::testing::Values(std::vector{0}), + ::testing::Values(std::vector{0}), + ::testing::Values(std::vector{1}), + ::testing::Values(512), + ::testing::Values(ov::op::PadType::EXPLICIT)), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn(dynInputShapes1D_test1), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(false)), + ConvolutionLayerGPUTestDynamicEltwiseFusing::getTestCaseName); + +const std::vector> dynInputShapes1D_test2 = { + { + { + {1, 2048, ov::Dimension::dynamic()}, + {{1, 2048, 191}} + }, + { + {1, 2048, ov::Dimension::dynamic()}, + {{1, 2048, 1}} + } + }, + { + { + {1, 2048, ov::Dimension::dynamic()}, + {{1, 2048, 191}} + }, + { + {ov::Dimension::dynamic(), 1, ov::Dimension::dynamic()}, + {{1, 1, 191}} + } + }, + { + { + {1, 2048, ov::Dimension::dynamic()}, + {{1, 2048, 191}} + }, + { + {ov::Dimension::dynamic(), 1, ov::Dimension::dynamic()}, + {{1, 1, 1}} + } + }, +}; + 
+INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionLayerGPUTest_dynamic1D_test_2, ConvolutionLayerGPUTestDynamicEltwiseFusing, + ::testing::Combine( + ::testing::Combine( + ::testing::Values(std::vector{1}), + ::testing::Values(std::vector{1}), + ::testing::Values(std::vector{0}), + ::testing::Values(std::vector{0}), + ::testing::Values(std::vector{1}), + ::testing::Values(2048), + ::testing::Values(ov::op::PadType::EXPLICIT)), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn(dynInputShapes1D_test2), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(false)), + ConvolutionLayerGPUTestDynamicEltwiseFusing::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp index 6b255c9981c08a..98176acfc9bdc7 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp @@ -102,8 +102,8 @@ class DeconvolutionLayerGPUTest : public testing::WithParamInterface> emptyOutputPadding1d = { {0} }; + +/* ============= Deconvolution params ============= */ +const std::vector numOutChannels1d = { 256 }; + +/* ============= Deconvolution params (1D) ============= */ +const std::vector> kernels1d = { {16} }; +const std::vector> strides1d = { {8} }; +const std::vector> padBegins1d = { {4} }; +const std::vector> padEnds1d = { {4} }; +const std::vector> dilations1d = { {1} }; + +/* ============= Deconvolution (1D) ============= */ +const auto convParams_ExplicitPadding_1D = ::testing::Combine( + ::testing::ValuesIn(kernels1d), + ::testing::ValuesIn(strides1d), + ::testing::ValuesIn(padBegins1d), + ::testing::ValuesIn(padEnds1d), + ::testing::ValuesIn(dilations1d), + ::testing::ValuesIn(numOutChannels1d), + 
::testing::Values(ov::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding1d) +); + +const std::vector dyn_1D_inputs_smoke = { + DeconvInputData{ + InputShape{{1, 512, -1}, {{1, 512, 577}}}, + ov::test::utils::InputLayerType::CONSTANT, + {} + }, +}; + +const std::vector netPrecisions1D = { + ov::element::f32 +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Deconv_1D_Dynamic_FP32, DeconvolutionLayerGPUTest, + ::testing::Combine( + convParams_ExplicitPadding_1D, + ::testing::ValuesIn(dyn_1D_inputs_smoke), + ::testing::ValuesIn(netPrecisions1D), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(emptyAdditionalConfig)), + DeconvolutionLayerGPUTest::getTestCaseName); } // namespace