diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 6edc4f062536d0..90820d550df179 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -237,7 +237,6 @@ std::vector disabledTestPatterns() { R"(.*smoke_FakeQuantize.*/FakeQuantizeLayerTest.Inference.*TS=.*3.4.2.5.*LEVELS=255.*)", R"(.*smoke_FakeQuantizePerChannel.*/FakeQuantizeLayerTest.Inference.*TS=.*11.10.22.19.*LEVELS=(255|256).*netPRC=f32.*)", R"(.*smoke_MVN_5D/Mvn6LayerTest.Inference.*TS=.*3.4.2.5.*LEVELS=255.*netPRC=f16.*)", - R"(.*smoke_Snippets_MHAINT8MatMul/MHAINT8MatMul.*)", R"(.*smoke_static/ConvertFqRnnToQuantizedRnn.*2.1.5.*2.1.1.*2.1.1.*)", R"(.*smoke_InterpolateBicubicPillow_Layout_Test/InterpolateLayerCPUTest.CompareWithRefs/ShapeCalcMode=sizes_IS=\[?.2..20.?.?\]_TS.*1.17.4.4.*2.3.10.12.*1.17.4.4.*Sizes.*4.4.*10.20.*10.4.*PARAMETER.*0.0.0.0.*0.0.1.1.*2.3.*)", R"(.*smoke_LoopForCommon/LoopLayerCPUTest.CompareWithRefs/.*_netType=bf16.*)", @@ -563,7 +562,7 @@ std::vector disabledTestPatterns() { // ignored for not supported bf16 platforms retVector.emplace_back(R"(.*smoke_Snippets_EnforcePrecision_bf16.*)"); retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16.*)"); - retVector.emplace_back(R"(.*smoke_Snippets_MHAEnforceBF16.*)"); + retVector.emplace_back(R"(.*smoke_Snippets_MHA.*EnforceBF16.*)"); retVector.emplace_back(R"(.*ConcatSDPTest.*bf16.*)"); } // [150842] Need to support dynamic K dimension of BF16|INT8 MatMul on AMX systems @@ -572,6 +571,11 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(.*smoke_Snippets_MatMul/MatMul.CompareWithRefImpl/.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)"); retVector.emplace_back(R"(.*smoke_Snippets_MatMulTransposeB.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)"); retVector.emplace_back(R"(.*smoke_Snippets_MatMulBias.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)"); + + retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16_3D.*IS\[1\]=\[2.64.\?\].*)"); + retVector.emplace_back(R"(.*smoke_Snippets_MHA.*BF16.*/MHA.*IS\[0\]=\[(\?|1).(\?|4).(\?|12).(\?|64)\].*)"); + retVector.emplace_back(R"(.*smoke_Snippets_MHA.*BF16.*/MHA.*IS\[0\]=\[\?.\?.\?\].*)"); + retVector.emplace_back(R"(.*smoke_Snippets_(MHAINT8MatMul|MHAQuantMatMul0|MHAFQAfterMatMul_4D|smoke_Snippets_MHAFQ).*IS\[0\]=\[\?.\?.\?\.\?].*)"); } #ifdef SNIPPETS_LIBXSMM_TPP // GN in TPP requires exposing tmp Buffer results outside the loop (ticket: 151234) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp index f5057137f9b65c..176f0cb4d46aed 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp @@ -4,44 +4,26 @@ #include "snippets/matmul.hpp" -#include "common_test_utils/test_constants.hpp" -#include "openvino/runtime/system_conf.hpp" +#include "utils.hpp" namespace ov { namespace test { namespace snippets { -#define STATIC_SHAPES(...) 
static_shapes_to_test_representation(std::vector>{__VA_ARGS__}) - namespace { -static inline std::vector> quantized_precisions() { - std::vector> prc = {}; - // In Snippets MatMul INT8 is supported only on VNNI/AMX platforms - if (ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8()) { - prc.emplace_back(std::vector{element::i8, element::i8}); - prc.emplace_back(std::vector{element::u8, element::i8}); - } - return prc; -} - static inline std::vector> precisions() { - std::vector> prc = { - {element::f32, element::f32}, - }; + std::vector> prc = precision_f32(2); // Note: TPP doesn't support low precisions yet #ifndef SNIPPETS_LIBXSMM_TPP - auto quant = quantized_precisions(); + auto quant = quantized_precisions_if_supported(); std::copy(quant.begin(), quant.end(), std::back_inserter(prc)); - // In Snippets MatMul BF16 is supported only on bf16/AMX platforms - if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) { - prc.emplace_back(std::vector{element::bf16, element::bf16}); - } + auto bfloat = precision_bf16_if_supported(2); + std::copy(bfloat.begin(), bfloat.end(), std::back_inserter(prc)); #endif return prc; } - std::vector> input_shapes{ { {{}, {{2, 1, 3, 5}}}, {{}, {{1, 3, 5, 3}}} }, { {{}, {{3, 1, 32, 14}}}, {{}, {{1, 3, 14, 37}}} }, @@ -158,7 +140,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBias, MatMulBias, INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBiasQuantized, MatMulBiasQuantized, ::testing::Combine( ::testing::ValuesIn(input_shapes_bias), - ::testing::ValuesIn(quantized_precisions()), + ::testing::ValuesIn(quantized_precisions_if_supported()), ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // Subgraph ::testing::Values(1), // Tokenized MatMul+Bias @@ -167,8 +149,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBiasQuantized, MatMulBiasQuantized INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized, ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), - ::testing::ValuesIn(quantized_precisions()), + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), + ::testing::ValuesIn(quantized_precisions_if_supported()), ::testing::Values(MatMulType::MatMul), ::testing::Values(3), // Subgraph + Reshape + Subgraph ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ] @@ -177,8 +159,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized, INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantizedSoftmax, MatMulsQuantizedSoftmax, ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), - ::testing::ValuesIn(quantized_precisions()), + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), + ::testing::ValuesIn(quantized_precisions_if_supported()), ::testing::Values(MatMulType::MatMul), ::testing::Values(3), // Subgraph + Reshape + Subgraph ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ] diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp index 79db0b1546b2a8..63f5176684ccc1 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp @@ -1,60 +1,70 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel 
Corporation // SPDX-License-Identifier: Apache-2.0 // #include "snippets/mha.hpp" -#include "common_test_utils/test_constants.hpp" -#include "internal_properties.hpp" -#include "utils/cpu_test_utils.hpp" -#include "openvino/runtime/system_conf.hpp" +#include "utils.hpp" namespace ov { namespace test { namespace snippets { -#define STATIC_SHAPES(...) static_shapes_to_test_representation(std::vector>{__VA_ARGS__}) namespace { -const auto& inputShapes_4D = STATIC_SHAPES( - {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}}, - {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 16, 1, 1}, {1, 128, 16, 64}}, - {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 1, 1, 128}, {1, 128, 16, 64}}, - {{2, 68, 6, 92}, {2, 68, 6, 92}, {1, 1, 68, 68}, {2, 68, 6, 92}}, - {{1, 58, 16, 34}, {1, 58, 16, 34}, {1, 1, 1, 58}, {1, 58, 16, 34}}); - -const auto& inputShapes_3D = STATIC_SHAPES( - {{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}}, - {{68, 6, 92}, {68, 6, 92}, {1, 68, 68}, {68, 6, 92}}, - {{16, 2, 92}, {68, 2, 92}, {1, 16, 68}, {68, 2, 92}}); - -static inline bool is_bf16_supported() { - return ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16(); -} - -static inline std::vector> precision_f32(size_t count) { - std::vector> prc; - prc.emplace_back(std::vector(count, element::f32)); - return prc; -} - -static inline std::vector> precision_bf16(size_t count) { - std::vector> prc; - if (is_bf16_supported()) - prc.emplace_back(std::vector(count, element::bf16)); - return prc; +std::vector> transposedShape_4D(bool with_dynamic = true) { + auto shapes = SNIPPETS_TESTS_STATIC_SHAPES( + {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}}, + {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 16, 1, 1}, {1, 128, 16, 64}}, + {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 1, 1, 128}, {1, 128, 16, 64}}, + {{2, 68, 6, 92}, {2, 68, 6, 92}, {1, 1, 68, 68}, {2, 68, 6, 92}}, + {{1, 58, 16, 34}, {1, 58, 16, 34}, {1, 1, 1, 58}, {1, 58, 16, 34}}); + if (with_dynamic) { + std::vector> dynamic_shapes = {{ + {PartialShape{-1, -1, -1, 100}, {{1, 64, 4, 100}, {2, 16, 2, 100}, {1, 72, 4, 100}}}, + {PartialShape{-1, 128, -1, 100}, {{1, 128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + {PartialShape{-1, -1, -1, 128}, {{1, 4, 64, 128}, {2, 2, 16, 128}, {1, 4, 72, 128}}}, + {PartialShape{-1, 128, -1, 100}, {{1, 128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 16, 2, 100}, {1, 128, 3, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 128, 2, 100}, {1, 128, 1, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {2, 2, 16, 128}, {2, 1, 128, 128}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 128, 2, 100}, {1, 128, 3, 64}}}, + }, + { + {PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, + {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, + }}; + shapes.insert(shapes.end(), dynamic_shapes.begin(), dynamic_shapes.end()); + } + return shapes; } -static ov::AnyMap enable_callback() { - return ov::AnyMap({ov::intel_cpu::snippets_mode(ov::intel_cpu::SnippetsMode::ENABLE)}); +std::vector> transposedShape_3D(bool with_dynamic = true) { + 
auto shapes = SNIPPETS_TESTS_STATIC_SHAPES( + {{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}}, + {{68, 6, 92}, {68, 6, 92}, {1, 68, 68}, {68, 6, 92}}, + {{16, 2, 92}, {68, 2, 92}, {1, 16, 68}, {68, 2, 92}}); + if (with_dynamic) { + shapes.push_back({ + {PartialShape{-1, -1, -1}, {{128, 3, 64}, {128, 3, 64}, {68, 6, 87}}}, + {PartialShape{-1, -1, -1}, {{128, 1, 64}, {128, 1, 64}, {13, 6, 87}}}, + {PartialShape{-1, -1, -1}, {{1, 128, 128}, {1, 128, 128}, {1, 68, 13}}}, + {PartialShape{-1, -1, -1}, {{128, 3, 64}, {128, 3, 64}, {13, 6, 87}}}, + }); + } + return shapes; } INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_4D, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D()), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false, true}), + ::testing::Values(false), ::testing::Values(MHA::default_thread_count), ::testing::Values(1), ::testing::Values(1), @@ -62,27 +72,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_4D, ::testing::Values(CPUTestUtils::empty_plugin_config)), MHA::getTestCaseName); -std::vector> inputShapes_4D_dynamic{ - { - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 70, 3, 19}, {1, 128, 3, 64}, {1, 68, 6, 87}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 49, 1, 19}, {1, 128, 1, 64}, {2, 13, 6, 87}}}, - {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 70, 49}, {2, 1, 128, 128}, {1, 1, 68, 13}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 49, 3, 19}, {1, 128, 3, 64}, {2, 13, 6, 87}}}, - }, - { - {PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, - {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, - } -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMHA_4D, +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_4D_WithScalarMul, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D_dynamic), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D(false)), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false}), + ::testing::Values(true), ::testing::Values(MHA::default_thread_count), ::testing::Values(1), ::testing::Values(1), @@ -90,13 +85,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMHA_4D, ::testing::Values(CPUTestUtils::empty_plugin_config)), MHA::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_3D, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_3D), + ::testing::Combine(::testing::ValuesIn(transposedShape_3D()), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false, true}), + ::testing::Values(false), ::testing::Values(MHA::default_thread_count), ::testing::Values(5), // [122706]: Subgraph + 4 Transpose ::testing::Values(2), // decomposed Transpose + MHA @@ -104,111 +98,23 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_3D, ::testing::Values(CPUTestUtils::empty_plugin_config)), MHA::getTestCaseName); -const auto& splitm_static_shapes = STATIC_SHAPES({{1, 128, 2, 64}, {1, 128, 2, 64}, {1, 1, 1, 1}, {1, 128, 2, 64}}); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_4D_SplitDimensionM_static, - MHA, - 
::testing::Combine(::testing::ValuesIn(splitm_static_shapes), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(true), - ::testing::Values(4), // 4 Threads - ::testing::Values(6), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(enable_callback())), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_3D_SplitDimensionM_static, - MHA, - ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{384, 2, 64}, {384, 2, 64}, {1, 384, 384}, {384, 2, 64}})), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(true), - ::testing::Values(4), // 4 Threads - ::testing::Values(10), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output + 4 Transposes - ::testing::Values(1), // MHA - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(enable_callback())), - MHA::getTestCaseName); - -std::vector> splitm_dynamic_shapes_4d = { - { - {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 1, 1, 128}, {1, 1, 1, 17}, {1, 1, 1, 128}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, - }, - { - {PartialShape{-1, 128, -1, -1}, {{1, 128, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, 128, -1}, {{1, 1, 128, 16}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, - }, - { - {PartialShape{-1, 32, -1, -1}, {{1, 32, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, 32, -1}, {{1, 1, 32, 16}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, - }, - { - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, 16, -1}, {{1, 1, 16, 16}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, - }, -}; - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_4D_SplitDimensionM_dynamic, - MHA, - ::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_4d), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(false), - ::testing::Values(4), // 4 Threads - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> splitm_dynamic_shapes_3d = { - { - {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, - {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, - {PartialShape{-1, -1, -1}, {{1, 1, 128}, {1, 1, 17}, {1, 1, 128}}}, - {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, - }, - { - {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, - {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, - {PartialShape{1, 1, -1}, {{1, 1, 128}, {1, 1, 64}, {1, 1, 128}}}, - {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, - }, -}; - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_3D_SplitDimensionM_dynamic, - MHA, - ::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_3d), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(false), - ::testing::Values(4), // 4 Threads - ::testing::Values(5), // Subgraph + 4 Transpose - 
::testing::Values(2), // MHA + one of the transposes is executed via Subgraph (because callback is disabled) - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_3D_WithScalarMul, + MHA, + ::testing::Combine(::testing::ValuesIn(transposedShape_3D(false)), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // [122706]: Subgraph + 4 Transpose + ::testing::Values(2), // decomposed Transpose + MHA + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHABF16_4D, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), - ::testing::ValuesIn(precision_bf16(4)), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D()), + ::testing::ValuesIn(precision_bf16_if_supported(4)), ::testing::Values(ov::element::f32), ::testing::ValuesIn({false, true}), ::testing::Values(MHA::default_thread_count), @@ -220,7 +126,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHABF16_4D, INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAEnforceBF16, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D()), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::bf16), ::testing::ValuesIn({false}), @@ -231,321 +137,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAEnforceBF16, ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), MHA::getTestCaseName); -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAMulAdd, - MHAMulAdd, - ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 10, 12, 16}, {1, 10, 12, 16}, {1, 10, 12, 16}})), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false}), // Need to support True for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -const auto& inputShapeSelect = STATIC_SHAPES( - // without broadcast - {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 128, 12, 64}}, - {{1, 94, 12, 54}, {1, 94, 12, 54}, {1, 12, 94, 94}, {1, 12, 94, 94}, {1, 12, 94, 94}, {1, 94, 12, 54}}, - // with broadcast - {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 1, 1}, {1, 12, 1, 1}, {1, 128, 12, 64}}, - {{2, 52, 6, 102}, {2, 52, 6, 102}, {1, 6, 52, 52}, {1, 6, 1, 1}, {1, 6, 1, 1}, {2, 52, 6, 102}} -); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA, - MHASelect, - ::testing::Combine(::testing::ValuesIn(inputShapeSelect), - ::testing::ValuesIn(precision_f32(6)), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // Need to support True for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(2), // Less + MHA - ::testing::Values(2), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -const auto& inputShapesWOTranspose_4D = STATIC_SHAPES( - {{1, 12, 197, 64}, {1, 12, 64, 197}, {1, 12, 197, 64}}, - {{1, 12, 12, 64}, {1, 12, 64, 48}, {1, 12, 48, 64}}); -const auto& inputShapesWOTranspose_3D = STATIC_SHAPES( - {{12, 
197, 64}, {12, 64, 197}, {12, 197, 64}}, - {{12, 128, 100}, {12, 100, 128}, {12, 128, 100}}); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeOnInputs_4D, - MHAWOTransposeOnInputs, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(true), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTranspose_4D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTranspose_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> inputShapesWOTranspose_3D_dynamic{ - { - {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, - {PartialShape{-1, -1, -1}, {{1, 85, 19}, {2, 36, 40}}}, - {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, - }, - { - {PartialShape{2, -1, 64}, {{2, 9, 64}, {2, 2, 64}, {2, 9, 64}}}, - {PartialShape{2, 64, -1}, {{2, 64, 9}, {2, 64, 2}, {2, 64, 9}}}, - {PartialShape{2, -1, 64}, {{2, 9, 64}, {2, 2, 64}, {2, 9, 64}}}, - }, -}; - - - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_DynMHAWOTranspose_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D_dynamic), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeBF16_4D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::ValuesIn(precision_bf16(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeBF16_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D), - 
::testing::ValuesIn(precision_bf16(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeEnforceBF16_4D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::bf16), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeEnforceBF16_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::bf16), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAINT8MatMul, - MHAINT8MatMul, - ::testing::Combine(::testing::ValuesIn(std::vector>(inputShapes_4D.begin(), - inputShapes_4D.begin() + 2)), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(6), // FQx3 on inputs + MHA + Transpose on output + Deq Mul - ::testing::Values(5), // FQx3 on inputs + MHA + Deq Mul - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAQuantMatMul0, - MHAQuantMatMul0, - ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 128, 768}, {1, 128, 768}, {1, 1, 1, 128}, {1, 128, 768}})), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(9), // FQx2 on inputs + MHA + Transpose on output + 4 Reshapes + Deq Mul - ::testing::Values(4), // FQx2 on inputs + MHA + Deq Mul - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAFQAfterMatMul_4D, - MHAFQAfterMatMul, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(3), // MHA + Transpose on output + Deq Mul - ::testing::Values(2), // MHA + Deq Mul - 
::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAFQ, - MHAFQ, - ::testing::Combine(::testing::ValuesIn(STATIC_SHAPES({{1, 64, 12, 64}, - {1, 64, 12, 64}, - {1, 1, 1, 64}, - {1, 64, 12, 64}})), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(7), // Transposex2 + Subgraphsx5 - ::testing::Values(5), // MHA + Deq Mul on output + Deqs on inputs + 2 xFQ on inputs - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> inputShapesTransposedB { - { - {{}, {{1, 12, 12, 64}}}, - {{}, {{1, 12, 48, 64}}}, - {{}, {{1, 12, 48, 64}}} - }, - { - {PartialShape{-1, 3, -1, 64}, {{1, 3, 12, 64}, {2, 3, 36, 64}}}, - {PartialShape{-1, 3, -1, 64}, {{1, 3, 14, 64}, {2, 3, 42, 64}}}, - {PartialShape{-1, 3, -1, -1}, {{1, 3, 14, 36}, {2, 3, 42, 36}}}, - }, - { - {PartialShape{2, -1, 32, -1}, {{2, 1, 32, 70}, {2, 2, 32, 96}}}, - {PartialShape{2, -1, 49, -1}, {{2, 3, 49, 70}, {2, 1, 49, 96}}}, - {PartialShape{2, -1, 49, -1}, {{2, 1, 49, 17}, {2, 2, 49, 81}}}, - }, -}; - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHATransposedB, - MHATransposedB, - ::testing::Combine(::testing::ValuesIn(inputShapesTransposedB), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -const auto& inputShapesExtractedReshape = STATIC_SHAPES( - {{2, 196, 64}, {2, 64, 196}, {2, 14, 14, 14, 1}, {2, 14, 14, 1, 14}, {2, 196, 64}}, - {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 1}, {1, 4, 4, 1, 4}, {1, 16, 10}}, - {{1, 16, 10}, {1, 10, 16}, {1, 1, 1, 1, 1}, {1, 4, 4, 4, 4}, {1, 16, 10}}, - {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 4}, {1, 1, 1, 1, 1}, {1, 16, 10}}, - {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 4, 256}, {1, 4, 256}, {1, 4, 16, 10}}, - {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 1, 256}, {1, 4, 1}, {1, 4, 16, 10}}); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWithExtractedReshape, - MHAWithExtractedReshape, - ::testing::Combine(::testing::ValuesIn(inputShapesExtractedReshape), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // False is not supported for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(3), // Extracted Add + Extracted Reshape + MHA - ::testing::Values(2), // Extracted Add + MHA - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> inputShapes_4D_WithMul_dynamic{ - { - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 70, 3, 19}, {1, 128, 3, 64}, {1, 68, 6, 87}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 49, 1, 19}, {1, 128, 1, 64}, {2, 13, 6, 87}}}, - {PartialShape{1}, {{1}, {1}, {1}, {1} }}, - {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 70, 49}, {2, 1, 128, 128}, {1, 1, 68, 13}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 49, 3, 19}, {1, 128, 3, 64}, {2, 13, 6, 87}}}, - }, - { - 
{PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, - {PartialShape{-1, 12, 64, -1}, {{1, 12, 64, 35}, {1, 12, 64, 10}, {1, 12, 64, 10}, {1, 12, 64, 1}, {1, 12, 64, 35}}}, - {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, - } -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMHA_4D_WithMul, - MHAWithDynamicMul, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D_WithMul_dynamic), - ::testing::ValuesIn(precision_f32(5)), - ::testing::Values(ov::element::f32), - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHAWithDynamicMul::getTestCaseName); - } // namespace } // namespace snippets } // namespace test diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_extracted_reshape.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_extracted_reshape.cpp new file mode 100644 index 00000000000000..f3c1439395650a --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_extracted_reshape.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +const auto& inputShapesExtractedReshape = SNIPPETS_TESTS_STATIC_SHAPES( + {{2, 196, 64}, {2, 64, 196}, {2, 14, 14, 14, 1}, {2, 14, 14, 1, 14}, {2, 196, 64}}, + {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 1}, {1, 4, 4, 1, 4}, {1, 16, 10}}, + {{1, 16, 10}, {1, 10, 16}, {1, 1, 1, 1, 1}, {1, 4, 4, 4, 4}, {1, 16, 10}}, + {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 4}, {1, 1, 1, 1, 1}, {1, 16, 10}}, + {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 4, 256}, {1, 4, 256}, {1, 4, 16, 10}}, + {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 1, 256}, {1, 4, 1}, {1, 4, 16, 10}}); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWithExtractedReshape, + MHAWithExtractedReshape, + ::testing::Combine(::testing::ValuesIn(inputShapesExtractedReshape), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn({true}), // False is not supported for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(3), // Extracted Add + Extracted Reshape + MHA + ::testing::Values(2), // Extracted Add + MHA + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_fma.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_fma.cpp new file mode 100644 index 00000000000000..4bf35e2daa690d --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_fma.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { 
+namespace snippets { + +namespace { + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAMulAdd, + MHAMulAdd, + ::testing::Combine( + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 10, 12, 16}, {1, 10, 12, 16}, {1, 10, 12, 16}})), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn({false}), // Need to support True for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_quantized.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_quantized.cpp new file mode 100644 index 00000000000000..0c731b74565863 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_quantized.cpp @@ -0,0 +1,103 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> inputShapesQuantized { + { + {{}, {{1, 128, 16, 64}}}, + {{}, {{1, 128, 16, 64}}}, + {{}, {{1, 16, 1, 1}}}, + {{}, {{1, 128, 16, 64}}} + }, + { + {{}, {{2, 68, 6, 92}}}, + {{}, {{2, 68, 6, 92}}}, + {{}, {{1, 1, 68, 68}}}, + {{}, {{2, 68, 6, 92}}} + }, + // K, N are static + { + {PartialShape{-1, -1, -1, 100}, {{1, 64, 4, 100}, {2, 16, 2, 100}, {1, 72, 4, 100}}}, + {PartialShape{-1, 128, -1, 100}, {{1, 128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + {PartialShape{-1, -1, -1, 128}, {{1, 4, 64, 128}, {2, 2, 16, 128}, {1, 4, 72, 128}}}, + {PartialShape{-1, 128, -1, 100}, {{1, 128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 16, 2, 100}, {1, 128, 3, 64}, {1, 128, 12, 600}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 128, 2, 100}, {1, 128, 1, 64}, {1, 128, 12, 600}}}, + {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 1, 128}, {2, 1, 128, 128}, {1, 12, 1, 1}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 128, 2, 100}, {1, 128, 3, 64}, {1, 128, 12, 600}}}, + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAINT8MatMul, + MHAINT8MatMul, + ::testing::Combine(::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(6), // FQx3 on inputs + MHA + Transpose on output + Deq Mul + ::testing::Values(5), // FQx3 on inputs + MHA + Deq Mul + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAQuantMatMul0, + MHAQuantMatMul0, + ::testing::Combine( + ::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // FQx2 on inputs + MHA + Transpose on output + Deq Mul + ::testing::Values(4), // FQx2 on inputs + MHA + Deq Mul + ::testing::Values(ov::test::utils::DEVICE_CPU), + 
::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAFQAfterMatMul_4D, + MHAFQAfterMatMul, + ::testing::Combine(::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(3), // MHA + Transpose on output + Deq Mul + ::testing::Values(2), // MHA + Deq Mul + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAFQ, + MHAFQ, + ::testing::Combine(::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(7), // Transposex2 + Subgraphsx5 + ::testing::Values(5), // MHA + Deq Mul on output + Deqs on inputs + 2 xFQ on inputs + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_select.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_select.cpp new file mode 100644 index 00000000000000..3fc1417d20b102 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_select.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +const auto& inputShapeSelect = SNIPPETS_TESTS_STATIC_SHAPES( + // without broadcast + {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 128, 12, 64}}, + {{1, 94, 12, 54}, {1, 94, 12, 54}, {1, 12, 94, 94}, {1, 12, 94, 94}, {1, 12, 94, 94}, {1, 94, 12, 54}}, + // with broadcast + {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 1, 1}, {1, 12, 1, 1}, {1, 128, 12, 64}}, + {{2, 52, 6, 102}, {2, 52, 6, 102}, {1, 6, 52, 52}, {1, 6, 1, 1}, {1, 6, 1, 1}, {2, 52, 6, 102}} +); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA, + MHASelect, + ::testing::Combine(::testing::ValuesIn(inputShapeSelect), + ::testing::ValuesIn(precision_f32(6)), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // Need to support True for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(2), // Less + MHA + ::testing::Values(2), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_split_dim_m.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_split_dim_m.cpp new file mode 100644 index 00000000000000..bb5f7fe2fa5b52 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_split_dim_m.cpp @@ -0,0 +1,121 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + 
+#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +static ov::AnyMap enable_callback() { + return ov::AnyMap({ov::intel_cpu::snippets_mode(ov::intel_cpu::SnippetsMode::ENABLE)}); +} + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_SplitDimensionM_static, + MHA, + ::testing::Combine(::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 128, 2, 64}, {1, 128, 2, 64}, {1, 1, 1, 1}, {1, 128, 2, 64}})), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), + ::testing::Values(4), // 4 Threads + ::testing::Values(6), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(enable_callback())), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_3D_SplitDimensionM_static, + MHA, + ::testing::Combine( + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{384, 2, 64}, {384, 2, 64}, {1, 384, 384}, {384, 2, 64}})), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), + ::testing::Values(4), // 4 Threads + ::testing::Values(10), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output + 4 Transposes + ::testing::Values(1), // MHA + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(enable_callback())), + MHA::getTestCaseName); + +std::vector> splitm_dynamic_shapes_4d = { + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 1, 1, 128}, {1, 1, 1, 17}, {1, 1, 1, 128}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, + }, + { + {PartialShape{-1, 128, -1, -1}, {{1, 128, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, 128, -1}, {{1, 1, 128, 16}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, + }, + { + {PartialShape{-1, 32, -1, -1}, {{1, 32, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, 32, -1}, {{1, 1, 32, 16}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, 16, -1}, {{1, 1, 16, 16}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_SplitDimensionM_dynamic, + MHA, + ::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_4d), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(false), + ::testing::Values(4), // 4 Threads + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +std::vector> splitm_dynamic_shapes_3d = { + { + {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, + {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, + {PartialShape{-1, -1, -1}, {{1, 1, 128}, {1, 1, 17}, {1, 1, 128}}}, + {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, + }, + { + {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, + {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, + {PartialShape{1, 1, -1}, {{1, 1, 128}, {1, 1, 64}, {1, 1, 128}}}, + {PartialShape{-1, 2, 64}, {{128, 
2, 64}, {64, 2, 64}, {128, 2, 64}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_3D_SplitDimensionM_dynamic, + MHA, + ::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_3d), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(false), + ::testing::Values(4), // 4 Threads + ::testing::Values(5), // Subgraph + 4 Transpose + ::testing::Values(2), // MHA + one of the transposes is executed via Subgraph (because callback is disabled) + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_transposed_b.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_transposed_b.cpp new file mode 100644 index 00000000000000..45260df3cab280 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_transposed_b.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> inputShapesTransposedB { + { + {{}, {{1, 12, 12, 64}}}, + {{}, {{1, 12, 48, 64}}}, + {{}, {{1, 12, 48, 64}}} + }, + { + {PartialShape{-1, 3, -1, 64}, {{1, 3, 12, 64}, {2, 3, 36, 64}}}, + {PartialShape{-1, 3, -1, 64}, {{1, 3, 14, 64}, {2, 3, 42, 64}}}, + {PartialShape{-1, 3, -1, -1}, {{1, 3, 14, 36}, {2, 3, 42, 36}}}, + }, + { + {PartialShape{2, -1, 32, -1}, {{2, 1, 32, 70}, {2, 2, 32, 96}}}, + {PartialShape{2, -1, 49, -1}, {{2, 3, 49, 70}, {2, 1, 49, 96}}}, + {PartialShape{2, -1, 49, -1}, {{2, 1, 49, 17}, {2, 2, 49, 81}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHATransposedB, + MHATransposedB, + ::testing::Combine(::testing::ValuesIn(inputShapesTransposedB), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn({true}), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_with_dyn_mul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_with_dyn_mul.cpp new file mode 100644 index 00000000000000..7876d737af2281 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_with_dyn_mul.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> transposedShape_4D_WithMul { + { + {PartialShape{-1, -1, -1, 100}, {{1, 64, 4, 100}, {2, 16, 2, 100}, {1, 72, 4, 100}}}, + {PartialShape{-1, 200, -1, 100}, {{1, 200, 4, 100}, {2, 200, 2, 100}, {1, 200, 4, 100}}}, + {PartialShape{-1, -1, 100, 200}, {{1, 4, 100, 200}, {2, 2, 100, 200}, {1, 4, 100, 200}}}, + {PartialShape{-1, -1, -1, 200}, {{1, 4, 64, 200}, {2, 2, 16, 200}, {1, 4, 72, 200}}}, + {PartialShape{-1, 200, -1, 100}, {{1, 200, 4, 100}, {2, 
200, 2, 100}, {1, 200, 4, 100}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 70, 3, 19}, {1, 128, 3, 64}, {1, 68, 6, 87}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 49, 1, 19}, {1, 128, 1, 64}, {2, 13, 6, 87}}}, + {PartialShape{1}, {{1}, {1}, {1}, {1} }}, + {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 70, 49}, {2, 1, 128, 128}, {1, 1, 68, 13}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 49, 3, 19}, {1, 128, 3, 64}, {2, 13, 6, 87}}}, + }, + { + {PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, + {PartialShape{-1, 12, 64, -1}, {{1, 12, 64, 35}, {1, 12, 64, 10}, {1, 12, 64, 10}, {1, 12, 64, 1}, {1, 12, 64, 35}}}, + {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_WithDynamicMul, + MHAWithDynamicMul, + ::testing::Combine(::testing::ValuesIn(transposedShape_4D_WithMul), + ::testing::ValuesIn(precision_f32(5)), + ::testing::Values(ov::element::f32), + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHAWithDynamicMul::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_WithDynamicMul_EnforceBF16, + MHAWithDynamicMul, + ::testing::Combine(::testing::ValuesIn(transposedShape_4D_WithMul), + ::testing::ValuesIn(precision_f32(5)), + ::testing::Values(ov::element::bf16), + ::testing::Values(MHA::default_thread_count), + ::testing::Values(8), // MHA + 1 Transpose on output + 6 Converts around + ::testing::Values(7), // MHA + 6 Converts around + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHAWithDynamicMul::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_wo_transpose.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_wo_transpose.cpp new file mode 100644 index 00000000000000..0967ef27087674 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_wo_transpose.cpp @@ -0,0 +1,151 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> originalShape_4D { + { {{}, {{1, 12, 197, 64}}}, {{}, {{1, 12, 64, 197}}}, {{}, {{1, 12, 197, 64}}} }, + { {{}, {{1, 12, 12, 64}}}, {{}, {{1, 12, 64, 48}}}, {{}, {{1, 12, 48, 64}}} }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 3, 128, 64}, {1, 12, 197, 100}, {1, 3, 128, 64}, {1, 12, 197, 600}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 3, 64, 128}, {1, 12, 100, 197}, {1, 3, 64, 128}, {1, 12, 600, 197}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 3, 128, 64}, {1, 12, 197, 100}, {1, 3, 128, 64}, {1, 12, 197, 600}}}, + }, + { + {PartialShape{1, 4, -1, -1}, {{1, 4, 384, 64}, {1, 4, 197, 64}, {1, 4, 384, 560}}}, + {PartialShape{1, 4, -1, -1}, {{1, 4, 64, 128}, {1, 4, 64, 197}, {1, 4, 560, 
384}}}, + {PartialShape{1, 4, -1, 64}, {{1, 4, 128, 64}, {1, 4, 197, 64}, {1, 4, 384, 64}}}, + } +}; + +std::vector> originalShape_3D { + { {{}, {{12, 197, 64}}}, {{}, {{12, 64, 197}}}, {{}, {{12, 197, 64}}} }, + { {{}, {{12, 128, 100}}}, {{}, {{12, 100, 128}}}, {{}, {{12, 128, 100}}} }, + { + {PartialShape{-1, -1, 64}, {{2, 9, 64}, {1, 64, 64}, {2, 64, 64}}}, + {PartialShape{-1, 64, 124}, {{2, 64, 124}, {1, 64, 124}, {2, 64, 124}}}, + {PartialShape{-1, 124, 64}, {{2, 124, 64}, {1, 124, 64}, {2, 124, 64}}}, + }, + { + {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, + {PartialShape{-1, -1, -1}, {{1, 85, 19}, {2, 36, 40}}}, + {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, + }, + { + {PartialShape{2, -1, 64}, {{2, 9, 64}, {2, 4, 64}, {2, 9, 64}}}, + {PartialShape{2, 64, -1}, {{2, 64, 9}, {2, 64, 4}, {2, 64, 9}}}, + {PartialShape{2, -1, 64}, {{2, 9, 64}, {2, 4, 64}, {2, 9, 64}}}, + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeOnInputs_4D, + MHAWOTransposeOnInputs, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTranspose_4D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTranspose_3D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_3D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeBF16_4D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::ValuesIn(precision_bf16_if_supported(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeBF16_3D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_3D), + ::testing::ValuesIn(precision_bf16_if_supported(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // 
MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeEnforceBF16_4D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::bf16), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeEnforceBF16_3D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_3D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::bf16), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp index c05087283305e4..ea7de9ccb209ad 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp @@ -6,36 +6,28 @@ #include "common_test_utils/test_constants.hpp" #include "openvino/runtime/system_conf.hpp" +#include "utils.hpp" namespace ov { namespace test { namespace snippets { -#define STATIC_SHAPES(...) 
-        static_shapes_to_test_representation(std::vector<std::vector<ov::Shape>>{__VA_ARGS__})
-
 namespace {
 static inline std::vector<std::vector<element::Type>> precisions(bool only_fp32 = true) {
-    std::vector<std::vector<element::Type>> prc = {
-            {element::f32, element::f32},
-    };
-// Note: low precisions are not supported by TPP yet (ticker: 130010)
+    std::vector<std::vector<element::Type>> prc = precision_f32(2);
+// Note: TPP doesn't support low precisions yet
 #ifndef SNIPPETS_LIBXSMM_TPP
     if (!only_fp32) {
-        // In Snippets MatMul INT8 is supported only on VNNI/AMX platforms
-        if (ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8()) {
-            prc.emplace_back(std::vector<element::Type>{element::i8, element::i8});
-            prc.emplace_back(std::vector<element::Type>{element::u8, element::i8});
-        }
-        // In Snippets MatMul BF16 is supported only on bf16/AMX platforms
-        if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) {
-            prc.emplace_back(std::vector<element::Type>{element::bf16, element::bf16});
-        }
+        auto quant = quantized_precisions_if_supported();
+        std::copy(quant.begin(), quant.end(), std::back_inserter(prc));
+        auto bfloat = precision_bf16_if_supported(2);
+        std::copy(bfloat.begin(), bfloat.end(), std::back_inserter(prc));
     }
 #endif
     return prc;
 }
 
 namespace transpose_zero_input {
-const auto& transpose_input_shapes = STATIC_SHAPES({{1, 49, 2, 23}, {2, 2, 23, 39}});
+const auto& transpose_input_shapes = SNIPPETS_TESTS_STATIC_SHAPES({{1, 49, 2, 23}, {2, 2, 23, 39}});
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul,
                          ::testing::Combine(
                                  ::testing::ValuesIn(transpose_input_shapes),
@@ -84,7 +76,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnected, TransposeMatMul,
 } // namespace transpose_zero_input
 
 namespace transpose_first_input {
-const auto& transpose_input_shapes = STATIC_SHAPES({{2, 1, 49, 13}, {1, 13, 3, 39}});
+const auto& transpose_input_shapes = SNIPPETS_TESTS_STATIC_SHAPES({{2, 1, 49, 13}, {1, 13, 3, 39}});
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul,
                          ::testing::Combine(
                                  ::testing::ValuesIn(transpose_input_shapes),
@@ -126,7 +118,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMatMulFQ, TransposeMatMulFQ,
 } // namespace transpose_first_input
 
 namespace transpose_output {
-const auto& transpose_input_shapes = STATIC_SHAPES({{2, 1, 49, 13}, {1, 2, 13, 39}});
+const auto& transpose_input_shapes = SNIPPETS_TESTS_STATIC_SHAPES({{2, 1, 49, 13}, {1, 2, 13, 39}});
 
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul,
                          ::testing::Combine(
@@ -195,7 +187,7 @@ static inline std::vector<std::vector<element::Type>> precisions(bool only_fp32
 }
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ExplicitTransposeMatMul, ExplicitTransposeMatMul,
                          ::testing::Combine(
-                                 ::testing::ValuesIn(STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}})),
+                                 ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}})),
                                  ::testing::Values(1), // Transpose on second input
                                  ::testing::ValuesIn(precisions()),
                                  ::testing::Values(MatMulType::MatMul),
@@ -223,7 +215,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynExplicitTransposeMatMul, ExplicitTran
 
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMatMulBias, ExplicitTransposeMatMulBias,
                          ::testing::Combine(
-                                 ::testing::ValuesIn(STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}, {1, 1, 69, 49}})),
+                                 ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}, {1, 1, 69, 49}})),
                                  ::testing::Values(1), // Transpose on second input
                                  ::testing::ValuesIn(precisions()),
                                  ::testing::Values(MatMulType::MatMul),
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp
new file mode 100644
index 00000000000000..6c0d54da973086
--- /dev/null
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp
@@ -0,0 +1,48 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "internal_properties.hpp"
+#include "utils/cpu_test_utils.hpp"
+#include "openvino/runtime/system_conf.hpp"
+
+namespace ov {
+namespace test {
+namespace snippets {
+
+#define SNIPPETS_TESTS_STATIC_SHAPES(...) static_shapes_to_test_representation(std::vector<std::vector<ov::Shape>>{__VA_ARGS__})
+
+static inline bool is_bf16_supported_by_brgemm() {
+    return ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16();
+}
+
+static inline bool is_i8_supported_by_brgemm() {
+    return ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8();
+}
+
+static inline std::vector<std::vector<element::Type>> precision_f32(size_t count) {
+    std::vector<std::vector<element::Type>> prc;
+    prc.emplace_back(std::vector<element::Type>(count, element::f32));
+    return prc;
+}
+
+static inline std::vector<std::vector<element::Type>> precision_bf16_if_supported(size_t count) {
+    std::vector<std::vector<element::Type>> prc;
+    if (is_bf16_supported_by_brgemm())
+        prc.emplace_back(std::vector<element::Type>(count, element::bf16));
+    return prc;
+}
+
+static inline std::vector<std::vector<element::Type>> quantized_precisions_if_supported() {
+    std::vector<std::vector<element::Type>> prc = {};
+    // In Snippets MatMul INT8 is supported only on VNNI/AMX platforms
+    if (is_i8_supported_by_brgemm()) {
+        prc.emplace_back(std::vector<element::Type>{element::i8, element::i8});
+        prc.emplace_back(std::vector<element::Type>{element::u8, element::i8});
+    }
+    return prc;
+}
+
+} // namespace snippets
+} // namespace test
+} // namespace ov
diff --git a/src/tests/functional/plugin/shared/include/snippets/mha.hpp b/src/tests/functional/plugin/shared/include/snippets/mha.hpp
index f8198dee0218ee..34cb4d452bfb15 100644
--- a/src/tests/functional/plugin/shared/include/snippets/mha.hpp
+++ b/src/tests/functional/plugin/shared/include/snippets/mha.hpp
@@ -44,6 +44,7 @@ class MHABase : virtual public SnippetsTestsCommon {
     void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override;
     virtual std::shared_ptr<SnippetsFunctionBase> get_subgraph() const = 0;
     virtual void init_params(std::vector<InputShape>& input_shapes, ov::element::Type& prc, ov::AnyMap& additional_config) = 0;
+    virtual void init_thresholds();
 
     size_t m_thread_count;
     std::vector<ov::element::Type> m_input_types;
@@ -88,6 +89,7 @@ class MHATransposedB : public MHA {
 class MHAINT8MatMul : public MHA {
 protected:
     std::shared_ptr<SnippetsFunctionBase> get_subgraph() const override;
+    void init_thresholds() override;
 };
 
 class MHAQuantMatMul0 : public MHA {
@@ -103,6 +105,7 @@ class MHAFQAfterMatMul : public MHA {
 class MHAFQ : public MHA {
 protected:
     std::shared_ptr<SnippetsFunctionBase> get_subgraph() const override;
+    void init_thresholds() override;
 };
 
 class MHAWithExtractedReshape : public MHA {
diff --git a/src/tests/functional/plugin/shared/src/snippets/mha.cpp b/src/tests/functional/plugin/shared/src/snippets/mha.cpp
index 351cd50856357d..8d0cb8613bc47e 100644
--- a/src/tests/functional/plugin/shared/src/snippets/mha.cpp
+++ b/src/tests/functional/plugin/shared/src/snippets/mha.cpp
@@ -53,15 +53,19 @@ void MHABase::SetUp() {
         configuration.insert({"SNIPPETS_MODE", "IGNORE_CALLBACK"});
     }
 
-    setInferenceType(prc);
     inType = outType = prc;
+    setInferenceType(prc);
+    init_thresholds();
+}
+
+ void MHABase::init_thresholds() {
     // Note: Libxsmm calculates Exp in a slightly different way, so the abs values might differ a bit.
     // Ticket: 130699
 #ifdef SNIPPETS_LIBXSMM_TPP
     abs_threshold = 1e-6;
 #endif
-    if (prc == ov::element::bf16)
+    if (inType == ov::element::bf16)
         rel_threshold = 0.05f;
-}
+ }
 
 std::string MHA::getTestCaseName(testing::TestParamInfo<ov::test::snippets::MHAParams> obj) {
     std::vector<InputShape> input_shapes;
@@ -194,6 +198,11 @@ std::shared_ptr<SnippetsFunctionBase> MHAINT8MatMul::get_subgraph() const {
     return std::make_shared<MHAINT8MatMulFunction>(inputDynamicShapes);
 }
 
+void MHAINT8MatMul::init_thresholds() {
+    MHABase::init_thresholds();
+    abs_threshold = 4e-6;
+}
+
 std::shared_ptr<SnippetsFunctionBase> MHAQuantMatMul0::get_subgraph() const {
     return std::make_shared<MHAQuantMatMul0Function>(inputDynamicShapes);
 }
@@ -206,6 +215,11 @@ std::shared_ptr<SnippetsFunctionBase> MHAFQ::get_subgraph() const {
     return std::make_shared<MHAFQFunction>(inputDynamicShapes);
 }
 
+void MHAFQ::init_thresholds() {
+    MHABase::init_thresholds();
+    abs_threshold = 0.016;
+}
+
 std::shared_ptr<SnippetsFunctionBase> MHAMulAdd::get_subgraph() const {
     return std::make_shared<MHAMulAddFunction>(inputDynamicShapes);
 }
diff --git a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp
index 90ab47214effee..f54f92c598a45f 100644
--- a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp
+++ b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp
@@ -235,9 +235,7 @@ class MHAWOTransposeSplitMFunction : public MHAWOTransposeFunction {
 *       FakeQuantize i8
 *          \     /
 *            Add
-*          Reshape0
-*          Softmax
-*          Reshape1    Transpose2[0,2,1,3]
+*          Softmax     Transpose2[0,2,1,3]
 *              \      /
 *               MatMul1
 *           FakeQuantize i8
@@ -261,9 +259,7 @@ class MHAFQAfterMatMulFunction : public SnippetsFunctionBase {
 *       FakeQuantize i8
 *          \     /
 *            Add
-*          Reshape0
-*          Softmax
-*          Reshape1     FakeQuantize i8
+*          Softmax      FakeQuantize i8
 *       FakeQuantize u8 Transpose2[0,2,1,3]
 *              \      /
 *               MatMul1
@@ -281,20 +277,17 @@ class MHAINT8MatMulFunction : public SnippetsFunctionBase {
 };
 
 /* Graph:
-*       FakeQuantize i8      Reshape1
-*       Reshape0             Transpose1[0,2,3,1]
+*       FakeQuantize i8      Transpose1[0,2,3,1]
 *       Transpose0[0,2,1,3]  FakeQuantize i8
 *              \     /
 *              MatMul0
 *                 \   /
-*          Add              Reshape2
+*                  Add
 *                Softmax    Transpose2[0,2,1,3]
 *                    \      /
 *                     MatMul1
 *                  FakeQuantize i8
 *                  Transpose3[0,2,1,3]
-*                     Reshape3
-* Note: Reshapes are tosplit Tokenization between FQs and deq Mul and MHA since Snippets::Ignore_Callback may be enabled
 */
 class MHAQuantMatMul0Function : public SnippetsFunctionBase {
 public:
diff --git a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp
index 1dbf8d7d22ed26..34f42ec838aa6d 100644
--- a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp
+++ b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp
@@ -598,38 +598,25 @@ std::shared_ptr<ov::Model> MHAFQAfterMatMulFunction::initOriginal() const {
     auto transpose2Param = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[3]);
     ov::ParameterVector ngraphParam = {transpose0Param, transpose1Param, addParam, transpose2Param};
 
-    const auto shape_rank = input_shapes[0].get_shape().size();
+    const auto shape_rank = input_shapes[0].size();
     auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
     auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 3, 1});
     auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
     auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
 
-    std::vector<int64_t> reshape0ConstData = {static_cast<int64_t>(input_shapes[0].get_shape()[0] * input_shapes[0].get_shape()[1] * input_shapes[0].get_shape()[2]),
-                                              -1};
-    auto reshape0Const = ov::op::v0::Constant::create(ov::element::i64, {reshape0ConstData.size()}, reshape0ConstData);
-
-    std::vector<int64_t> reshape1ConstData = {static_cast<int64_t>(input_shapes[0].get_shape()[0]),
-                                              static_cast<int64_t>(input_shapes[0].get_shape()[2]),
-                                              static_cast<int64_t>(input_shapes[0].get_shape()[1]),
-                                              static_cast<int64_t>(input_shapes[0].get_shape()[1])};
-    auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {reshape1ConstData.size()}, reshape1ConstData);
-
     bool transA = false;
     bool transB = false;
     const auto transpose0 = std::make_shared<ov::op::v1::Transpose>(transpose0Param, transpose0Const);
     const auto transpose1 = std::make_shared<ov::op::v1::Transpose>(transpose1Param, transpose1Const);
     const auto matMul0 = std::make_shared<ov::op::v0::MatMul>(transpose0, transpose1, transA, transB);
     auto fq0 = ov::test::utils::make_fake_quantize(matMul0, ov::element::f32, 256, {1},
-                                                    {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+                                                   {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
     const auto add = std::make_shared<ov::op::v1::Add>(fq0, addParam);
-    const auto reshape0 = std::make_shared<ov::op::v1::Reshape>(add, reshape0Const, true);
-    const auto softMax = std::make_shared<ov::op::v1::Softmax>(reshape0, 1);
-    const auto reshape1 = std::make_shared<ov::op::v1::Reshape>(softMax, reshape1Const, true);
+    const auto softMax = std::make_shared<ov::op::v8::Softmax>(add, -1);
     const auto transpose2 = std::make_shared<ov::op::v1::Transpose>(transpose2Param, transpose2Const);
-    const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(reshape1, transpose2, transA, transB);
+    const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(softMax, transpose2, transA, transB);
     auto fq1 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1},
-                                                    {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+                                                   {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
     const auto transpose3 = std::make_shared<ov::op::v1::Transpose>(fq1, transpose3Const);
 
     ov::ResultVector results{std::make_shared<ov::op::v0::Result>(transpose3)};
@@ -642,46 +629,33 @@ std::shared_ptr<ov::Model> MHAINT8MatMulFunction::initOriginal() const {
     auto transpose2Param = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[3]);
     ov::ParameterVector ngraphParam = {transpose0Param, transpose1Param, addParam, transpose2Param};
 
-    const auto shape_rank = input_shapes[0].get_shape().size();
+    const auto shape_rank = input_shapes[0].size();
     auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
     auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 3, 1});
     auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
     auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
 
-    std::vector<int64_t> reshape0ConstData = {static_cast<int64_t>(input_shapes[0].get_shape()[0] * input_shapes[0].get_shape()[1] * input_shapes[0].get_shape()[2]),
-                                              -1};
-    auto reshape0Const = ov::op::v0::Constant::create(ov::element::i64, {reshape0ConstData.size()}, reshape0ConstData);
-
-    std::vector<int64_t> reshape1ConstData = {static_cast<int64_t>(input_shapes[0].get_shape()[0]),
-                                              static_cast<int64_t>(input_shapes[0].get_shape()[2]),
-                                              static_cast<int64_t>(input_shapes[0].get_shape()[1]),
-                                              static_cast<int64_t>(input_shapes[0].get_shape()[1])};
-    auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {reshape1ConstData.size()}, reshape1ConstData);
-
     auto fq0 = ov::test::utils::make_fake_quantize(transpose0Param, ov::element::f32, 256, {1},
-                                                    {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+                                                   {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
     auto fq1 = ov::test::utils::make_fake_quantize(transpose1Param, ov::element::f32, 256, {1},
-                                                    {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+                                                   {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
     auto fq2 = ov::test::utils::make_fake_quantize(transpose2Param, ov::element::f32, 256, {1},
-                                                    {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+                                                   {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
     bool transA = false;
     bool transB = false;
     const auto transpose0 = std::make_shared<ov::op::v1::Transpose>(fq0, transpose0Const);
     const auto transpose1 = std::make_shared<ov::op::v1::Transpose>(fq1, transpose1Const);
     const auto matMul0 = std::make_shared<ov::op::v0::MatMul>(transpose0, transpose1, transA, transB);
     auto fq3 = ov::test::utils::make_fake_quantize(matMul0, ov::element::f32, 256, {1},
-                                                    {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+                                                   {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
     const auto add = std::make_shared<ov::op::v1::Add>(fq3, addParam);
-    const auto reshape0 = std::make_shared<ov::op::v1::Reshape>(add, reshape0Const, true);
-    const auto softMax = std::make_shared<ov::op::v1::Softmax>(reshape0, 1);
-    const auto reshape1 = std::make_shared<ov::op::v1::Reshape>(softMax, reshape1Const, true);
-    auto fq4 = ov::test::utils::make_fake_quantize(reshape1, ov::element::f32, 256, {1},
-                                                    {0}, {0.820726}, {0}, {0.820726});
+    const auto softMax = std::make_shared<ov::op::v8::Softmax>(add, -1);
+    auto fq4 = ov::test::utils::make_fake_quantize(softMax, ov::element::f32, 256, {1},
+                                                   {0}, {0.820726}, {0}, {0.820726});
     const auto transpose2 = std::make_shared<ov::op::v1::Transpose>(fq2, transpose2Const);
     const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(fq4, transpose2, transA, transB);
     auto fq5 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1},
-                                                    {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+                                                   {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
     const auto transpose3 = std::make_shared<ov::op::v1::Transpose>(fq5, transpose3Const);
 
     ov::ResultVector results{std::make_shared<ov::op::v0::Result>(transpose3)};
@@ -694,34 +668,20 @@ std::shared_ptr<ov::Model> MHAQuantMatMul0Function::initOriginal() const {
     auto transpose2Param = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[3]);
     ov::ParameterVector ngraphParam = {transpose0Param, transpose1Param, addParam, transpose2Param};
 
-    const auto channel = int64_t(12);
-    const auto last_dim = input_shapes[0].get_shape().back();
-    OPENVINO_ASSERT(last_dim % channel == 0, "Incorrect test configuration");
-    const auto new_shape = std::vector<int64_t>{0, 0, channel, static_cast<int64_t>(last_dim) / channel};
-
-    auto reshape0Const = ov::op::v0::Constant::create(ov::element::i64, {new_shape.size()}, new_shape);
-    auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {new_shape.size()}, new_shape);
-    auto reshape2Const = ov::op::v0::Constant::create(ov::element::i64, {new_shape.size()}, new_shape);
-    auto reshape3Const = ov::op::v0::Constant::create(ov::element::i64, {input_shapes[0].size()}, std::vector<int64_t>{0, 0, -1});
-
-    auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {4}, std::vector<int64_t>{0, 2, 1, 3});
-    auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {4}, std::vector<int64_t>{0, 2, 3, 1});
-    auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {4}, std::vector<int64_t>{0, 2, 1, 3});
-    auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {4}, std::vector<int64_t>{0, 2, 1, 3});
-
-    const auto reshape1 = std::make_shared<ov::op::v1::Reshape>(transpose1Param, reshape1Const, true);
-    const auto reshape2 = std::make_shared<ov::op::v1::Reshape>(transpose2Param, reshape2Const, true);
+    const auto shape_rank = input_shapes[0].size();
+    auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
+    auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 3, 1});
+    auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
+    auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
 
-    const auto transpose1 = std::make_shared<ov::op::v1::Transpose>(reshape1, transpose1Const);
-    const auto transpose2 = std::make_shared<ov::op::v1::Transpose>(reshape2, transpose2Const);
+    const auto transpose1 = std::make_shared<ov::op::v1::Transpose>(transpose1Param, transpose1Const);
+    const auto transpose2 = std::make_shared<ov::op::v1::Transpose>(transpose2Param, transpose2Const);
     auto fq0 = ov::test::utils::make_fake_quantize(transpose0Param, ov::element::f32, 256, {1},
-                                                    {-12.5187311}, {12.4209289}, {-12.5187311}, {12.4209289});
+                                                   {-12.5187311}, {12.4209289}, {-12.5187311}, {12.4209289});
     auto fq1 = ov::test::utils::make_fake_quantize(transpose1, ov::element::f32, 256, {1},
-                                                    {-1.43326699}, {1.42206954}, {-1.43326699}, {1.42206954});
-
-    const auto reshape0 = std::make_shared<ov::op::v1::Reshape>(fq0, reshape0Const, true);
-    const auto transpose0 = std::make_shared<ov::op::v1::Transpose>(reshape0, transpose0Const);
+                                                   {-1.43326699}, {1.42206954}, {-1.43326699}, {1.42206954});
+    const auto transpose0 = std::make_shared<ov::op::v1::Transpose>(fq0, transpose0Const);
     const auto matMul0 = std::make_shared<ov::op::v0::MatMul>(transpose0, fq1);
 
     const auto add = std::make_shared<ov::op::v1::Add>(matMul0, addParam);
@@ -729,11 +689,10 @@ std::shared_ptr<ov::Model> MHAQuantMatMul0Function::initOriginal() const {
     const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(softMax, transpose2);
 
     auto fq2 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1},
-                                                    {-1.81826221}, {1.804057}, {-1.81826221}, {1.804057});
+                                                   {-1.81826221}, {1.804057}, {-1.81826221}, {1.804057});
     const auto transpose3 = std::make_shared<ov::op::v1::Transpose>(fq2, transpose3Const);
-    const auto reshape3 = std::make_shared<ov::op::v1::Reshape>(transpose3, reshape3Const, true);
-    ov::ResultVector results{std::make_shared<ov::op::v0::Result>(reshape3)};
+    ov::ResultVector results{std::make_shared<ov::op::v0::Result>(transpose3)};
     return std::make_shared<ov::Model>(results, ngraphParam, "mha");
 }
 std::shared_ptr<ov::Model> MHAFQFunction::initOriginal() const {
@@ -743,18 +702,15 @@ std::shared_ptr<ov::Model> MHAFQFunction::initOriginal() const {
     auto transpose2Param = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[3]);
     ov::ParameterVector ngraphParam = {transpose0Param, transpose1Param, addParam, transpose2Param};
 
-    const auto shape_rank = input_shapes[0].get_shape().size();
+    const auto shape_rank = input_shapes[0].size();
     auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
     auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 3, 1});
     auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
     auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
 
-    const auto fq0 = ov::test::utils::make_fake_quantize(transpose0Param, ov::element::f32, 256, {1},
-                                                         {-5.217694}, {6.661877}, {-5.217694}, {6.661877});
-    const auto fq1 = ov::test::utils::make_fake_quantize(transpose1Param, ov::element::f32, 256, {1},
-                                                         {-6.40245}, {6.45286}, {-6.40245}, {6.45286});
-    const auto fq_add = ov::test::utils::make_fake_quantize(addParam, ov::element::f32, 256, {1},
-                                                            {-1000}, {0}, {-1000}, {0});
+    const auto fq0 = ov::test::utils::make_fake_quantize(transpose0Param, ov::element::f32, 256, {1}, {-5.217694}, {6.661877}, {-5.217694}, {6.661877});
+    const auto fq1 = ov::test::utils::make_fake_quantize(transpose1Param, ov::element::f32, 256, {1}, {-6.40245}, {6.45286}, {-6.40245}, {6.45286});
+    const auto fq_add = ov::test::utils::make_fake_quantize(addParam, ov::element::f32, 256, {1}, {-1000}, {0}, {-1000}, {0});
 
     bool transA = false;
     bool transB = false;
@@ -766,16 +722,13 @@ std::shared_ptr<ov::Model> MHAFQFunction::initOriginal() const {
     const auto mul_deq_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1}, std::vector<float>{0.00098425});
     const auto mul_deq = std::make_shared<ov::op::v1::Multiply>(convert, mul_deq_const);
     const auto mul = std::make_shared<ov::op::v1::Multiply>(transpose1, mul_deq);
-    auto fq1_1 = ov::test::utils::make_fake_quantize(mul, ov::element::f32, 256, {1},
-                                                     {-0.8003067}, {0.8066083}, {-0.8003067}, {0.8066083});
+    const auto fq1_1 = ov::test::utils::make_fake_quantize(mul, ov::element::f32, 256, {1}, {-0.8003067}, {0.8066083}, {-0.8003067}, {0.8066083});
     const auto matMul0 = std::make_shared<ov::op::v0::MatMul>(transpose0, fq1_1, transA, transB);
-    auto fq2 = ov::test::utils::make_fake_quantize(matMul0, ov::element::f32, 256, {1},
-                                                   {-14.50351}, {17.65645}, {-14.50351}, {17.65645});
+    const auto fq2 = ov::test::utils::make_fake_quantize(matMul0, ov::element::f32, 256, {1}, {-14.50351}, {17.65645}, {-14.50351}, {17.65645});
     const auto add = std::make_shared<ov::op::v1::Add>(fq2, fq_add);
     const auto softMax = std::make_shared<ov::op::v1::Softmax>(add, 3);
     const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(softMax, transpose2, transA, transB);
-    auto fq3 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1},
-                                                   {-1.895786}, {2.0028071}, {-1.895786}, {2.0028071});
+    auto fq3 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1}, {-1.895786}, {2.0028071}, {-1.895786}, {2.0028071});
     const auto transpose3 = std::make_shared<ov::op::v1::Transpose>(fq3, transpose3Const);
 
     ov::ResultVector results{std::make_shared<ov::op::v0::Result>(transpose3)};
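
For orientation, here is a standalone sketch (not part of the patch) of how the helpers introduced in utils.hpp are meant to behave: each returns a list of per-input precision configurations, and the *_if_supported variants return an empty list when the host CPU lacks the required ISA, so the corresponding precision configurations are simply skipped on that machine. The main() harness and the printed strings below are purely illustrative.

// Illustrative only: exercises the utils.hpp helpers added by this patch.
#include <iostream>

#include "utils.hpp"  // the new header from shared_tests_instances/snippets

int main() {
    using namespace ov::test::snippets;

    // Always one entry: a vector of `count` f32 precisions, e.g. {f32, f32, f32}.
    const auto fp32 = precision_f32(3);

    // Empty unless the CPU reports bf16 or AMX-bf16 support (is_bf16_supported_by_brgemm()).
    const auto bf16 = precision_bf16_if_supported(3);

    // Empty unless the CPU reports VNNI or AMX-int8 support (is_i8_supported_by_brgemm()).
    const auto int8 = quantized_precisions_if_supported();

    std::cout << "f32 configs:  " << fp32.size() << "\n"
              << "bf16 configs: " << bf16.size() << "\n"
              << "int8 configs: " << int8.size() << std::endl;
    return 0;
}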