[Snippets][CPU][Tests] Added tests for dynamic BF16/INT8 MHA (openvinotoolkit#27169)

### Details:
 - *Added more tests for the validation of INT8/BF16 MHA in the CPU plugin*
 - *Split the large `mha.cpp` file into several smaller files with the same test semantics (see openvinotoolkit#26547 (comment))*

### Tickets:
 - *N/A*
a-sidorova authored Oct 31, 2024
1 parent 86083e0 commit c685d44
Showing 17 changed files with 807 additions and 620 deletions.
@@ -237,7 +237,6 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*smoke_FakeQuantize.*/FakeQuantizeLayerTest.Inference.*TS=.*3.4.2.5.*LEVELS=255.*)",
R"(.*smoke_FakeQuantizePerChannel.*/FakeQuantizeLayerTest.Inference.*TS=.*11.10.22.19.*LEVELS=(255|256).*netPRC=f32.*)",
R"(.*smoke_MVN_5D/Mvn6LayerTest.Inference.*TS=.*3.4.2.5.*LEVELS=255.*netPRC=f16.*)",
R"(.*smoke_Snippets_MHAINT8MatMul/MHAINT8MatMul.*)",
R"(.*smoke_static/ConvertFqRnnToQuantizedRnn.*2.1.5.*2.1.1.*2.1.1.*)",
R"(.*smoke_InterpolateBicubicPillow_Layout_Test/InterpolateLayerCPUTest.CompareWithRefs/ShapeCalcMode=sizes_IS=\[?.2..20.?.?\]_TS.*1.17.4.4.*2.3.10.12.*1.17.4.4.*Sizes.*4.4.*10.20.*10.4.*PARAMETER.*0.0.0.0.*0.0.1.1.*2.3.*)",
R"(.*smoke_LoopForCommon/LoopLayerCPUTest.CompareWithRefs/.*_netType=bf16.*)",
@@ -563,7 +562,7 @@ std::vector<std::string> disabledTestPatterns() {
 // ignored for not supported bf16 platforms
 retVector.emplace_back(R"(.*smoke_Snippets_EnforcePrecision_bf16.*)");
 retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16.*)");
-retVector.emplace_back(R"(.*smoke_Snippets_MHAEnforceBF16.*)");
+retVector.emplace_back(R"(.*smoke_Snippets_MHA.*EnforceBF16.*)");
 retVector.emplace_back(R"(.*ConcatSDPTest.*bf16.*)");
 }
 // [150842] Need to support dynamic K dimension of BF16|INT8 MatMul on AMX systems
@@ -572,6 +571,11 @@ std::vector<std::string> disabledTestPatterns() {
retVector.emplace_back(R"(.*smoke_Snippets_MatMul/MatMul.CompareWithRefImpl/.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)");
retVector.emplace_back(R"(.*smoke_Snippets_MatMulTransposeB.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)");
retVector.emplace_back(R"(.*smoke_Snippets_MatMulBias.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)");

retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16_3D.*IS\[1\]=\[2.64.\?\].*)");
retVector.emplace_back(R"(.*smoke_Snippets_MHA.*BF16.*/MHA.*IS\[0\]=\[(\?|1).(\?|4).(\?|12).(\?|64)\].*)");
retVector.emplace_back(R"(.*smoke_Snippets_MHA.*BF16.*/MHA.*IS\[0\]=\[\?.\?.\?\].*)");
retVector.emplace_back(R"(.*smoke_Snippets_(MHAINT8MatMul|MHAQuantMatMul0|MHAFQAfterMatMul_4D|smoke_Snippets_MHAFQ).*IS\[0\]=\[\?.\?.\?\.\?].*)");
}
#ifdef SNIPPETS_LIBXSMM_TPP
// GN in TPP requires exposing tmp Buffer results outside the loop (ticket: 151234)
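Each string returned by `disabledTestPatterns()` is a regular expression that the test runner matches against the full gtest test name, which is why the new entries above can target only the dynamic-shape instantiations (those whose serialized input shape contains `?`). A minimal, self-contained sketch of that matching idea (the real harness plumbing in OpenVINO is more involved, and the test names below are made-up examples):

```cpp
#include <iostream>
#include <regex>
#include <string>
#include <vector>

// Illustrative only: returns true if a test name matches any disabled pattern.
bool isDisabled(const std::string& testName, const std::vector<std::string>& patterns) {
    for (const auto& p : patterns)
        if (std::regex_match(testName, std::regex(p)))
            return true;
    return false;
}

int main() {
    const std::vector<std::string> patterns = {
        R"(.*smoke_Snippets_MHA.*BF16.*/MHA.*IS\[0\]=\[\?.\?.\?\].*)",
    };
    // A dynamic-shape BF16 MHA test is skipped; its static-shape sibling is not.
    std::cout << isDisabled("smoke_Snippets_MHABF16_3D/MHA.CompareWithRefImpl/IS[0]=[?.?.?]_prc=bf16", patterns)
              << isDisabled("smoke_Snippets_MHABF16_3D/MHA.CompareWithRefImpl/IS[0]=[2.64.64]_prc=bf16", patterns)
              << "\n";  // prints "10"
}
```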
@@ -4,44 +4,26 @@

#include "snippets/matmul.hpp"

#include "common_test_utils/test_constants.hpp"
#include "openvino/runtime/system_conf.hpp"
#include "utils.hpp"

namespace ov {
namespace test {
namespace snippets {

#define STATIC_SHAPES(...) static_shapes_to_test_representation(std::vector<std::vector<ov::Shape>>{__VA_ARGS__})

namespace {

static inline std::vector<std::vector<element::Type>> quantized_precisions() {
std::vector<std::vector<element::Type>> prc = {};
// In Snippets MatMul INT8 is supported only on VNNI/AMX platforms
if (ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8()) {
prc.emplace_back(std::vector<element::Type>{element::i8, element::i8});
prc.emplace_back(std::vector<element::Type>{element::u8, element::i8});
}
return prc;
}

static inline std::vector<std::vector<element::Type>> precisions() {
std::vector<std::vector<element::Type>> prc = {
{element::f32, element::f32},
};
std::vector<std::vector<element::Type>> prc = precision_f32(2);
// Note: TPP doesn't support low precisions yet
#ifndef SNIPPETS_LIBXSMM_TPP
auto quant = quantized_precisions();
auto quant = quantized_precisions_if_supported();
std::copy(quant.begin(), quant.end(), std::back_inserter(prc));
// In Snippets MatMul BF16 is supported only on bf16/AMX platforms
if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) {
prc.emplace_back(std::vector<element::Type>{element::bf16, element::bf16});
}
auto bfloat = precision_bf16_if_supported(2);
std::copy(bfloat.begin(), bfloat.end(), std::back_inserter(prc));
#endif
return prc;
}


std::vector<std::vector<ov::test::InputShape>> input_shapes{
{ {{}, {{2, 1, 3, 5}}}, {{}, {{1, 3, 5, 3}}} },
{ {{}, {{3, 1, 32, 14}}}, {{}, {{1, 3, 14, 37}}} },
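The helpers `precision_f32`, `quantized_precisions_if_supported`, and `precision_bf16_if_supported` now come from the shared `utils.hpp`. Their definitions are not part of this excerpt, but judging from the code they replace, they presumably look roughly like the sketch below (the platform checks are copied from the removed functions; treating the integer argument as the number of inputs is an assumption):

```cpp
// Hypothetical reconstruction of the utils.hpp helpers used above; inferred
// from the removed code in this diff, so the real implementations may differ.
#include <vector>

#include "openvino/core/type/element_type.hpp"
#include "openvino/runtime/system_conf.hpp"

namespace ov {
namespace test {
namespace snippets {

static inline std::vector<std::vector<element::Type>> precision_f32(size_t n) {
    // n f32 inputs, e.g. precision_f32(2) -> {{f32, f32}}.
    return {std::vector<element::Type>(n, element::f32)};
}

static inline std::vector<std::vector<element::Type>> quantized_precisions_if_supported() {
    std::vector<std::vector<element::Type>> prc;
    // In Snippets, MatMul INT8 is supported only on VNNI/AMX platforms.
    if (ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8()) {
        prc.emplace_back(std::vector<element::Type>{element::i8, element::i8});
        prc.emplace_back(std::vector<element::Type>{element::u8, element::i8});
    }
    return prc;
}

static inline std::vector<std::vector<element::Type>> precision_bf16_if_supported(size_t n) {
    std::vector<std::vector<element::Type>> prc;
    // In Snippets, MatMul BF16 is supported only on bf16/AMX platforms.
    if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16())
        prc.emplace_back(std::vector<element::Type>(n, element::bf16));
    return prc;
}

}  // namespace snippets
}  // namespace test
}  // namespace ov
```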
@@ -158,7 +140,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBias, MatMulBias,
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBiasQuantized, MatMulBiasQuantized,
 ::testing::Combine(
 ::testing::ValuesIn(input_shapes_bias),
-::testing::ValuesIn(quantized_precisions()),
+::testing::ValuesIn(quantized_precisions_if_supported()),
 ::testing::Values(MatMulType::MatMul),
 ::testing::Values(1), // Subgraph
 ::testing::Values(1), // Tokenized MatMul+Bias
@@ -167,8 +149,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBiasQuantized, MatMulBiasQuantized

 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized,
 ::testing::Combine(
-::testing::ValuesIn(STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})),
-::testing::ValuesIn(quantized_precisions()),
+::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})),
+::testing::ValuesIn(quantized_precisions_if_supported()),
 ::testing::Values(MatMulType::MatMul),
 ::testing::Values(3), // Subgraph + Reshape + Subgraph
 ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ]
@@ -177,8 +159,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized,

 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantizedSoftmax, MatMulsQuantizedSoftmax,
 ::testing::Combine(
-::testing::ValuesIn(STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})),
-::testing::ValuesIn(quantized_precisions()),
+::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})),
+::testing::ValuesIn(quantized_precisions_if_supported()),
 ::testing::Values(MatMulType::MatMul),
 ::testing::Values(3), // Subgraph + Reshape + Subgraph
 ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ]
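`SNIPPETS_TESTS_STATIC_SHAPES` replaces the file-local `STATIC_SHAPES` macro deleted in the first hunk of this file; presumably it is the same macro, moved to the shared `utils.hpp` and given a prefixed name to avoid clashes between test translation units:

```cpp
// Presumed utils.hpp definition, mirroring the removed file-local macro.
#define SNIPPETS_TESTS_STATIC_SHAPES(...) \
    static_shapes_to_test_representation(std::vector<std::vector<ov::Shape>>{__VA_ARGS__})
```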
