[Snippets][CPU][Tests] Added tests for dynamic BF16/INT8 MHA (openvinotoolkit#27169)

### Details:
 - *Added more tests for the validation of INT8/BF16 MHA in the CPU plugin*
 - *Split the large `mha.cpp` file into several smaller files with the same test semantics (see openvinotoolkit#26547 (comment))*

### Tickets:
 - *N/A*
a-sidorova authored Oct 31, 2024
1 parent 86083e0 commit c685d44
Showing 17 changed files with 807 additions and 620 deletions.
@@ -237,7 +237,6 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*smoke_FakeQuantize.*/FakeQuantizeLayerTest.Inference.*TS=.*3.4.2.5.*LEVELS=255.*)",
R"(.*smoke_FakeQuantizePerChannel.*/FakeQuantizeLayerTest.Inference.*TS=.*11.10.22.19.*LEVELS=(255|256).*netPRC=f32.*)",
R"(.*smoke_MVN_5D/Mvn6LayerTest.Inference.*TS=.*3.4.2.5.*LEVELS=255.*netPRC=f16.*)",
R"(.*smoke_Snippets_MHAINT8MatMul/MHAINT8MatMul.*)",
R"(.*smoke_static/ConvertFqRnnToQuantizedRnn.*2.1.5.*2.1.1.*2.1.1.*)",
R"(.*smoke_InterpolateBicubicPillow_Layout_Test/InterpolateLayerCPUTest.CompareWithRefs/ShapeCalcMode=sizes_IS=\[?.2..20.?.?\]_TS.*1.17.4.4.*2.3.10.12.*1.17.4.4.*Sizes.*4.4.*10.20.*10.4.*PARAMETER.*0.0.0.0.*0.0.1.1.*2.3.*)",
R"(.*smoke_LoopForCommon/LoopLayerCPUTest.CompareWithRefs/.*_netType=bf16.*)",
@@ -563,7 +562,7 @@ std::vector<std::string> disabledTestPatterns() {
 // ignored for not supported bf16 platforms
 retVector.emplace_back(R"(.*smoke_Snippets_EnforcePrecision_bf16.*)");
 retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16.*)");
-retVector.emplace_back(R"(.*smoke_Snippets_MHAEnforceBF16.*)");
+retVector.emplace_back(R"(.*smoke_Snippets_MHA.*EnforceBF16.*)");
 retVector.emplace_back(R"(.*ConcatSDPTest.*bf16.*)");
 }
 // [150842] Need to support dynamic K dimension of BF16|INT8 MatMul on AMX systems
@@ -572,6 +571,11 @@ std::vector<std::string> disabledTestPatterns() {
retVector.emplace_back(R"(.*smoke_Snippets_MatMul/MatMul.CompareWithRefImpl/.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)");
retVector.emplace_back(R"(.*smoke_Snippets_MatMulTransposeB.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)");
retVector.emplace_back(R"(.*smoke_Snippets_MatMulBias.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)");

retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16_3D.*IS\[1\]=\[2.64.\?\].*)");
retVector.emplace_back(R"(.*smoke_Snippets_MHA.*BF16.*/MHA.*IS\[0\]=\[(\?|1).(\?|4).(\?|12).(\?|64)\].*)");
retVector.emplace_back(R"(.*smoke_Snippets_MHA.*BF16.*/MHA.*IS\[0\]=\[\?.\?.\?\].*)");
retVector.emplace_back(R"(.*smoke_Snippets_(MHAINT8MatMul|MHAQuantMatMul0|MHAFQAfterMatMul_4D|smoke_Snippets_MHAFQ).*IS\[0\]=\[\?.\?.\?\.\?].*)");
}
#ifdef SNIPPETS_LIBXSMM_TPP
// GN in TPP requires exposing tmp Buffer results outside the loop (ticket: 151234)
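Each string returned by `disabledTestPatterns()` is a regular expression that the test runner matches against the full gtest test name, which is why the new entries above can target only the dynamic-shape instantiations (those whose serialized input shape contains `?`). A minimal, self-contained sketch of that matching idea (the real harness plumbing in OpenVINO is more involved, and the test names below are made-up examples):

```cpp
#include <iostream>
#include <regex>
#include <string>
#include <vector>

// Illustrative only: returns true if a test name matches any disabled pattern.
bool isDisabled(const std::string& testName, const std::vector<std::string>& patterns) {
    for (const auto& p : patterns)
        if (std::regex_match(testName, std::regex(p)))
            return true;
    return false;
}

int main() {
    const std::vector<std::string> patterns = {
        R"(.*smoke_Snippets_MHA.*BF16.*/MHA.*IS\[0\]=\[\?.\?.\?\].*)",
    };
    // A dynamic-shape BF16 MHA test is skipped; its static-shape sibling is not.
    std::cout << isDisabled("smoke_Snippets_MHABF16_3D/MHA.CompareWithRefImpl/IS[0]=[?.?.?]_prc=bf16", patterns)
              << isDisabled("smoke_Snippets_MHABF16_3D/MHA.CompareWithRefImpl/IS[0]=[2.64.64]_prc=bf16", patterns)
              << "\n";  // prints "10"
}
```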
@@ -4,44 +4,26 @@

#include "snippets/matmul.hpp"

#include "common_test_utils/test_constants.hpp"
#include "openvino/runtime/system_conf.hpp"
#include "utils.hpp"

namespace ov {
namespace test {
namespace snippets {

#define STATIC_SHAPES(...) static_shapes_to_test_representation(std::vector<std::vector<ov::Shape>>{__VA_ARGS__})

namespace {

static inline std::vector<std::vector<element::Type>> quantized_precisions() {
std::vector<std::vector<element::Type>> prc = {};
// In Snippets MatMul INT8 is supported only on VNNI/AMX platforms
if (ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8()) {
prc.emplace_back(std::vector<element::Type>{element::i8, element::i8});
prc.emplace_back(std::vector<element::Type>{element::u8, element::i8});
}
return prc;
}

static inline std::vector<std::vector<element::Type>> precisions() {
std::vector<std::vector<element::Type>> prc = {
{element::f32, element::f32},
};
std::vector<std::vector<element::Type>> prc = precision_f32(2);
// Note: TPP doesn't support low precisions yet
#ifndef SNIPPETS_LIBXSMM_TPP
auto quant = quantized_precisions();
auto quant = quantized_precisions_if_supported();
std::copy(quant.begin(), quant.end(), std::back_inserter(prc));
// In Snippets MatMul BF16 is supported only on bf16/AMX platforms
if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) {
prc.emplace_back(std::vector<element::Type>{element::bf16, element::bf16});
}
auto bfloat = precision_bf16_if_supported(2);
std::copy(bfloat.begin(), bfloat.end(), std::back_inserter(prc));
#endif
return prc;
}


std::vector<std::vector<ov::test::InputShape>> input_shapes{
{ {{}, {{2, 1, 3, 5}}}, {{}, {{1, 3, 5, 3}}} },
{ {{}, {{3, 1, 32, 14}}}, {{}, {{1, 3, 14, 37}}} },
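The helpers `precision_f32`, `quantized_precisions_if_supported`, and `precision_bf16_if_supported` now come from the shared `utils.hpp`. Their definitions are not part of this excerpt, but judging from the code they replace, they presumably look roughly like the sketch below (the platform checks are copied from the removed functions; treating the integer argument as the number of inputs is an assumption):

```cpp
// Hypothetical reconstruction of the utils.hpp helpers used above; inferred
// from the removed code in this diff, so the real implementations may differ.
#include <vector>

#include "openvino/core/type/element_type.hpp"
#include "openvino/runtime/system_conf.hpp"

namespace ov {
namespace test {
namespace snippets {

static inline std::vector<std::vector<element::Type>> precision_f32(size_t n) {
    // n f32 inputs, e.g. precision_f32(2) -> {{f32, f32}}.
    return {std::vector<element::Type>(n, element::f32)};
}

static inline std::vector<std::vector<element::Type>> quantized_precisions_if_supported() {
    std::vector<std::vector<element::Type>> prc;
    // In Snippets, MatMul INT8 is supported only on VNNI/AMX platforms.
    if (ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8()) {
        prc.emplace_back(std::vector<element::Type>{element::i8, element::i8});
        prc.emplace_back(std::vector<element::Type>{element::u8, element::i8});
    }
    return prc;
}

static inline std::vector<std::vector<element::Type>> precision_bf16_if_supported(size_t n) {
    std::vector<std::vector<element::Type>> prc;
    // In Snippets, MatMul BF16 is supported only on bf16/AMX platforms.
    if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16())
        prc.emplace_back(std::vector<element::Type>(n, element::bf16));
    return prc;
}

}  // namespace snippets
}  // namespace test
}  // namespace ov
```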
@@ -158,7 +140,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBias, MatMulBias,
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBiasQuantized, MatMulBiasQuantized,
 ::testing::Combine(
 ::testing::ValuesIn(input_shapes_bias),
-::testing::ValuesIn(quantized_precisions()),
+::testing::ValuesIn(quantized_precisions_if_supported()),
 ::testing::Values(MatMulType::MatMul),
 ::testing::Values(1), // Subgraph
 ::testing::Values(1), // Tokenized MatMul+Bias
@@ -167,8 +149,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBiasQuantized, MatMulBiasQuantized

 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized,
 ::testing::Combine(
-::testing::ValuesIn(STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})),
-::testing::ValuesIn(quantized_precisions()),
+::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})),
+::testing::ValuesIn(quantized_precisions_if_supported()),
 ::testing::Values(MatMulType::MatMul),
 ::testing::Values(3), // Subgraph + Reshape + Subgraph
 ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ]
@@ -177,8 +159,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized,

 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantizedSoftmax, MatMulsQuantizedSoftmax,
 ::testing::Combine(
-::testing::ValuesIn(STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})),
-::testing::ValuesIn(quantized_precisions()),
+::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})),
+::testing::ValuesIn(quantized_precisions_if_supported()),
 ::testing::Values(MatMulType::MatMul),
 ::testing::Values(3), // Subgraph + Reshape + Subgraph
 ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ]
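`SNIPPETS_TESTS_STATIC_SHAPES` replaces the file-local `STATIC_SHAPES` macro deleted in the first hunk of this file; presumably it is the same macro, moved to the shared `utils.hpp` and given a prefixed name to avoid clashes between test translation units:

```cpp
// Presumed utils.hpp definition, mirroring the removed file-local macro.
#define SNIPPETS_TESTS_STATIC_SHAPES(...) \
    static_shapes_to_test_representation(std::vector<std::vector<ov::Shape>>{__VA_ARGS__})
```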
