diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 6edc4f062536d0..90820d550df179 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -237,7 +237,6 @@ std::vector disabledTestPatterns() { R"(.*smoke_FakeQuantize.*/FakeQuantizeLayerTest.Inference.*TS=.*3.4.2.5.*LEVELS=255.*)", R"(.*smoke_FakeQuantizePerChannel.*/FakeQuantizeLayerTest.Inference.*TS=.*11.10.22.19.*LEVELS=(255|256).*netPRC=f32.*)", R"(.*smoke_MVN_5D/Mvn6LayerTest.Inference.*TS=.*3.4.2.5.*LEVELS=255.*netPRC=f16.*)", - R"(.*smoke_Snippets_MHAINT8MatMul/MHAINT8MatMul.*)", R"(.*smoke_static/ConvertFqRnnToQuantizedRnn.*2.1.5.*2.1.1.*2.1.1.*)", R"(.*smoke_InterpolateBicubicPillow_Layout_Test/InterpolateLayerCPUTest.CompareWithRefs/ShapeCalcMode=sizes_IS=\[?.2..20.?.?\]_TS.*1.17.4.4.*2.3.10.12.*1.17.4.4.*Sizes.*4.4.*10.20.*10.4.*PARAMETER.*0.0.0.0.*0.0.1.1.*2.3.*)", R"(.*smoke_LoopForCommon/LoopLayerCPUTest.CompareWithRefs/.*_netType=bf16.*)", @@ -563,7 +562,7 @@ std::vector disabledTestPatterns() { // ignored for not supported bf16 platforms retVector.emplace_back(R"(.*smoke_Snippets_EnforcePrecision_bf16.*)"); retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16.*)"); - retVector.emplace_back(R"(.*smoke_Snippets_MHAEnforceBF16.*)"); + retVector.emplace_back(R"(.*smoke_Snippets_MHA.*EnforceBF16.*)"); retVector.emplace_back(R"(.*ConcatSDPTest.*bf16.*)"); } // [150842] Need to support dynamic K dimension of BF16|INT8 MatMul on AMX systems @@ -572,6 +571,11 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(.*smoke_Snippets_MatMul/MatMul.CompareWithRefImpl/.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)"); retVector.emplace_back(R"(.*smoke_Snippets_MatMulTransposeB.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)"); retVector.emplace_back(R"(.*smoke_Snippets_MatMulBias.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)"); + + retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16_3D.*IS\[1\]=\[2.64.\?\].*)"); + retVector.emplace_back(R"(.*smoke_Snippets_MHA.*BF16.*/MHA.*IS\[0\]=\[(\?|1).(\?|4).(\?|12).(\?|64)\].*)"); + retVector.emplace_back(R"(.*smoke_Snippets_MHA.*BF16.*/MHA.*IS\[0\]=\[\?.\?.\?\].*)"); + retVector.emplace_back(R"(.*smoke_Snippets_(MHAINT8MatMul|MHAQuantMatMul0|MHAFQAfterMatMul_4D|smoke_Snippets_MHAFQ).*IS\[0\]=\[\?.\?.\?\.\?].*)"); } #ifdef SNIPPETS_LIBXSMM_TPP // GN in TPP requires exposing tmp Buffer results outside the loop (ticket: 151234) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp index f5057137f9b65c..176f0cb4d46aed 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp @@ -4,44 +4,26 @@ #include "snippets/matmul.hpp" -#include "common_test_utils/test_constants.hpp" -#include "openvino/runtime/system_conf.hpp" +#include "utils.hpp" namespace ov { namespace test { namespace snippets { -#define STATIC_SHAPES(...) 
static_shapes_to_test_representation(std::vector>{__VA_ARGS__}) - namespace { -static inline std::vector> quantized_precisions() { - std::vector> prc = {}; - // In Snippets MatMul INT8 is supported only on VNNI/AMX platforms - if (ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8()) { - prc.emplace_back(std::vector{element::i8, element::i8}); - prc.emplace_back(std::vector{element::u8, element::i8}); - } - return prc; -} - static inline std::vector> precisions() { - std::vector> prc = { - {element::f32, element::f32}, - }; + std::vector> prc = precision_f32(2); // Note: TPP doesn't support low precisions yet #ifndef SNIPPETS_LIBXSMM_TPP - auto quant = quantized_precisions(); + auto quant = quantized_precisions_if_supported(); std::copy(quant.begin(), quant.end(), std::back_inserter(prc)); - // In Snippets MatMul BF16 is supported only on bf16/AMX platforms - if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) { - prc.emplace_back(std::vector{element::bf16, element::bf16}); - } + auto bfloat = precision_bf16_if_supported(2); + std::copy(bfloat.begin(), bfloat.end(), std::back_inserter(prc)); #endif return prc; } - std::vector> input_shapes{ { {{}, {{2, 1, 3, 5}}}, {{}, {{1, 3, 5, 3}}} }, { {{}, {{3, 1, 32, 14}}}, {{}, {{1, 3, 14, 37}}} }, @@ -158,7 +140,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBias, MatMulBias, INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBiasQuantized, MatMulBiasQuantized, ::testing::Combine( ::testing::ValuesIn(input_shapes_bias), - ::testing::ValuesIn(quantized_precisions()), + ::testing::ValuesIn(quantized_precisions_if_supported()), ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // Subgraph ::testing::Values(1), // Tokenized MatMul+Bias @@ -167,8 +149,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBiasQuantized, MatMulBiasQuantized INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized, ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), - ::testing::ValuesIn(quantized_precisions()), + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), + ::testing::ValuesIn(quantized_precisions_if_supported()), ::testing::Values(MatMulType::MatMul), ::testing::Values(3), // Subgraph + Reshape + Subgraph ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ] @@ -177,8 +159,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized, INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantizedSoftmax, MatMulsQuantizedSoftmax, ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), - ::testing::ValuesIn(quantized_precisions()), + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), + ::testing::ValuesIn(quantized_precisions_if_supported()), ::testing::Values(MatMulType::MatMul), ::testing::Values(3), // Subgraph + Reshape + Subgraph ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ] diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp index 79db0b1546b2a8..63f5176684ccc1 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp @@ -1,60 +1,70 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel 
Corporation // SPDX-License-Identifier: Apache-2.0 // #include "snippets/mha.hpp" -#include "common_test_utils/test_constants.hpp" -#include "internal_properties.hpp" -#include "utils/cpu_test_utils.hpp" -#include "openvino/runtime/system_conf.hpp" +#include "utils.hpp" namespace ov { namespace test { namespace snippets { -#define STATIC_SHAPES(...) static_shapes_to_test_representation(std::vector>{__VA_ARGS__}) namespace { -const auto& inputShapes_4D = STATIC_SHAPES( - {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}}, - {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 16, 1, 1}, {1, 128, 16, 64}}, - {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 1, 1, 128}, {1, 128, 16, 64}}, - {{2, 68, 6, 92}, {2, 68, 6, 92}, {1, 1, 68, 68}, {2, 68, 6, 92}}, - {{1, 58, 16, 34}, {1, 58, 16, 34}, {1, 1, 1, 58}, {1, 58, 16, 34}}); - -const auto& inputShapes_3D = STATIC_SHAPES( - {{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}}, - {{68, 6, 92}, {68, 6, 92}, {1, 68, 68}, {68, 6, 92}}, - {{16, 2, 92}, {68, 2, 92}, {1, 16, 68}, {68, 2, 92}}); - -static inline bool is_bf16_supported() { - return ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16(); -} - -static inline std::vector> precision_f32(size_t count) { - std::vector> prc; - prc.emplace_back(std::vector(count, element::f32)); - return prc; -} - -static inline std::vector> precision_bf16(size_t count) { - std::vector> prc; - if (is_bf16_supported()) - prc.emplace_back(std::vector(count, element::bf16)); - return prc; +std::vector> transposedShape_4D(bool with_dynamic = true) { + auto shapes = SNIPPETS_TESTS_STATIC_SHAPES( + {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}}, + {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 16, 1, 1}, {1, 128, 16, 64}}, + {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 1, 1, 128}, {1, 128, 16, 64}}, + {{2, 68, 6, 92}, {2, 68, 6, 92}, {1, 1, 68, 68}, {2, 68, 6, 92}}, + {{1, 58, 16, 34}, {1, 58, 16, 34}, {1, 1, 1, 58}, {1, 58, 16, 34}}); + if (with_dynamic) { + std::vector> dynamic_shapes = {{ + {PartialShape{-1, -1, -1, 100}, {{1, 64, 4, 100}, {2, 16, 2, 100}, {1, 72, 4, 100}}}, + {PartialShape{-1, 128, -1, 100}, {{1, 128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + {PartialShape{-1, -1, -1, 128}, {{1, 4, 64, 128}, {2, 2, 16, 128}, {1, 4, 72, 128}}}, + {PartialShape{-1, 128, -1, 100}, {{1, 128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 16, 2, 100}, {1, 128, 3, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 128, 2, 100}, {1, 128, 1, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {2, 2, 16, 128}, {2, 1, 128, 128}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 128, 2, 100}, {1, 128, 3, 64}}}, + }, + { + {PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, + {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, + }}; + shapes.insert(shapes.end(), dynamic_shapes.begin(), dynamic_shapes.end()); + } + return shapes; } -static ov::AnyMap enable_callback() { - return ov::AnyMap({ov::intel_cpu::snippets_mode(ov::intel_cpu::SnippetsMode::ENABLE)}); +std::vector> transposedShape_3D(bool with_dynamic = true) { + 
auto shapes = SNIPPETS_TESTS_STATIC_SHAPES( + {{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}}, + {{68, 6, 92}, {68, 6, 92}, {1, 68, 68}, {68, 6, 92}}, + {{16, 2, 92}, {68, 2, 92}, {1, 16, 68}, {68, 2, 92}}); + if (with_dynamic) { + shapes.push_back({ + {PartialShape{-1, -1, -1}, {{128, 3, 64}, {128, 3, 64}, {68, 6, 87}}}, + {PartialShape{-1, -1, -1}, {{128, 1, 64}, {128, 1, 64}, {13, 6, 87}}}, + {PartialShape{-1, -1, -1}, {{1, 128, 128}, {1, 128, 128}, {1, 68, 13}}}, + {PartialShape{-1, -1, -1}, {{128, 3, 64}, {128, 3, 64}, {13, 6, 87}}}, + }); + } + return shapes; } INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_4D, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D()), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false, true}), + ::testing::Values(false), ::testing::Values(MHA::default_thread_count), ::testing::Values(1), ::testing::Values(1), @@ -62,27 +72,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_4D, ::testing::Values(CPUTestUtils::empty_plugin_config)), MHA::getTestCaseName); -std::vector> inputShapes_4D_dynamic{ - { - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 70, 3, 19}, {1, 128, 3, 64}, {1, 68, 6, 87}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 49, 1, 19}, {1, 128, 1, 64}, {2, 13, 6, 87}}}, - {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 70, 49}, {2, 1, 128, 128}, {1, 1, 68, 13}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 49, 3, 19}, {1, 128, 3, 64}, {2, 13, 6, 87}}}, - }, - { - {PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, - {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, - } -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMHA_4D, +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_4D_WithScalarMul, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D_dynamic), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D(false)), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false}), + ::testing::Values(true), ::testing::Values(MHA::default_thread_count), ::testing::Values(1), ::testing::Values(1), @@ -90,13 +85,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMHA_4D, ::testing::Values(CPUTestUtils::empty_plugin_config)), MHA::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_3D, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_3D), + ::testing::Combine(::testing::ValuesIn(transposedShape_3D()), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false, true}), + ::testing::Values(false), ::testing::Values(MHA::default_thread_count), ::testing::Values(5), // [122706]: Subgraph + 4 Transpose ::testing::Values(2), // decomposed Transpose + MHA @@ -104,111 +98,23 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_3D, ::testing::Values(CPUTestUtils::empty_plugin_config)), MHA::getTestCaseName); -const auto& splitm_static_shapes = STATIC_SHAPES({{1, 128, 2, 64}, {1, 128, 2, 64}, {1, 1, 1, 1}, {1, 128, 2, 64}}); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_4D_SplitDimensionM_static, - MHA, - 
::testing::Combine(::testing::ValuesIn(splitm_static_shapes), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(true), - ::testing::Values(4), // 4 Threads - ::testing::Values(6), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(enable_callback())), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_3D_SplitDimensionM_static, - MHA, - ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{384, 2, 64}, {384, 2, 64}, {1, 384, 384}, {384, 2, 64}})), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(true), - ::testing::Values(4), // 4 Threads - ::testing::Values(10), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output + 4 Transposes - ::testing::Values(1), // MHA - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(enable_callback())), - MHA::getTestCaseName); - -std::vector> splitm_dynamic_shapes_4d = { - { - {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 1, 1, 128}, {1, 1, 1, 17}, {1, 1, 1, 128}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, - }, - { - {PartialShape{-1, 128, -1, -1}, {{1, 128, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, 128, -1}, {{1, 1, 128, 16}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, - }, - { - {PartialShape{-1, 32, -1, -1}, {{1, 32, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, 32, -1}, {{1, 1, 32, 16}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, - }, - { - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, 16, -1}, {{1, 1, 16, 16}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, - }, -}; - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_4D_SplitDimensionM_dynamic, - MHA, - ::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_4d), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(false), - ::testing::Values(4), // 4 Threads - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> splitm_dynamic_shapes_3d = { - { - {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, - {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, - {PartialShape{-1, -1, -1}, {{1, 1, 128}, {1, 1, 17}, {1, 1, 128}}}, - {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, - }, - { - {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, - {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, - {PartialShape{1, 1, -1}, {{1, 1, 128}, {1, 1, 64}, {1, 1, 128}}}, - {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, - }, -}; - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_3D_SplitDimensionM_dynamic, - MHA, - ::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_3d), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(false), - ::testing::Values(4), // 4 Threads - ::testing::Values(5), // Subgraph + 4 Transpose - 
::testing::Values(2), // MHA + one of the transposes is executed via Subgraph (because callback is disabled) - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_3D_WithScalarMul, + MHA, + ::testing::Combine(::testing::ValuesIn(transposedShape_3D(false)), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // [122706]: Subgraph + 4 Transpose + ::testing::Values(2), // decomposed Transpose + MHA + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHABF16_4D, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), - ::testing::ValuesIn(precision_bf16(4)), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D()), + ::testing::ValuesIn(precision_bf16_if_supported(4)), ::testing::Values(ov::element::f32), ::testing::ValuesIn({false, true}), ::testing::Values(MHA::default_thread_count), @@ -220,7 +126,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHABF16_4D, INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAEnforceBF16, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D()), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::bf16), ::testing::ValuesIn({false}), @@ -231,321 +137,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAEnforceBF16, ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), MHA::getTestCaseName); -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAMulAdd, - MHAMulAdd, - ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 10, 12, 16}, {1, 10, 12, 16}, {1, 10, 12, 16}})), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false}), // Need to support True for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -const auto& inputShapeSelect = STATIC_SHAPES( - // without broadcast - {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 128, 12, 64}}, - {{1, 94, 12, 54}, {1, 94, 12, 54}, {1, 12, 94, 94}, {1, 12, 94, 94}, {1, 12, 94, 94}, {1, 94, 12, 54}}, - // with broadcast - {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 1, 1}, {1, 12, 1, 1}, {1, 128, 12, 64}}, - {{2, 52, 6, 102}, {2, 52, 6, 102}, {1, 6, 52, 52}, {1, 6, 1, 1}, {1, 6, 1, 1}, {2, 52, 6, 102}} -); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA, - MHASelect, - ::testing::Combine(::testing::ValuesIn(inputShapeSelect), - ::testing::ValuesIn(precision_f32(6)), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // Need to support True for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(2), // Less + MHA - ::testing::Values(2), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -const auto& inputShapesWOTranspose_4D = STATIC_SHAPES( - {{1, 12, 197, 64}, {1, 12, 64, 197}, {1, 12, 197, 64}}, - {{1, 12, 12, 64}, {1, 12, 64, 48}, {1, 12, 48, 64}}); -const auto& inputShapesWOTranspose_3D = STATIC_SHAPES( - {{12, 
197, 64}, {12, 64, 197}, {12, 197, 64}}, - {{12, 128, 100}, {12, 100, 128}, {12, 128, 100}}); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeOnInputs_4D, - MHAWOTransposeOnInputs, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(true), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTranspose_4D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTranspose_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> inputShapesWOTranspose_3D_dynamic{ - { - {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, - {PartialShape{-1, -1, -1}, {{1, 85, 19}, {2, 36, 40}}}, - {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, - }, - { - {PartialShape{2, -1, 64}, {{2, 9, 64}, {2, 2, 64}, {2, 9, 64}}}, - {PartialShape{2, 64, -1}, {{2, 64, 9}, {2, 64, 2}, {2, 64, 9}}}, - {PartialShape{2, -1, 64}, {{2, 9, 64}, {2, 2, 64}, {2, 9, 64}}}, - }, -}; - - - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_DynMHAWOTranspose_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D_dynamic), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeBF16_4D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::ValuesIn(precision_bf16(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeBF16_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D), - 
::testing::ValuesIn(precision_bf16(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeEnforceBF16_4D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::bf16), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeEnforceBF16_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::bf16), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAINT8MatMul, - MHAINT8MatMul, - ::testing::Combine(::testing::ValuesIn(std::vector>(inputShapes_4D.begin(), - inputShapes_4D.begin() + 2)), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(6), // FQx3 on inputs + MHA + Transpose on output + Deq Mul - ::testing::Values(5), // FQx3 on inputs + MHA + Deq Mul - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAQuantMatMul0, - MHAQuantMatMul0, - ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 128, 768}, {1, 128, 768}, {1, 1, 1, 128}, {1, 128, 768}})), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(9), // FQx2 on inputs + MHA + Transpose on output + 4 Reshapes + Deq Mul - ::testing::Values(4), // FQx2 on inputs + MHA + Deq Mul - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAFQAfterMatMul_4D, - MHAFQAfterMatMul, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(3), // MHA + Transpose on output + Deq Mul - ::testing::Values(2), // MHA + Deq Mul - 
::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAFQ, - MHAFQ, - ::testing::Combine(::testing::ValuesIn(STATIC_SHAPES({{1, 64, 12, 64}, - {1, 64, 12, 64}, - {1, 1, 1, 64}, - {1, 64, 12, 64}})), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(7), // Transposex2 + Subgraphsx5 - ::testing::Values(5), // MHA + Deq Mul on output + Deqs on inputs + 2 xFQ on inputs - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> inputShapesTransposedB { - { - {{}, {{1, 12, 12, 64}}}, - {{}, {{1, 12, 48, 64}}}, - {{}, {{1, 12, 48, 64}}} - }, - { - {PartialShape{-1, 3, -1, 64}, {{1, 3, 12, 64}, {2, 3, 36, 64}}}, - {PartialShape{-1, 3, -1, 64}, {{1, 3, 14, 64}, {2, 3, 42, 64}}}, - {PartialShape{-1, 3, -1, -1}, {{1, 3, 14, 36}, {2, 3, 42, 36}}}, - }, - { - {PartialShape{2, -1, 32, -1}, {{2, 1, 32, 70}, {2, 2, 32, 96}}}, - {PartialShape{2, -1, 49, -1}, {{2, 3, 49, 70}, {2, 1, 49, 96}}}, - {PartialShape{2, -1, 49, -1}, {{2, 1, 49, 17}, {2, 2, 49, 81}}}, - }, -}; - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHATransposedB, - MHATransposedB, - ::testing::Combine(::testing::ValuesIn(inputShapesTransposedB), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -const auto& inputShapesExtractedReshape = STATIC_SHAPES( - {{2, 196, 64}, {2, 64, 196}, {2, 14, 14, 14, 1}, {2, 14, 14, 1, 14}, {2, 196, 64}}, - {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 1}, {1, 4, 4, 1, 4}, {1, 16, 10}}, - {{1, 16, 10}, {1, 10, 16}, {1, 1, 1, 1, 1}, {1, 4, 4, 4, 4}, {1, 16, 10}}, - {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 4}, {1, 1, 1, 1, 1}, {1, 16, 10}}, - {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 4, 256}, {1, 4, 256}, {1, 4, 16, 10}}, - {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 1, 256}, {1, 4, 1}, {1, 4, 16, 10}}); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWithExtractedReshape, - MHAWithExtractedReshape, - ::testing::Combine(::testing::ValuesIn(inputShapesExtractedReshape), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // False is not supported for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(3), // Extracted Add + Extracted Reshape + MHA - ::testing::Values(2), // Extracted Add + MHA - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> inputShapes_4D_WithMul_dynamic{ - { - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 70, 3, 19}, {1, 128, 3, 64}, {1, 68, 6, 87}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 49, 1, 19}, {1, 128, 1, 64}, {2, 13, 6, 87}}}, - {PartialShape{1}, {{1}, {1}, {1}, {1} }}, - {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 70, 49}, {2, 1, 128, 128}, {1, 1, 68, 13}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 49, 3, 19}, {1, 128, 3, 64}, {2, 13, 6, 87}}}, - }, - { - 
{PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, - {PartialShape{-1, 12, 64, -1}, {{1, 12, 64, 35}, {1, 12, 64, 10}, {1, 12, 64, 10}, {1, 12, 64, 1}, {1, 12, 64, 35}}}, - {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, - } -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMHA_4D_WithMul, - MHAWithDynamicMul, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D_WithMul_dynamic), - ::testing::ValuesIn(precision_f32(5)), - ::testing::Values(ov::element::f32), - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHAWithDynamicMul::getTestCaseName); - } // namespace } // namespace snippets } // namespace test diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_extracted_reshape.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_extracted_reshape.cpp new file mode 100644 index 00000000000000..f3c1439395650a --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_extracted_reshape.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +const auto& inputShapesExtractedReshape = SNIPPETS_TESTS_STATIC_SHAPES( + {{2, 196, 64}, {2, 64, 196}, {2, 14, 14, 14, 1}, {2, 14, 14, 1, 14}, {2, 196, 64}}, + {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 1}, {1, 4, 4, 1, 4}, {1, 16, 10}}, + {{1, 16, 10}, {1, 10, 16}, {1, 1, 1, 1, 1}, {1, 4, 4, 4, 4}, {1, 16, 10}}, + {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 4}, {1, 1, 1, 1, 1}, {1, 16, 10}}, + {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 4, 256}, {1, 4, 256}, {1, 4, 16, 10}}, + {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 1, 256}, {1, 4, 1}, {1, 4, 16, 10}}); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWithExtractedReshape, + MHAWithExtractedReshape, + ::testing::Combine(::testing::ValuesIn(inputShapesExtractedReshape), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn({true}), // False is not supported for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(3), // Extracted Add + Extracted Reshape + MHA + ::testing::Values(2), // Extracted Add + MHA + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_fma.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_fma.cpp new file mode 100644 index 00000000000000..4bf35e2daa690d --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_fma.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { 
+namespace snippets { + +namespace { + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAMulAdd, + MHAMulAdd, + ::testing::Combine( + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 10, 12, 16}, {1, 10, 12, 16}, {1, 10, 12, 16}})), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn({false}), // Need to support True for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_quantized.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_quantized.cpp new file mode 100644 index 00000000000000..0c731b74565863 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_quantized.cpp @@ -0,0 +1,103 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> inputShapesQuantized { + { + {{}, {{1, 128, 16, 64}}}, + {{}, {{1, 128, 16, 64}}}, + {{}, {{1, 16, 1, 1}}}, + {{}, {{1, 128, 16, 64}}} + }, + { + {{}, {{2, 68, 6, 92}}}, + {{}, {{2, 68, 6, 92}}}, + {{}, {{1, 1, 68, 68}}}, + {{}, {{2, 68, 6, 92}}} + }, + // K, N are static + { + {PartialShape{-1, -1, -1, 100}, {{1, 64, 4, 100}, {2, 16, 2, 100}, {1, 72, 4, 100}}}, + {PartialShape{-1, 128, -1, 100}, {{1, 128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + {PartialShape{-1, -1, -1, 128}, {{1, 4, 64, 128}, {2, 2, 16, 128}, {1, 4, 72, 128}}}, + {PartialShape{-1, 128, -1, 100}, {{1, 128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 16, 2, 100}, {1, 128, 3, 64}, {1, 128, 12, 600}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 128, 2, 100}, {1, 128, 1, 64}, {1, 128, 12, 600}}}, + {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 1, 128}, {2, 1, 128, 128}, {1, 12, 1, 1}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 128, 2, 100}, {1, 128, 3, 64}, {1, 128, 12, 600}}}, + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAINT8MatMul, + MHAINT8MatMul, + ::testing::Combine(::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(6), // FQx3 on inputs + MHA + Transpose on output + Deq Mul + ::testing::Values(5), // FQx3 on inputs + MHA + Deq Mul + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAQuantMatMul0, + MHAQuantMatMul0, + ::testing::Combine( + ::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // FQx2 on inputs + MHA + Transpose on output + Deq Mul + ::testing::Values(4), // FQx2 on inputs + MHA + Deq Mul + ::testing::Values(ov::test::utils::DEVICE_CPU), + 
::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAFQAfterMatMul_4D, + MHAFQAfterMatMul, + ::testing::Combine(::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(3), // MHA + Transpose on output + Deq Mul + ::testing::Values(2), // MHA + Deq Mul + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAFQ, + MHAFQ, + ::testing::Combine(::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(7), // Transposex2 + Subgraphsx5 + ::testing::Values(5), // MHA + Deq Mul on output + Deqs on inputs + 2 xFQ on inputs + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_select.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_select.cpp new file mode 100644 index 00000000000000..3fc1417d20b102 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_select.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +const auto& inputShapeSelect = SNIPPETS_TESTS_STATIC_SHAPES( + // without broadcast + {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 128, 12, 64}}, + {{1, 94, 12, 54}, {1, 94, 12, 54}, {1, 12, 94, 94}, {1, 12, 94, 94}, {1, 12, 94, 94}, {1, 94, 12, 54}}, + // with broadcast + {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 1, 1}, {1, 12, 1, 1}, {1, 128, 12, 64}}, + {{2, 52, 6, 102}, {2, 52, 6, 102}, {1, 6, 52, 52}, {1, 6, 1, 1}, {1, 6, 1, 1}, {2, 52, 6, 102}} +); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA, + MHASelect, + ::testing::Combine(::testing::ValuesIn(inputShapeSelect), + ::testing::ValuesIn(precision_f32(6)), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // Need to support True for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(2), // Less + MHA + ::testing::Values(2), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_split_dim_m.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_split_dim_m.cpp new file mode 100644 index 00000000000000..bb5f7fe2fa5b52 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_split_dim_m.cpp @@ -0,0 +1,121 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + 
+#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +static ov::AnyMap enable_callback() { + return ov::AnyMap({ov::intel_cpu::snippets_mode(ov::intel_cpu::SnippetsMode::ENABLE)}); +} + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_SplitDimensionM_static, + MHA, + ::testing::Combine(::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 128, 2, 64}, {1, 128, 2, 64}, {1, 1, 1, 1}, {1, 128, 2, 64}})), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), + ::testing::Values(4), // 4 Threads + ::testing::Values(6), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(enable_callback())), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_3D_SplitDimensionM_static, + MHA, + ::testing::Combine( + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{384, 2, 64}, {384, 2, 64}, {1, 384, 384}, {384, 2, 64}})), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), + ::testing::Values(4), // 4 Threads + ::testing::Values(10), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output + 4 Transposes + ::testing::Values(1), // MHA + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(enable_callback())), + MHA::getTestCaseName); + +std::vector> splitm_dynamic_shapes_4d = { + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 1, 1, 128}, {1, 1, 1, 17}, {1, 1, 1, 128}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, + }, + { + {PartialShape{-1, 128, -1, -1}, {{1, 128, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, 128, -1}, {{1, 1, 128, 16}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, + }, + { + {PartialShape{-1, 32, -1, -1}, {{1, 32, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, 32, -1}, {{1, 1, 32, 16}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, 16, -1}, {{1, 1, 16, 16}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_SplitDimensionM_dynamic, + MHA, + ::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_4d), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(false), + ::testing::Values(4), // 4 Threads + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +std::vector> splitm_dynamic_shapes_3d = { + { + {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, + {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, + {PartialShape{-1, -1, -1}, {{1, 1, 128}, {1, 1, 17}, {1, 1, 128}}}, + {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, + }, + { + {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, + {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, + {PartialShape{1, 1, -1}, {{1, 1, 128}, {1, 1, 64}, {1, 1, 128}}}, + {PartialShape{-1, 2, 64}, {{128, 
2, 64}, {64, 2, 64}, {128, 2, 64}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_3D_SplitDimensionM_dynamic, + MHA, + ::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_3d), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(false), + ::testing::Values(4), // 4 Threads + ::testing::Values(5), // Subgraph + 4 Transpose + ::testing::Values(2), // MHA + one of the transposes is executed via Subgraph (because callback is disabled) + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_transposed_b.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_transposed_b.cpp new file mode 100644 index 00000000000000..45260df3cab280 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_transposed_b.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> inputShapesTransposedB { + { + {{}, {{1, 12, 12, 64}}}, + {{}, {{1, 12, 48, 64}}}, + {{}, {{1, 12, 48, 64}}} + }, + { + {PartialShape{-1, 3, -1, 64}, {{1, 3, 12, 64}, {2, 3, 36, 64}}}, + {PartialShape{-1, 3, -1, 64}, {{1, 3, 14, 64}, {2, 3, 42, 64}}}, + {PartialShape{-1, 3, -1, -1}, {{1, 3, 14, 36}, {2, 3, 42, 36}}}, + }, + { + {PartialShape{2, -1, 32, -1}, {{2, 1, 32, 70}, {2, 2, 32, 96}}}, + {PartialShape{2, -1, 49, -1}, {{2, 3, 49, 70}, {2, 1, 49, 96}}}, + {PartialShape{2, -1, 49, -1}, {{2, 1, 49, 17}, {2, 2, 49, 81}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHATransposedB, + MHATransposedB, + ::testing::Combine(::testing::ValuesIn(inputShapesTransposedB), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn({true}), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_with_dyn_mul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_with_dyn_mul.cpp new file mode 100644 index 00000000000000..7876d737af2281 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_with_dyn_mul.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> transposedShape_4D_WithMul { + { + {PartialShape{-1, -1, -1, 100}, {{1, 64, 4, 100}, {2, 16, 2, 100}, {1, 72, 4, 100}}}, + {PartialShape{-1, 200, -1, 100}, {{1, 200, 4, 100}, {2, 200, 2, 100}, {1, 200, 4, 100}}}, + {PartialShape{-1, -1, 100, 200}, {{1, 4, 100, 200}, {2, 2, 100, 200}, {1, 4, 100, 200}}}, + {PartialShape{-1, -1, -1, 200}, {{1, 4, 64, 200}, {2, 2, 16, 200}, {1, 4, 72, 200}}}, + {PartialShape{-1, 200, -1, 100}, {{1, 200, 4, 100}, {2, 
200, 2, 100}, {1, 200, 4, 100}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 70, 3, 19}, {1, 128, 3, 64}, {1, 68, 6, 87}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 49, 1, 19}, {1, 128, 1, 64}, {2, 13, 6, 87}}}, + {PartialShape{1}, {{1}, {1}, {1}, {1} }}, + {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 70, 49}, {2, 1, 128, 128}, {1, 1, 68, 13}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 49, 3, 19}, {1, 128, 3, 64}, {2, 13, 6, 87}}}, + }, + { + {PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, + {PartialShape{-1, 12, 64, -1}, {{1, 12, 64, 35}, {1, 12, 64, 10}, {1, 12, 64, 10}, {1, 12, 64, 1}, {1, 12, 64, 35}}}, + {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_WithDynamicMul, + MHAWithDynamicMul, + ::testing::Combine(::testing::ValuesIn(transposedShape_4D_WithMul), + ::testing::ValuesIn(precision_f32(5)), + ::testing::Values(ov::element::f32), + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHAWithDynamicMul::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_WithDynamicMul_EnforceBF16, + MHAWithDynamicMul, + ::testing::Combine(::testing::ValuesIn(transposedShape_4D_WithMul), + ::testing::ValuesIn(precision_f32(5)), + ::testing::Values(ov::element::bf16), + ::testing::Values(MHA::default_thread_count), + ::testing::Values(8), // MHA + 1 Transpose on output + 6 Converts around + ::testing::Values(7), // MHA + 6 Converts around + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHAWithDynamicMul::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_wo_transpose.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_wo_transpose.cpp new file mode 100644 index 00000000000000..0967ef27087674 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_wo_transpose.cpp @@ -0,0 +1,151 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> originalShape_4D { + { {{}, {{1, 12, 197, 64}}}, {{}, {{1, 12, 64, 197}}}, {{}, {{1, 12, 197, 64}}} }, + { {{}, {{1, 12, 12, 64}}}, {{}, {{1, 12, 64, 48}}}, {{}, {{1, 12, 48, 64}}} }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 3, 128, 64}, {1, 12, 197, 100}, {1, 3, 128, 64}, {1, 12, 197, 600}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 3, 64, 128}, {1, 12, 100, 197}, {1, 3, 64, 128}, {1, 12, 600, 197}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 3, 128, 64}, {1, 12, 197, 100}, {1, 3, 128, 64}, {1, 12, 197, 600}}}, + }, + { + {PartialShape{1, 4, -1, -1}, {{1, 4, 384, 64}, {1, 4, 197, 64}, {1, 4, 384, 560}}}, + {PartialShape{1, 4, -1, -1}, {{1, 4, 64, 128}, {1, 4, 64, 197}, {1, 4, 560, 
384}}}, + {PartialShape{1, 4, -1, 64}, {{1, 4, 128, 64}, {1, 4, 197, 64}, {1, 4, 384, 64}}}, + } +}; + +std::vector> originalShape_3D { + { {{}, {{12, 197, 64}}}, {{}, {{12, 64, 197}}}, {{}, {{12, 197, 64}}} }, + { {{}, {{12, 128, 100}}}, {{}, {{12, 100, 128}}}, {{}, {{12, 128, 100}}} }, + { + {PartialShape{-1, -1, 64}, {{2, 9, 64}, {1, 64, 64}, {2, 64, 64}}}, + {PartialShape{-1, 64, 124}, {{2, 64, 124}, {1, 64, 124}, {2, 64, 124}}}, + {PartialShape{-1, 124, 64}, {{2, 124, 64}, {1, 124, 64}, {2, 124, 64}}}, + }, + { + {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, + {PartialShape{-1, -1, -1}, {{1, 85, 19}, {2, 36, 40}}}, + {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, + }, + { + {PartialShape{2, -1, 64}, {{2, 9, 64}, {2, 4, 64}, {2, 9, 64}}}, + {PartialShape{2, 64, -1}, {{2, 64, 9}, {2, 64, 4}, {2, 64, 9}}}, + {PartialShape{2, -1, 64}, {{2, 9, 64}, {2, 4, 64}, {2, 9, 64}}}, + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeOnInputs_4D, + MHAWOTransposeOnInputs, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTranspose_4D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTranspose_3D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_3D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeBF16_4D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::ValuesIn(precision_bf16_if_supported(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeBF16_3D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_3D), + ::testing::ValuesIn(precision_bf16_if_supported(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // 
MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeEnforceBF16_4D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::bf16), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeEnforceBF16_3D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_3D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::bf16), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp index c05087283305e4..ea7de9ccb209ad 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp @@ -6,36 +6,28 @@ #include "common_test_utils/test_constants.hpp" #include "openvino/runtime/system_conf.hpp" +#include "utils.hpp" namespace ov { namespace test { namespace snippets { -#define STATIC_SHAPES(...) 
-        static_shapes_to_test_representation(std::vector<std::vector<ov::Shape>>{__VA_ARGS__})
-
 namespace {
 static inline std::vector<std::vector<element::Type>> precisions(bool only_fp32 = true) {
-    std::vector<std::vector<element::Type>> prc = {
-            {element::f32, element::f32},
-    };
-// Note: low precisions are not supported by TPP yet (ticker: 130010)
+    std::vector<std::vector<element::Type>> prc = precision_f32(2);
+// Note: TPP doesn't support low precisions yet
 #ifndef SNIPPETS_LIBXSMM_TPP
     if (!only_fp32) {
-        // In Snippets MatMul INT8 is supported only on VNNI/AMX platforms
-        if (ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8()) {
-            prc.emplace_back(std::vector<element::Type>{element::i8, element::i8});
-            prc.emplace_back(std::vector<element::Type>{element::u8, element::i8});
-        }
-        // In Snippets MatMul BF16 is supported only on bf16/AMX platforms
-        if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) {
-            prc.emplace_back(std::vector<element::Type>{element::bf16, element::bf16});
-        }
+        auto quant = quantized_precisions_if_supported();
+        std::copy(quant.begin(), quant.end(), std::back_inserter(prc));
+        auto bfloat = precision_bf16_if_supported(2);
+        std::copy(bfloat.begin(), bfloat.end(), std::back_inserter(prc));
     }
 #endif
     return prc;
 }
 
 namespace transpose_zero_input {
-const auto& transpose_input_shapes = STATIC_SHAPES({{1, 49, 2, 23}, {2, 2, 23, 39}});
+const auto& transpose_input_shapes = SNIPPETS_TESTS_STATIC_SHAPES({{1, 49, 2, 23}, {2, 2, 23, 39}});
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul,
                          ::testing::Combine(
                                  ::testing::ValuesIn(transpose_input_shapes),
@@ -84,7 +76,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnected, TransposeMatMul,
 } // namespace transpose_zero_input
 
 namespace transpose_first_input {
-const auto& transpose_input_shapes = STATIC_SHAPES({{2, 1, 49, 13}, {1, 13, 3, 39}});
+const auto& transpose_input_shapes = SNIPPETS_TESTS_STATIC_SHAPES({{2, 1, 49, 13}, {1, 13, 3, 39}});
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul,
                          ::testing::Combine(
                                  ::testing::ValuesIn(transpose_input_shapes),
@@ -126,7 +118,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMatMulFQ, TransposeMatMulFQ,
 } // namespace transpose_first_input
 
 namespace transpose_output {
-const auto& transpose_input_shapes = STATIC_SHAPES({{2, 1, 49, 13}, {1, 2, 13, 39}});
+const auto& transpose_input_shapes = SNIPPETS_TESTS_STATIC_SHAPES({{2, 1, 49, 13}, {1, 2, 13, 39}});
 
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul,
                          ::testing::Combine(
@@ -195,7 +187,7 @@ static inline std::vector<std::vector<element::Type>> precisions(bool only_fp32
 }
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ExplicitTransposeMatMul, ExplicitTransposeMatMul,
                          ::testing::Combine(
-                                 ::testing::ValuesIn(STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}})),
+                                 ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}})),
                                  ::testing::Values(1), // Transpose on second input
                                  ::testing::ValuesIn(precisions()),
                                  ::testing::Values(MatMulType::MatMul),
@@ -223,7 +215,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynExplicitTransposeMatMul, ExplicitTran
 
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMatMulBias, ExplicitTransposeMatMulBias,
                          ::testing::Combine(
-                                 ::testing::ValuesIn(STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}, {1, 1, 69, 49}})),
+                                 ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}, {1, 1, 69, 49}})),
                                  ::testing::Values(1), // Transpose on second input
                                  ::testing::ValuesIn(precisions()),
                                  ::testing::Values(MatMulType::MatMul),
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp
new file mode 100644
index 00000000000000..6c0d54da973086
--- /dev/null
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp
@@ -0,0 +1,48 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "internal_properties.hpp"
+#include "utils/cpu_test_utils.hpp"
+#include "openvino/runtime/system_conf.hpp"
+
+namespace ov {
+namespace test {
+namespace snippets {
+
+#define SNIPPETS_TESTS_STATIC_SHAPES(...) static_shapes_to_test_representation(std::vector<std::vector<ov::Shape>>{__VA_ARGS__})
+
+static inline bool is_bf16_supported_by_brgemm() {
+    return ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16();
+}
+
+static inline bool is_i8_supported_by_brgemm() {
+    return ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8();
+}
+
+static inline std::vector<std::vector<element::Type>> precision_f32(size_t count) {
+    std::vector<std::vector<element::Type>> prc;
+    prc.emplace_back(std::vector<element::Type>(count, element::f32));
+    return prc;
+}
+
+static inline std::vector<std::vector<element::Type>> precision_bf16_if_supported(size_t count) {
+    std::vector<std::vector<element::Type>> prc;
+    if (is_bf16_supported_by_brgemm())
+        prc.emplace_back(std::vector<element::Type>(count, element::bf16));
+    return prc;
+}
+
+static inline std::vector<std::vector<element::Type>> quantized_precisions_if_supported() {
+    std::vector<std::vector<element::Type>> prc = {};
+    // In Snippets MatMul INT8 is supported only on VNNI/AMX platforms
+    if (is_i8_supported_by_brgemm()) {
+        prc.emplace_back(std::vector<element::Type>{element::i8, element::i8});
+        prc.emplace_back(std::vector<element::Type>{element::u8, element::i8});
+    }
+    return prc;
+}
+
+} // namespace snippets
+} // namespace test
+} // namespace ov
diff --git a/src/tests/functional/plugin/shared/include/snippets/mha.hpp b/src/tests/functional/plugin/shared/include/snippets/mha.hpp
index f8198dee0218ee..34cb4d452bfb15 100644
--- a/src/tests/functional/plugin/shared/include/snippets/mha.hpp
+++ b/src/tests/functional/plugin/shared/include/snippets/mha.hpp
@@ -44,6 +44,7 @@ class MHABase : virtual public SnippetsTestsCommon {
     void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override;
     virtual std::shared_ptr<SnippetsFunctionBase> get_subgraph() const = 0;
     virtual void init_params(std::vector<InputShape>& input_shapes, ov::element::Type& prc, ov::AnyMap& additional_config) = 0;
+    virtual void init_thresholds();
 
     size_t m_thread_count;
     std::vector<ov::element::Type> m_input_types;
@@ -88,6 +89,7 @@ class MHATransposedB : public MHA {
 class MHAINT8MatMul : public MHA {
 protected:
     std::shared_ptr<SnippetsFunctionBase> get_subgraph() const override;
+    void init_thresholds() override;
 };
 
 class MHAQuantMatMul0 : public MHA {
@@ -103,6 +105,7 @@ class MHAFQAfterMatMul : public MHA {
 class MHAFQ : public MHA {
 protected:
     std::shared_ptr<SnippetsFunctionBase> get_subgraph() const override;
+    void init_thresholds() override;
 };
 
 class MHAWithExtractedReshape : public MHA {
diff --git a/src/tests/functional/plugin/shared/src/snippets/mha.cpp b/src/tests/functional/plugin/shared/src/snippets/mha.cpp
index 351cd50856357d..8d0cb8613bc47e 100644
--- a/src/tests/functional/plugin/shared/src/snippets/mha.cpp
+++ b/src/tests/functional/plugin/shared/src/snippets/mha.cpp
@@ -53,15 +53,19 @@ void MHABase::SetUp() {
         configuration.insert({"SNIPPETS_MODE", "IGNORE_CALLBACK"});
     }
 
-    setInferenceType(prc);
     inType = outType = prc;
+    setInferenceType(prc);
+    init_thresholds();
+}
+
+ void MHABase::init_thresholds() {
     // Note: Libxsmm calculates Exp in a slightly different way, so the abs values might differ a bit.
     // Ticket: 130699
 #ifdef SNIPPETS_LIBXSMM_TPP
     abs_threshold = 1e-6;
 #endif
-    if (prc == ov::element::bf16)
+    if (inType == ov::element::bf16)
         rel_threshold = 0.05f;
-}
+ }
 
 std::string MHA::getTestCaseName(testing::TestParamInfo<ov::test::snippets::MHAParams> obj) {
     std::vector<InputShape> input_shapes;
@@ -194,6 +198,11 @@ std::shared_ptr<SnippetsFunctionBase> MHAINT8MatMul::get_subgraph() const {
     return std::make_shared<MHAINT8MatMulFunction>(inputDynamicShapes);
 }
 
+void MHAINT8MatMul::init_thresholds() {
+    MHABase::init_thresholds();
+    abs_threshold = 4e-6;
+}
+
 std::shared_ptr<SnippetsFunctionBase> MHAQuantMatMul0::get_subgraph() const {
     return std::make_shared<MHAQuantMatMul0Function>(inputDynamicShapes);
 }
@@ -206,6 +215,11 @@ std::shared_ptr<SnippetsFunctionBase> MHAFQ::get_subgraph() const {
     return std::make_shared<MHAFQFunction>(inputDynamicShapes);
 }
 
+void MHAFQ::init_thresholds() {
+    MHABase::init_thresholds();
+    abs_threshold = 0.016;
+}
+
 std::shared_ptr<SnippetsFunctionBase> MHAMulAdd::get_subgraph() const {
     return std::make_shared<MHAMulAddFunction>(inputDynamicShapes);
 }
diff --git a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp
index 90ab47214effee..f54f92c598a45f 100644
--- a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp
+++ b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp
@@ -235,9 +235,7 @@ class MHAWOTransposeSplitMFunction : public MHAWOTransposeFunction {
 *       FakeQuantize i8
 *          \     /
 *            Add
-*          Reshape0
-*          Softmax
-*          Reshape1    Transpose2[0,2,1,3]
+*          Softmax     Transpose2[0,2,1,3]
 *              \      /
 *               MatMul1
 *           FakeQuantize i8
@@ -261,9 +259,7 @@ class MHAFQAfterMatMulFunction : public SnippetsFunctionBase {
 *       FakeQuantize i8
 *          \     /
 *            Add
-*          Reshape0
-*          Softmax
-*          Reshape1     FakeQuantize i8
+*          Softmax      FakeQuantize i8
 *       FakeQuantize u8 Transpose2[0,2,1,3]
 *              \      /
 *               MatMul1
@@ -281,20 +277,17 @@ class MHAINT8MatMulFunction : public SnippetsFunctionBase {
 };
 
 /* Graph:
-*       FakeQuantize i8      Reshape1
-*       Reshape0             Transpose1[0,2,3,1]
+*       FakeQuantize i8      Transpose1[0,2,3,1]
 *       Transpose0[0,2,1,3]  FakeQuantize i8
 *              \     /
 *              MatMul0
 *                 \   /
-*          Add              Reshape2
+*                  Add
 *                Softmax    Transpose2[0,2,1,3]
 *                    \      /
 *                     MatMul1
 *                  FakeQuantize i8
 *                  Transpose3[0,2,1,3]
-*                     Reshape3
-* Note: Reshapes are tosplit Tokenization between FQs and deq Mul and MHA since Snippets::Ignore_Callback may be enabled
 */
 class MHAQuantMatMul0Function : public SnippetsFunctionBase {
 public:
diff --git a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp
index 1dbf8d7d22ed26..34f42ec838aa6d 100644
--- a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp
+++ b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp
@@ -598,38 +598,25 @@ std::shared_ptr<ov::Model> MHAFQAfterMatMulFunction::initOriginal() const {
     auto transpose2Param = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[3]);
     ov::ParameterVector ngraphParam = {transpose0Param, transpose1Param, addParam, transpose2Param};
 
-    const auto shape_rank = input_shapes[0].get_shape().size();
+    const auto shape_rank = input_shapes[0].size();
     auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
     auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 3, 1});
     auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
     auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
 
-    std::vector<int64_t> reshape0ConstData = {static_cast<int64_t>(input_shapes[0].get_shape()[0] * input_shapes[0].get_shape()[1] * input_shapes[0].get_shape()[2]),
-                                              -1};
-    auto reshape0Const = ov::op::v0::Constant::create(ov::element::i64, {reshape0ConstData.size()}, reshape0ConstData);
-
-    std::vector<int64_t> reshape1ConstData = {static_cast<int64_t>(input_shapes[0].get_shape()[0]),
-                                              static_cast<int64_t>(input_shapes[0].get_shape()[2]),
-                                              static_cast<int64_t>(input_shapes[0].get_shape()[1]),
-                                              static_cast<int64_t>(input_shapes[0].get_shape()[1])};
-    auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {reshape1ConstData.size()}, reshape1ConstData);
-
     bool transA = false;
     bool transB = false;
     const auto transpose0 = std::make_shared<ov::op::v1::Transpose>(transpose0Param, transpose0Const);
     const auto transpose1 = std::make_shared<ov::op::v1::Transpose>(transpose1Param, transpose1Const);
     const auto matMul0 = std::make_shared<ov::op::v0::MatMul>(transpose0, transpose1, transA, transB);
     auto fq0 = ov::test::utils::make_fake_quantize(matMul0, ov::element::f32, 256, {1},
-                                                    {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+                                                   {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
     const auto add = std::make_shared<ov::op::v1::Add>(fq0, addParam);
-    const auto reshape0 = std::make_shared<ov::op::v1::Reshape>(add, reshape0Const, true);
-    const auto softMax = std::make_shared<ov::op::v1::Softmax>(reshape0, 1);
-    const auto reshape1 = std::make_shared<ov::op::v1::Reshape>(softMax, reshape1Const, true);
+    const auto softMax = std::make_shared<ov::op::v8::Softmax>(add, -1);
     const auto transpose2 = std::make_shared<ov::op::v1::Transpose>(transpose2Param, transpose2Const);
-    const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(reshape1, transpose2, transA, transB);
+    const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(softMax, transpose2, transA, transB);
     auto fq1 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1},
-                                                    {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+                                                   {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
     const auto transpose3 = std::make_shared<ov::op::v1::Transpose>(fq1, transpose3Const);
 
     ov::ResultVector results{std::make_shared<ov::op::v0::Result>(transpose3)};
@@ -642,46 +629,33 @@ std::shared_ptr<ov::Model> MHAINT8MatMulFunction::initOriginal() const {
     auto transpose2Param = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[3]);
     ov::ParameterVector ngraphParam = {transpose0Param, transpose1Param, addParam, transpose2Param};
 
-    const auto shape_rank = input_shapes[0].get_shape().size();
+    const auto shape_rank = input_shapes[0].size();
     auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
     auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 3, 1});
     auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
     auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
 
-    std::vector<int64_t> reshape0ConstData = {static_cast<int64_t>(input_shapes[0].get_shape()[0] * input_shapes[0].get_shape()[1] * input_shapes[0].get_shape()[2]),
-                                              -1};
-    auto reshape0Const = ov::op::v0::Constant::create(ov::element::i64, {reshape0ConstData.size()}, reshape0ConstData);
-
-    std::vector<int64_t> reshape1ConstData = {static_cast<int64_t>(input_shapes[0].get_shape()[0]),
-                                              static_cast<int64_t>(input_shapes[0].get_shape()[2]),
-                                              static_cast<int64_t>(input_shapes[0].get_shape()[1]),
-                                              static_cast<int64_t>(input_shapes[0].get_shape()[1])};
-    auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {reshape1ConstData.size()}, reshape1ConstData);
-
     auto fq0 = ov::test::utils::make_fake_quantize(transpose0Param, ov::element::f32, 256, {1},
-                                                    {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+                                                   {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
     auto fq1 = ov::test::utils::make_fake_quantize(transpose1Param, ov::element::f32, 256, {1},
-                                                    {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+                                                   {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
     auto fq2 = ov::test::utils::make_fake_quantize(transpose2Param, ov::element::f32, 256, {1},
-                                                    {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+                                                   {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
     bool transA = false;
     bool transB = false;
     const auto transpose0 = std::make_shared<ov::op::v1::Transpose>(fq0, transpose0Const);
     const auto transpose1 = std::make_shared<ov::op::v1::Transpose>(fq1, transpose1Const);
     const auto matMul0 = std::make_shared<ov::op::v0::MatMul>(transpose0, transpose1, transA, transB);
     auto fq3 = ov::test::utils::make_fake_quantize(matMul0, ov::element::f32, 256, {1},
-                                                    {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+                                                   {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
     const auto add = std::make_shared<ov::op::v1::Add>(fq3, addParam);
-    const auto reshape0 = std::make_shared<ov::op::v1::Reshape>(add, reshape0Const, true);
-    const auto softMax = std::make_shared<ov::op::v1::Softmax>(reshape0, 1);
-    const auto reshape1 = std::make_shared<ov::op::v1::Reshape>(softMax, reshape1Const, true);
-    auto fq4 = ov::test::utils::make_fake_quantize(reshape1, ov::element::f32, 256, {1},
-                                                    {0}, {0.820726}, {0}, {0.820726});
+    const auto softMax = std::make_shared<ov::op::v8::Softmax>(add, -1);
+    auto fq4 = ov::test::utils::make_fake_quantize(softMax, ov::element::f32, 256, {1},
+                                                   {0}, {0.820726}, {0}, {0.820726});
     const auto transpose2 = std::make_shared<ov::op::v1::Transpose>(fq2, transpose2Const);
     const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(fq4, transpose2, transA, transB);
     auto fq5 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1},
-                                                    {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+                                                   {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
     const auto transpose3 = std::make_shared<ov::op::v1::Transpose>(fq5, transpose3Const);
 
     ov::ResultVector results{std::make_shared<ov::op::v0::Result>(transpose3)};
@@ -694,34 +668,20 @@ std::shared_ptr<ov::Model> MHAQuantMatMul0Function::initOriginal() const {
     auto transpose2Param = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[3]);
     ov::ParameterVector ngraphParam = {transpose0Param, transpose1Param, addParam, transpose2Param};
 
-    const auto channel = int64_t(12);
-    const auto last_dim = input_shapes[0].get_shape().back();
-    OPENVINO_ASSERT(last_dim % channel == 0, "Incorrect test configuration");
-    const auto new_shape = std::vector<int64_t>{0, 0, channel, static_cast<int64_t>(last_dim) / channel};
-
-    auto reshape0Const = ov::op::v0::Constant::create(ov::element::i64, {new_shape.size()}, new_shape);
-    auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {new_shape.size()}, new_shape);
-    auto reshape2Const = ov::op::v0::Constant::create(ov::element::i64, {new_shape.size()}, new_shape);
-    auto reshape3Const = ov::op::v0::Constant::create(ov::element::i64, {input_shapes[0].size()}, std::vector<int64_t>{0, 0, -1});
-
-    auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {4}, std::vector<int64_t>{0, 2, 1, 3});
-    auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {4}, std::vector<int64_t>{0, 2, 3, 1});
-    auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {4}, std::vector<int64_t>{0, 2, 1, 3});
-    auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {4}, std::vector<int64_t>{0, 2, 1, 3});
-
-    const auto reshape1 = std::make_shared<ov::op::v1::Reshape>(transpose1Param, reshape1Const, true);
-    const auto reshape2 = std::make_shared<ov::op::v1::Reshape>(transpose2Param, reshape2Const, true);
+    const auto shape_rank = input_shapes[0].size();
+    auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
+    auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 3, 1});
+    auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
+    auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
 
-    const auto transpose1 = std::make_shared<ov::op::v1::Transpose>(reshape1, transpose1Const);
-    const auto transpose2 = std::make_shared<ov::op::v1::Transpose>(reshape2, transpose2Const);
+    const auto transpose1 = std::make_shared<ov::op::v1::Transpose>(transpose1Param, transpose1Const);
+    const auto transpose2 = std::make_shared<ov::op::v1::Transpose>(transpose2Param, transpose2Const);
     auto fq0 = ov::test::utils::make_fake_quantize(transpose0Param, ov::element::f32, 256, {1},
-                                                    {-12.5187311}, {12.4209289}, {-12.5187311}, {12.4209289});
+                                                   {-12.5187311}, {12.4209289}, {-12.5187311}, {12.4209289});
     auto fq1 = ov::test::utils::make_fake_quantize(transpose1, ov::element::f32, 256, {1},
-                                                    {-1.43326699}, {1.42206954}, {-1.43326699}, {1.42206954});
-
-    const auto reshape0 = std::make_shared<ov::op::v1::Reshape>(fq0, reshape0Const, true);
-    const auto transpose0 = std::make_shared<ov::op::v1::Transpose>(reshape0, transpose0Const);
+                                                   {-1.43326699}, {1.42206954}, {-1.43326699}, {1.42206954});
+    const auto transpose0 = std::make_shared<ov::op::v1::Transpose>(fq0, transpose0Const);
     const auto matMul0 = std::make_shared<ov::op::v0::MatMul>(transpose0, fq1);
 
     const auto add = std::make_shared<ov::op::v1::Add>(matMul0, addParam);
@@ -729,11 +689,10 @@ std::shared_ptr<ov::Model> MHAQuantMatMul0Function::initOriginal() const {
     const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(softMax, transpose2);
 
     auto fq2 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1},
-                                                    {-1.81826221}, {1.804057}, {-1.81826221}, {1.804057});
+                                                   {-1.81826221}, {1.804057}, {-1.81826221}, {1.804057});
     const auto transpose3 = std::make_shared<ov::op::v1::Transpose>(fq2, transpose3Const);
-    const auto reshape3 = std::make_shared<ov::op::v1::Reshape>(transpose3, reshape3Const, true);
-    ov::ResultVector results{std::make_shared<ov::op::v0::Result>(reshape3)};
+    ov::ResultVector results{std::make_shared<ov::op::v0::Result>(transpose3)};
     return std::make_shared<ov::Model>(results, ngraphParam, "mha");
 }
 std::shared_ptr<ov::Model> MHAFQFunction::initOriginal() const {
@@ -743,18 +702,15 @@ std::shared_ptr<ov::Model> MHAFQFunction::initOriginal() const {
     auto transpose2Param = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[3]);
     ov::ParameterVector ngraphParam = {transpose0Param, transpose1Param, addParam, transpose2Param};
 
-    const auto shape_rank = input_shapes[0].get_shape().size();
+    const auto shape_rank = input_shapes[0].size();
     auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
     auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 3, 1});
     auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
     auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector<int64_t>{0, 2, 1, 3});
 
-    const auto fq0 = ov::test::utils::make_fake_quantize(transpose0Param, ov::element::f32, 256, {1},
-                                                         {-5.217694}, {6.661877}, {-5.217694}, {6.661877});
-    const auto fq1 = ov::test::utils::make_fake_quantize(transpose1Param, ov::element::f32, 256, {1},
-                                                         {-6.40245}, {6.45286}, {-6.40245}, {6.45286});
-    const auto fq_add = ov::test::utils::make_fake_quantize(addParam, ov::element::f32, 256, {1},
-                                                            {-1000}, {0}, {-1000}, {0});
+    const auto fq0 = ov::test::utils::make_fake_quantize(transpose0Param, ov::element::f32, 256, {1}, {-5.217694}, {6.661877}, {-5.217694}, {6.661877});
+    const auto fq1 = ov::test::utils::make_fake_quantize(transpose1Param, ov::element::f32, 256, {1}, {-6.40245}, {6.45286}, {-6.40245}, {6.45286});
+    const auto fq_add = ov::test::utils::make_fake_quantize(addParam, ov::element::f32, 256, {1}, {-1000}, {0}, {-1000}, {0});
 
     bool transA = false;
     bool transB = false;
@@ -766,16 +722,13 @@ std::shared_ptr<ov::Model> MHAFQFunction::initOriginal() const {
     const auto mul_deq_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1}, std::vector<float>{0.00098425});
     const auto mul_deq = std::make_shared<ov::op::v1::Multiply>(convert, mul_deq_const);
     const auto mul = std::make_shared<ov::op::v1::Multiply>(transpose1, mul_deq);
-    auto fq1_1 = ov::test::utils::make_fake_quantize(mul, ov::element::f32, 256, {1},
-                                                     {-0.8003067}, {0.8066083}, {-0.8003067}, {0.8066083});
+    const auto fq1_1 = ov::test::utils::make_fake_quantize(mul, ov::element::f32, 256, {1}, {-0.8003067}, {0.8066083}, {-0.8003067}, {0.8066083});
     const auto matMul0 = std::make_shared<ov::op::v0::MatMul>(transpose0, fq1_1, transA, transB);
-    auto fq2 = ov::test::utils::make_fake_quantize(matMul0, ov::element::f32, 256, {1},
-                                                   {-14.50351}, {17.65645}, {-14.50351}, {17.65645});
+    const auto fq2 = ov::test::utils::make_fake_quantize(matMul0, ov::element::f32, 256, {1}, {-14.50351}, {17.65645}, {-14.50351}, {17.65645});
     const auto add = std::make_shared<ov::op::v1::Add>(fq2, fq_add);
     const auto softMax = std::make_shared<ov::op::v1::Softmax>(add, 3);
     const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(softMax, transpose2, transA, transB);
-    auto fq3 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1},
-                                                   {-1.895786}, {2.0028071}, {-1.895786}, {2.0028071});
+    auto fq3 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1}, {-1.895786}, {2.0028071}, {-1.895786}, {2.0028071});
     const auto transpose3 = std::make_shared<ov::op::v1::Transpose>(fq3, transpose3Const);
 
     ov::ResultVector results{std::make_shared<ov::op::v0::Result>(transpose3)};
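
For orientation, here is a standalone sketch (not part of the patch) of how the helpers introduced in utils.hpp are meant to behave: each returns a list of per-input precision configurations, and the *_if_supported variants return an empty list when the host CPU lacks the required ISA, so the corresponding precision configurations are simply skipped on that machine. The main() harness and the printed strings below are purely illustrative.

// Illustrative only: exercises the utils.hpp helpers added by this patch.
#include <iostream>

#include "utils.hpp"  // the new header from shared_tests_instances/snippets

int main() {
    using namespace ov::test::snippets;

    // Always one entry: a vector of `count` f32 precisions, e.g. {f32, f32, f32}.
    const auto fp32 = precision_f32(3);

    // Empty unless the CPU reports bf16 or AMX-bf16 support (is_bf16_supported_by_brgemm()).
    const auto bf16 = precision_bf16_if_supported(3);

    // Empty unless the CPU reports VNNI or AMX-int8 support (is_i8_supported_by_brgemm()).
    const auto int8 = quantized_precisions_if_supported();

    std::cout << "f32 configs:  " << fp32.size() << "\n"
              << "bf16 configs: " << bf16.size() << "\n"
              << "int8 configs: " << int8.size() << std::endl;
    return 0;
}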