From 94a990f2d08e1b68a2f78ccd1fe437236309caf2 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Fri, 8 Mar 2024 11:36:42 +0100
Subject: [PATCH] Fix expected int8, fake-quantize and binary-mask counts in OpenVINO tests

---
 tests/openvino/test_quantization.py | 20 ++++++++++----------
 tests/openvino/test_training.py     | 12 ++++++------
 tests/openvino/utils_tests.py       |  6 +++---
 3 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index 0ef89ec8b8..a33e0339f3 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -154,16 +154,16 @@ class OVWeightCompressionTest(unittest.TestCase):
     # TODO : add models
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = (
         (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 70, 70),
-        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 46),
+        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 44),
     )
 
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 64, 365),)
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 388),)
+    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 62, 365),)
+    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 385),)
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTO_COMPRESSED_MATMULS = (
-        (OVModelForCausalLM, "hf-internal-testing/tiny-random-OPTForCausalLM", 16, 136),
+        (OVModelForCausalLM, "hf-internal-testing/tiny-random-OPTForCausalLM", 14, 136),
     )
     SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = (
-        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 46),
+        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 44),
     )
 
     LOAD_IN_4_BITS_SCOPE = (
@@ -171,7 +171,7 @@ class OVWeightCompressionTest(unittest.TestCase):
             OVModelForCausalLM,
             "hf-internal-testing/tiny-random-gpt2",
             dict(bits=4, sym=False, group_size=-1, ratio=0.8),
-            16,
+            14,
         ),
         (
             OVModelForCausalLM,
@@ -182,13 +182,13 @@ class OVWeightCompressionTest(unittest.TestCase):
                 group_size=32,
                 ignored_scope={"names": ["__module.model.transformer.h.2.mlp.c_fc/aten::addmm/MatMul"]},
             ),
-            6,
+            4,
         ),
         (
             OVModelForCausalLM,
             "hf-internal-testing/tiny-random-gpt2",
             dict(bits=4, sym=False, group_size=-1, ratio=0.8, all_layers=True),
-            22,
+            18,
         ),
         (
             OVModelForCausalLM,
@@ -201,7 +201,7 @@ class OVWeightCompressionTest(unittest.TestCase):
                 sensitivity_metric="mean_activation_magnitude",
                 dataset="ptb",
             ),
-            16,
+            14,
         ),
         (
             OVModelForCausalLM,
@@ -215,7 +215,7 @@ class OVWeightCompressionTest(unittest.TestCase):
                 dataset="ptb",
                 awq=True,
             ),
-            16,
+            14,
         ),
     )
 
diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py
index 937c0bf3f5..4f55b23e71 100644
--- a/tests/openvino/test_training.py
+++ b/tests/openvino/test_training.py
@@ -365,7 +365,7 @@ def tearDown(self):
     "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=44,
+        expected_fake_quantize=34,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss"],
@@ -376,7 +376,7 @@ def tearDown(self):
             CUSTOMIZED_QUANTIZATION_CONFIG,
             STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
         ],
-        expected_fake_quantize=44,
+        expected_fake_quantize=34,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss"],
@@ -385,7 +385,7 @@ def tearDown(self):
         model_id="hf-internal-testing/tiny-random-bert",
         teacher_model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=44,
+        expected_fake_quantize=34,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -397,7 +397,7 @@ def tearDown(self):
             CUSTOMIZED_QUANTIZATION_CONFIG,
             STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
         ],
-        expected_fake_quantize=44,
+        expected_fake_quantize=34,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -574,7 +574,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG],
         expected_fake_quantize=28,
         expected_int8=28,
-        expected_binary_masks=48,
+        expected_binary_masks=40,
         compression_metrics=["compression_loss"],
     ),
     "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
@@ -591,7 +591,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG],
         expected_fake_quantize=28,
         expected_int8=28,
-        expected_binary_masks=48,
+        expected_binary_masks=40,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
     ),
     "distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index 8fabb34e38..04049172d3 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -102,12 +102,12 @@
 SEED = 42
 
 _ARCHITECTURES_TO_EXPECTED_INT8 = {
-    "bert": (70,),
+    "bert": (68,),
     "roberta": (68,),
     "albert": (84,),
     "vit": (64,),
     "blenderbot": (70,),
-    "gpt2": (46,),
+    "gpt2": (44,),
     "wav2vec2": (34,),
     "distilbert": (66,),
     "t5": (64, 104, 84),
@@ -116,7 +116,7 @@
     "stable-diffusion-xl-refiner": (366, 34, 42, 66),
 }
 
-_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (64, 477)}
+_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (62, 477)}
 
 
 def get_num_quantized_nodes(ov_model):