Fix expected quantization matmul test (#531)

huggingface · Jan 25, 2024 · commit e0c1143
1 parent: d96ebfa
Show file tree

Hide file tree

Showing 4 changed files with 15 additions and 18 deletions.
diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml
@@ -19,7 +19,6 @@ jobs:
       matrix:
         python-version: [3.8, 3.11]
         os: [ubuntu-latest]
-        openvino: ["openvino", "openvino-nightly"]
 
     runs-on: ${{ matrix.os }}
     steps:
@@ -34,9 +33,6 @@ jobs:
         # install PyTorch CPU version to avoid installing CUDA packages on GitHub runner without GPU
         pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
         pip install .[openvino,nncf,tests,diffusers]
-    - name: Install openvino-nightly (optional)
-      run: pip uninstall -y openvino && pip install ${{ matrix.openvino }}
-      if: matrix.openvino == 'openvino-nightly'
     - name: Test with Pytest
       run: |
         pytest tests/openvino/ --ignore test_modeling_basic
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
@@ -148,10 +148,13 @@ class OVWeightCompressionTest(unittest.TestCase):
     # TODO : add models
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = (
         (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 70, 70),
-        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 45, 44),
+        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 46),
     )
 
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 82, 295),)
+    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 64, 365),)
+    SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = (
+        (OVModelForCausalLM, "opt125m", 64, 477),
+    )
 
     SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION = (
         (OVModelForCausalLM, "gpt2"),
@@ -242,7 +245,7 @@ def test_ovmodel_4bit_weight_compression(self, model_cls, model_name, expected_i
             outputs = model(**tokens)
             self.assertTrue("logits" in outputs)
 
-    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS)
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS)
     @unittest.skipIf(not IS_SUPPORT_STATEFUL, "Stateful models supported only in 2023.3 and above")
     def test_ovmodel_4bit_weight_compression_stateful(self, model_cls, model_name, expected_int8, expected_int4):
         task = model_cls.export_feature

diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py
@@ -321,15 +321,15 @@ def tearDown(self):
     "default_quantization": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG,
-        expected_fake_quantize=42,
+        expected_fake_quantize=44,
         expected_int8=32,
         compression_metrics=["compression_loss"],
     ),
     "distillation,default_quantization": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-bert",
         teacher_model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG,
-        expected_fake_quantize=42,
+        expected_fake_quantize=44,
         expected_int8=32,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
     ),
@@ -364,7 +364,7 @@ def tearDown(self):
     "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=42,
+        expected_fake_quantize=44,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss"],
@@ -381,7 +381,7 @@ def tearDown(self):
         model_id="hf-internal-testing/tiny-random-bert",
         teacher_model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=42,
+        expected_fake_quantize=44,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -411,7 +411,7 @@ def tearDown(self):
     "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=42,
+        expected_fake_quantize=44,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss"],
@@ -428,7 +428,7 @@ def tearDown(self):
         model_id="hf-internal-testing/tiny-random-bert",
         teacher_model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=42,
+        expected_fake_quantize=44,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],

diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
@@ -99,14 +99,13 @@
 
 SEED = 42
 
-
 _ARCHITECTURES_TO_EXPECTED_INT8 = {
-    "bert": (68,),
+    "bert": (70,),
     "roberta": (68,),
     "albert": (84,),
     "vit": (62,),
     "blenderbot": (70,),
-    "gpt2": (44,),
+    "gpt2": (46,),
     "wav2vec2": (30,),
     "distilbert": (66,),
     "t5": (64, 104, 84),
@@ -115,8 +114,7 @@
     "stable-diffusion-xl-refiner": (296, 8, 8, 66),
 }
 
-
-_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (82, 295)}
+_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (64, 477)}
 
 
 def get_num_quantized_nodes(ov_model):