diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 0ef89ec8b8..a33e0339f3 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -154,16 +154,16 @@ class OVWeightCompressionTest(unittest.TestCase): # TODO : add models SUPPORTED_ARCHITECTURES_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = ( (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 70, 70), - (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 46), + (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 44), ) - SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 64, 365),) - SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 388),) + SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 62, 365),) + SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 385),) SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTO_COMPRESSED_MATMULS = ( - (OVModelForCausalLM, "hf-internal-testing/tiny-random-OPTForCausalLM", 16, 136), + (OVModelForCausalLM, "hf-internal-testing/tiny-random-OPTForCausalLM", 14, 136), ) SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = ( - (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 46), + (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 44), ) LOAD_IN_4_BITS_SCOPE = ( @@ -171,7 +171,7 @@ class OVWeightCompressionTest(unittest.TestCase): OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", dict(bits=4, sym=False, group_size=-1, ratio=0.8), - 16, + 14, ), ( OVModelForCausalLM, @@ -182,13 +182,13 @@ class OVWeightCompressionTest(unittest.TestCase): group_size=32, ignored_scope={"names": ["__module.model.transformer.h.2.mlp.c_fc/aten::addmm/MatMul"]}, ), - 6, + 4, ), ( OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", dict(bits=4, sym=False, group_size=-1, ratio=0.8, all_layers=True), - 22, + 18, ), ( OVModelForCausalLM, @@ -201,7 +201,7 @@ class OVWeightCompressionTest(unittest.TestCase): sensitivity_metric="mean_activation_magnitude", dataset="ptb", ), - 16, + 14, ), ( OVModelForCausalLM, @@ -215,7 +215,7 @@ class OVWeightCompressionTest(unittest.TestCase): dataset="ptb", awq=True, ), - 16, + 14, ), ) diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py index 937c0bf3f5..4f55b23e71 100644 --- a/tests/openvino/test_training.py +++ b/tests/openvino/test_training.py @@ -365,7 +365,7 @@ def tearDown(self): "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( model_id="hf-internal-testing/tiny-random-bert", nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], - expected_fake_quantize=44, + expected_fake_quantize=34, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss"], @@ -376,7 +376,7 @@ def tearDown(self): CUSTOMIZED_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, ], - expected_fake_quantize=44, + expected_fake_quantize=34, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss"], @@ -385,7 +385,7 @@ def tearDown(self): model_id="hf-internal-testing/tiny-random-bert", teacher_model_id="hf-internal-testing/tiny-random-bert", nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], - expected_fake_quantize=44, + expected_fake_quantize=34, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], @@ -397,7 +397,7 @@ def tearDown(self): CUSTOMIZED_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, ], - expected_fake_quantize=44, + expected_fake_quantize=34, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], @@ -574,7 +574,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], expected_fake_quantize=28, expected_int8=28, - expected_binary_masks=48, + expected_binary_masks=40, compression_metrics=["compression_loss"], ), "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( @@ -591,7 +591,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], expected_fake_quantize=28, expected_int8=28, - expected_binary_masks=48, + expected_binary_masks=40, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 8fabb34e38..04049172d3 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -102,12 +102,12 @@ SEED = 42 _ARCHITECTURES_TO_EXPECTED_INT8 = { - "bert": (70,), + "bert": (68,), "roberta": (68,), "albert": (84,), "vit": (64,), "blenderbot": (70,), - "gpt2": (46,), + "gpt2": (44,), "wav2vec2": (34,), "distilbert": (66,), "t5": (64, 104, 84), @@ -116,7 +116,7 @@ "stable-diffusion-xl-refiner": (366, 34, 42, 66), } -_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (64, 477)} +_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (62, 477)} def get_num_quantized_nodes(ov_model):