From ef90dc0346a268958fa41f34986158baec2a61b2 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Wed, 6 Mar 2024 18:49:06 +0100
Subject: [PATCH 01/11] Remove accelerate dependency

---
 optimum/intel/__init__.py                     | 36 ++++++++++++-------
 optimum/intel/openvino/__init__.py            |  6 +++-
 optimum/intel/openvino/quantization.py        | 14 +++++---
 .../utils/dummy_openvino_and_nncf_objects.py  |  6 ++--
 setup.py                                      |  4 +--
 5 files changed, 43 insertions(+), 23 deletions(-)

diff --git a/optimum/intel/__init__.py b/optimum/intel/__init__.py
index 93a4417bfc..22ee7c5ac9 100644
--- a/optimum/intel/__init__.py
+++ b/optimum/intel/__init__.py
@@ -15,7 +15,7 @@
 import importlib.util
 from typing import TYPE_CHECKING
 
-from transformers.utils import OptionalDependencyNotAvailable, _LazyModule
+from transformers.utils import OptionalDependencyNotAvailable, _LazyModule, is_accelerate_available
 
 from .utils import (
     is_diffusers_available,
@@ -57,13 +57,19 @@
     if not (is_openvino_available() and is_nncf_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    _import_structure["utils.dummy_openvino_and_nncf_objects"] = [
-        "OVQuantizer",
-        "OVTrainer",
-        "OVTrainingArguments",
-    ]
+    _import_structure["utils.dummy_openvino_and_nncf_objects"] = ["OVQuantizer", "OVTrainingArguments"]
+else:
+    _import_structure["openvino"].extend(["OVQuantizer", "OVTrainingArguments"])
+
+
+try:
+    if not (is_openvino_available() and is_nncf_available() and is_accelerate_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    _import_structure["utils.dummy_openvino_and_nncf_objects"] = ["OVTrainer"]
 else:
-    _import_structure["openvino"].extend(["OVQuantizer", "OVTrainer", "OVTrainingArguments"])
+    _import_structure["openvino"].extend(["OVTrainer"])
+
 
 try:
     if not (is_openvino_available() and is_diffusers_available()):
@@ -177,13 +183,17 @@
         if not (is_openvino_available() and is_nncf_available()):
             raise OptionalDependencyNotAvailable()
     except OptionalDependencyNotAvailable:
-        from .utils.dummy_openvino_and_nncf_objects import (
-            OVQuantizer,
-            OVTrainer,
-            OVTrainingArguments,
-        )
+        from .utils.dummy_openvino_and_nncf_objects import OVQuantizer, OVTrainingArguments
+    else:
+        from .openvino import OVQuantizer, OVTrainingArguments
+
+    try:
+        if not (is_openvino_available() and is_nncf_available() and is_accelerate_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from .utils.dummy_openvino_and_nncf_objects import OVTrainer
     else:
-        from .openvino import OVQuantizer, OVTrainer, OVTrainingArguments
+        from .openvino import OVTrainer
 
     try:
         if not (is_openvino_available() and is_diffusers_available()):
diff --git a/optimum/intel/openvino/__init__.py b/optimum/intel/openvino/__init__.py
index a6227615a2..fddc3a9a4f 100644
--- a/optimum/intel/openvino/__init__.py
+++ b/optimum/intel/openvino/__init__.py
@@ -14,6 +14,8 @@
 
 import logging
 
+from transformers.utils import is_accelerate_available
+
 from ..utils.import_utils import is_diffusers_available, is_nncf_available
 from .utils import (
     OV_DECODER_NAME,
@@ -37,9 +39,11 @@
     patch_torch_operators()
 
     from .quantization import OVQuantizer
-    from .trainer import OVTrainer
     from .training_args import OVTrainingArguments
 
+    if is_accelerate_available():
+        from .trainer import OVTrainer
+
 
 from .configuration import OVConfig, OVWeightQuantizationConfig
 from .modeling import (
diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
index d7b88f2be3..cd26f91f22 100644
--- a/optimum/intel/openvino/quantization.py
+++ b/optimum/intel/openvino/quantization.py
@@ -17,7 +17,7 @@
 import logging
 import os
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Union
+from typing import Any, Callable, Dict, Optional, Tuple, Union
 
 import nncf
 import openvino
@@ -56,8 +56,7 @@
 
 
 if is_datasets_available():
-    if TYPE_CHECKING:
-        from datasets import Dataset
+    from datasets import Dataset
 
 register_module(ignored_algorithms=[])(Conv1D)
 
@@ -147,6 +146,7 @@ def __init__(self, model: transformers.PreTrainedModel, task: Optional[str] = No
             )
         self.task = task or feature
         self.seed = seed
+        # TODO : deprecate input_names
         self.input_names = None
         signature = inspect.signature(self.model.forward)
         self._signature_columns = list(signature.parameters.keys())
@@ -526,9 +526,15 @@ def _get_calibration_dataloader(
         data_collator: Optional[DataCollator] = None,
     ) -> OVDataLoader:
         data_collator = data_collator if data_collator is not None else default_data_collator
+
+        if not is_datasets_available() or not isinstance(calibration_dataset, Dataset):
+            logger.warning(
+                "`remove_unused_columns` set to `False` as calibration_dataset is not an instance of `datasets.Dataset`"
+            )
+            remove_unused_columns = False
+
         if remove_unused_columns:
             calibration_dataset = self._remove_unused_columns(calibration_dataset)
-        self.input_names = calibration_dataset.column_names
         generator = torch.Generator()
         generator.manual_seed(self.seed)
         sampler = RandomSampler(calibration_dataset, generator=generator)
diff --git a/optimum/intel/utils/dummy_openvino_and_nncf_objects.py b/optimum/intel/utils/dummy_openvino_and_nncf_objects.py
index 45c390aff2..8ae3135667 100644
--- a/optimum/intel/utils/dummy_openvino_and_nncf_objects.py
+++ b/optimum/intel/utils/dummy_openvino_and_nncf_objects.py
@@ -27,14 +27,14 @@ def from_pretrained(cls, *args, **kwargs):
 
 
 class OVTrainer(metaclass=DummyObject):
-    _backends = ["openvino", "nncf"]
+    _backends = ["openvino", "nncf", "accelerate"]
 
     def __init__(self, *args, **kwargs):
-        requires_backends(self, ["openvino", "nncf"])
+        requires_backends(self, ["openvino", "nncf", "accelerate"])
 
     @classmethod
     def from_pretrained(cls, *args, **kwargs):
-        requires_backends(cls, ["openvino", "nncf"])
+        requires_backends(cls, ["openvino", "nncf", "accelerate"])
 
 
 class OVQuantizer(metaclass=DummyObject):
diff --git a/setup.py b/setup.py
index dd98548018..6045e0d3d4 100644
--- a/setup.py
+++ b/setup.py
@@ -18,10 +18,10 @@
     "datasets>=1.4.0",
     "sentencepiece",
     "scipy",
-    "accelerate",  # transformers 4.29 require accelerate for PyTorch
 ]
 
 TESTS_REQUIRE = [
+    "accelerate",
     "pytest",
     "parameterized",
     "Pillow",
@@ -40,7 +40,7 @@
 
 EXTRAS_REQUIRE = {
     "neural-compressor": ["neural-compressor>=2.2.0", "onnx", "onnxruntime<1.15.0"],
-    "openvino": ["openvino>=2023.3", "onnx", "onnxruntime", "nncf>=2.8.1"],
+    "openvino": ["openvino>=2023.3", "onnx", "nncf>=2.8.1"],
     "openvino-tokenizers": ["openvino-tokenizers[transformers]"],
     "nncf": ["nncf>=2.8.1"],
     "ipex": ["intel-extension-for-pytorch", "onnx"],

From 103620a80e29b6db570e09898b18d72b18208bf6 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Thu, 7 Mar 2024 10:44:15 +0100
Subject: [PATCH 02/11] Add accelerate to import backend mapping

---
 optimum/intel/utils/import_utils.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/optimum/intel/utils/import_utils.py b/optimum/intel/utils/import_utils.py
index f1fb486c4f..1d5ce25086 100644
--- a/optimum/intel/utils/import_utils.py
+++ b/optimum/intel/utils/import_utils.py
@@ -156,6 +156,16 @@
         _datasets_available = False
 
 
+_accelerate_available = importlib.util.find_spec("accelerate") is not None
+_accelerate_version = "N/A"
+
+if _accelerate_available:
+    try:
+        _accelerate_version = importlib_metadata.version("accelerate")
+    except importlib_metadata.PackageNotFoundError:
+        _accelerate_available = False
+
+
 def is_transformers_available():
     return _transformers_available
 
@@ -196,6 +206,10 @@ def is_datasets_available():
     return _datasets_available
 
 
+def is_accelerate_available():
+    return _accelerate_available
+
+
 # This function was copied from: https://github.com/huggingface/accelerate/blob/874c4967d94badd24f893064cc3bef45f57cadf7/src/accelerate/utils/versions.py#L319
 def compare_versions(library_or_version: Union[str, Version], operation: str, requirement_version: str):
     """
@@ -317,6 +331,11 @@ def is_timm_version(operation: str, version: str):
 `pip install datasets`. Please note that you may need to restart your runtime after installation.
 """
 
+ACCELERATE_IMPORT_ERROR = """
+{0} requires the accelerate library but it was not found in your environment. You can install it with pip:
+`pip install accelerate`. Please note that you may need to restart your runtime after installation.
+"""
+
 BACKENDS_MAPPING = OrderedDict(
     [
         ("diffusers", (is_diffusers_available, DIFFUSERS_IMPORT_ERROR)),
@@ -324,6 +343,7 @@ def is_timm_version(operation: str, version: str):
         ("nncf", (is_nncf_available, NNCF_IMPORT_ERROR)),
         ("openvino", (is_openvino_available, OPENVINO_IMPORT_ERROR)),
         ("neural_compressor", (is_neural_compressor_available, NEURAL_COMPRESSOR_IMPORT_ERROR)),
+        ("accelerate", (is_accelerate_available, ACCELERATE_IMPORT_ERROR)),
     ]
 )
 

From ebc108bba2736ca47b4035f78c49fa82d99604a4 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Thu, 7 Mar 2024 10:47:33 +0100
Subject: [PATCH 03/11] Add eval method to OVModels

---
 optimum/intel/openvino/modeling_base.py | 3 +++
 tests/openvino/test_modeling.py         | 1 +
 2 files changed, 4 insertions(+)

diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py
index af00f7a06e..e4be73402d 100644
--- a/optimum/intel/openvino/modeling_base.py
+++ b/optimum/intel/openvino/modeling_base.py
@@ -448,6 +448,9 @@ def half(self):
         self.request = None
         return self
 
+    def eval(self):
+        return self
+
     def forward(self, *args, **kwargs):
         raise NotImplementedError
 
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 2188b7061f..d08297fd36 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -115,6 +115,7 @@ def test_load_from_hub_and_save_model(self):
         tokenizer = AutoTokenizer.from_pretrained(self.OV_MODEL_ID)
         tokens = tokenizer("This is a sample input", return_tensors="pt")
         loaded_model = OVModelForSequenceClassification.from_pretrained(self.OV_MODEL_ID)
+        loaded_model.eval()
         self.assertIsInstance(loaded_model.config, PretrainedConfig)
         loaded_model_outputs = loaded_model(**tokens)
 

From 429e34cb2377ea5e2d750ed03537717b0e20d52a Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Thu, 7 Mar 2024 10:48:47 +0100
Subject: [PATCH 04/11] add accelerate dependency for inc extra

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 6045e0d3d4..323c87f8fd 100644
--- a/setup.py
+++ b/setup.py
@@ -39,7 +39,7 @@
 QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"]
 
 EXTRAS_REQUIRE = {
-    "neural-compressor": ["neural-compressor>=2.2.0", "onnx", "onnxruntime<1.15.0"],
+    "neural-compressor": ["neural-compressor>=2.2.0", "onnx", "onnxruntime<1.15.0", "accelerate"],
     "openvino": ["openvino>=2023.3", "onnx", "nncf>=2.8.1"],
     "openvino-tokenizers": ["openvino-tokenizers[transformers]"],
     "nncf": ["nncf>=2.8.1"],

From 47fa545232e9ac2580174bd2628f804e08cec0a4 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Thu, 7 Mar 2024 11:11:45 +0100
Subject: [PATCH 05/11] Fix stable diffusion tests when onnxruntime is unavailable

---
 tests/openvino/test_stable_diffusion.py | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/tests/openvino/test_stable_diffusion.py b/tests/openvino/test_stable_diffusion.py
index d8cef2e027..ab6f6f21a6 100644
--- a/tests/openvino/test_stable_diffusion.py
+++ b/tests/openvino/test_stable_diffusion.py
@@ -28,7 +28,6 @@
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import floats_tensor
 from openvino.runtime.ie_api import CompiledModel
-from packaging.version import Version, parse
 from parameterized import parameterized
 from utils_tests import MODEL_NAMES, SEED
 
@@ -46,13 +45,8 @@
     OVModelVaeDecoder,
     OVModelVaeEncoder,
 )
-from optimum.onnxruntime import (
-    ORTStableDiffusionImg2ImgPipeline,
-    ORTStableDiffusionInpaintPipeline,
-    ORTStableDiffusionXLImg2ImgPipeline,
-    ORTStableDiffusionXLPipeline,
-)
-from optimum.utils.import_utils import _diffusers_version
+from optimum.intel.utils.import_utils import is_diffusers_version
+from optimum.utils.import_utils import is_onnxruntime_available
 
 
 F32_CONFIG = {"INFERENCE_PRECISION_HINT": "f32"}
@@ -167,7 +161,6 @@ def generate_inputs(self, height=128, width=128, batch_size=1):
 class OVStableDiffusionImg2ImgPipelineTest(OVStableDiffusionPipelineBaseTest):
     SUPPORTED_ARCHITECTURES = ("stable-diffusion",)
     MODEL_CLASS = OVStableDiffusionImg2ImgPipeline
-    ORT_MODEL_CLASS = ORTStableDiffusionImg2ImgPipeline
     TASK = "image-to-image"
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
@@ -298,11 +291,13 @@ def test_height_width_properties(self, model_arch: str):
 class OVStableDiffusionInpaintPipelineTest(OVStableDiffusionPipelineBaseTest):
     SUPPORTED_ARCHITECTURES = ("stable-diffusion",)
     MODEL_CLASS = OVStableDiffusionInpaintPipeline
-    ORT_MODEL_CLASS = ORTStableDiffusionInpaintPipeline
     TASK = "inpaint"
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @unittest.skipIf(not is_onnxruntime_available(), "this test requires onnxruntime")
     def test_compare_diffusers_pipeline(self, model_arch: str):
+        from optimum.onnxruntime import ORTStableDiffusionInpaintPipeline
+
         model_id = MODEL_NAMES[model_arch]
         pipeline = self.MODEL_CLASS.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
         batch_size, num_images, height, width = 1, 1, 64, 64
@@ -329,7 +324,7 @@ def test_compare_diffusers_pipeline(self, model_arch: str):
         outputs = pipeline(**inputs, latents=latents).images
         self.assertEqual(outputs.shape, (batch_size * num_images, height, width, 3))
 
-        ort_pipeline = self.ORT_MODEL_CLASS.from_pretrained(model_id, export=True)
+        ort_pipeline = ORTStableDiffusionInpaintPipeline.from_pretrained(model_id, export=True)
         ort_outputs = ort_pipeline(**inputs, latents=latents).images
         self.assertTrue(np.allclose(outputs, ort_outputs, atol=1e-1))
 
@@ -358,7 +353,6 @@ def generate_inputs(self, height=128, width=128, batch_size=1):
 class OVtableDiffusionXLPipelineTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ("stable-diffusion-xl",)
     MODEL_CLASS = OVStableDiffusionXLPipeline
-    ORT_MODEL_CLASS = ORTStableDiffusionXLPipeline
     PT_MODEL_CLASS = StableDiffusionXLPipeline
     TASK = "text-to-image"
 
@@ -444,7 +438,6 @@ def test_num_images_per_prompt_static_model(self, model_arch: str):
 class OVStableDiffusionXLImg2ImgPipelineTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ("stable-diffusion-xl", "stable-diffusion-xl-refiner")
     MODEL_CLASS = OVStableDiffusionXLImg2ImgPipeline
-    ORT_MODEL_CLASS = ORTStableDiffusionXLImg2ImgPipeline
     PT_MODEL_CLASS = StableDiffusionXLImg2ImgPipeline
     TASK = "image-to-image"
 
@@ -489,7 +482,7 @@ class OVLatentConsistencyModelPipelineTest(unittest.TestCase):
     TASK = "text-to-image"
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
-    @unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version")
+    @unittest.skipIf(is_diffusers_version("<=", "0.21.4"), "not supported with this diffusers version")
     def test_compare_to_diffusers(self, model_arch: str):
         ov_pipeline = self.MODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True, ov_config=F32_CONFIG)
         self.assertIsInstance(ov_pipeline.text_encoder, OVModelTextEncoder)
@@ -532,7 +525,7 @@ def test_compare_to_diffusers(self, model_arch: str):
         self.assertEqual(pipeline.device.type, ov_pipeline.device)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
-    @unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version")
+    @unittest.skipIf(is_diffusers_version("<=", "0.21.4"), "not supported with this diffusers version")
     def test_num_images_per_prompt_static_model(self, model_arch: str):
         model_id = MODEL_NAMES[model_arch]
         pipeline = self.MODEL_CLASS.from_pretrained(model_id, export=True, compile=False, dynamic_shapes=False)

From d8372681759c858079a0f599a01a62a2bfda00ca Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Thu, 7 Mar 2024 11:26:51 +0100
Subject: [PATCH 06/11] add onnxruntime install for OV test

---
 .github/workflows/test_openvino.yml | 2 +-
 optimum/intel/__init__.py           | 3 ++-
 optimum/intel/openvino/__init__.py  | 4 +---
 optimum/intel/utils/__init__.py     | 1 +
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml
index bf9460c75a..6d709eecfd 100644
--- a/.github/workflows/test_openvino.yml
+++ b/.github/workflows/test_openvino.yml
@@ -32,7 +32,7 @@ jobs:
         python -m pip install --upgrade pip
         # install PyTorch CPU version to avoid installing CUDA packages on GitHub runner without GPU
         pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-        pip install .[openvino,openvino-tokenizers,nncf,tests,diffusers]
+        pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
     - name: Test with Pytest
       run: |
         pytest tests/openvino/ --ignore test_modeling_basic
diff --git a/optimum/intel/__init__.py b/optimum/intel/__init__.py
index 22ee7c5ac9..4f48ae7db8 100644
--- a/optimum/intel/__init__.py
+++ b/optimum/intel/__init__.py
@@ -15,9 +15,10 @@
 import importlib.util
 from typing import TYPE_CHECKING
 
-from transformers.utils import OptionalDependencyNotAvailable, _LazyModule, is_accelerate_available
+from transformers.utils import OptionalDependencyNotAvailable, _LazyModule
 
 from .utils import (
+    is_accelerate_available,
     is_diffusers_available,
     is_ipex_available,
     is_neural_compressor_available,
diff --git a/optimum/intel/openvino/__init__.py b/optimum/intel/openvino/__init__.py
index fddc3a9a4f..1df932771a 100644
--- a/optimum/intel/openvino/__init__.py
+++ b/optimum/intel/openvino/__init__.py
@@ -14,9 +14,7 @@
 
 import logging
 
-from transformers.utils import is_accelerate_available
-
-from ..utils.import_utils import is_diffusers_available, is_nncf_available
+from ..utils.import_utils import is_accelerate_available, is_diffusers_available, is_nncf_available
 from .utils import (
     OV_DECODER_NAME,
     OV_DECODER_WITH_PAST_NAME,
diff --git a/optimum/intel/utils/__init__.py b/optimum/intel/utils/__init__.py
index 4e7522ee77..d77588f896 100644
--- a/optimum/intel/utils/__init__.py
+++ b/optimum/intel/utils/__init__.py
@@ -16,6 +16,7 @@
     _neural_compressor_version,
     _torch_version,
     compare_versions,
+    is_accelerate_available,
     is_diffusers_available,
     is_ipex_available,
     is_neural_compressor_available,

From 8c7a0fec816a9d13d108775336fe9787da7650f2 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Thu, 7 Mar 2024 15:04:58 +0100
Subject: [PATCH 07/11] Remove unrelated eval method from OVModels

---
 optimum/intel/openvino/modeling_base.py | 3 ---
 tests/openvino/test_modeling.py         | 1 -
 2 files changed, 4 deletions(-)

diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py
index e4be73402d..af00f7a06e 100644
--- a/optimum/intel/openvino/modeling_base.py
+++ b/optimum/intel/openvino/modeling_base.py
@@ -448,9 +448,6 @@ def half(self):
         self.request = None
         return self
 
-    def eval(self):
-        return self
-
     def forward(self, *args, **kwargs):
         raise NotImplementedError
 
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index d08297fd36..2188b7061f 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -115,7 +115,6 @@ def test_load_from_hub_and_save_model(self):
         tokenizer = AutoTokenizer.from_pretrained(self.OV_MODEL_ID)
         tokens = tokenizer("This is a sample input", return_tensors="pt")
         loaded_model = OVModelForSequenceClassification.from_pretrained(self.OV_MODEL_ID)
-        loaded_model.eval()
         self.assertIsInstance(loaded_model.config, PretrainedConfig)
         loaded_model_outputs = loaded_model(**tokens)
 

From 0b50ed5cdfc87e6cfc68d1370946904f32b74d53 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Thu, 7 Mar 2024 15:40:04 +0100
Subject: [PATCH 08/11] update setup

---
 setup.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index 323c87f8fd..ac4056c30d 100644
--- a/setup.py
+++ b/setup.py
@@ -18,6 +18,7 @@
     "datasets>=1.4.0",
     "sentencepiece",
     "scipy",
+    "onnx",
 ]
 
 TESTS_REQUIRE = [
@@ -39,11 +40,11 @@
 QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"]
 
 EXTRAS_REQUIRE = {
-    "neural-compressor": ["neural-compressor>=2.2.0", "onnx", "onnxruntime<1.15.0", "accelerate"],
-    "openvino": ["openvino>=2023.3", "onnx", "nncf>=2.8.1"],
+    "neural-compressor": ["neural-compressor>=2.2.0", "onnxruntime<1.15.0", "accelerate"],
+    "openvino": ["openvino>=2023.3", "nncf>=2.8.1"],
     "openvino-tokenizers": ["openvino-tokenizers[transformers]"],
     "nncf": ["nncf>=2.8.1"],
-    "ipex": ["intel-extension-for-pytorch", "onnx"],
+    "ipex": ["intel-extension-for-pytorch"],
     "diffusers": ["diffusers"],
     "quality": QUALITY_REQUIRE,
     "tests": TESTS_REQUIRE,

From 450a63b5e81a7394d331595d36833aab212a628d Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Thu, 7 Mar 2024 16:00:32 +0100
Subject: [PATCH 09/11] Fix dummy objects import structure being overwritten

---
 optimum/intel/__init__.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/optimum/intel/__init__.py b/optimum/intel/__init__.py
index 4f48ae7db8..59059d688d 100644
--- a/optimum/intel/__init__.py
+++ b/optimum/intel/__init__.py
@@ -30,6 +30,7 @@
 
 _import_structure = {
     "openvino": [],
+    "utils.dummy_openvino_and_nncf_objects": [],
 }
 
 try:
@@ -58,7 +59,7 @@
     if not (is_openvino_available() and is_nncf_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    _import_structure["utils.dummy_openvino_and_nncf_objects"] = ["OVQuantizer", "OVTrainingArguments"]
+    _import_structure["utils.dummy_openvino_and_nncf_objects"].extend(["OVQuantizer", "OVTrainingArguments"])
 else:
     _import_structure["openvino"].extend(["OVQuantizer", "OVTrainingArguments"])
 
@@ -67,7 +68,7 @@
     if not (is_openvino_available() and is_nncf_available() and is_accelerate_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    _import_structure["utils.dummy_openvino_and_nncf_objects"] = ["OVTrainer"]
+    _import_structure["utils.dummy_openvino_and_nncf_objects"].extend(["OVTrainer"])
 else:
     _import_structure["openvino"].extend(["OVTrainer"])
 
@@ -152,6 +153,7 @@
         "INCSeq2SeqTrainer",
         "INCTrainer",
     ]
+
 try:
     if not (is_neural_compressor_available() and is_diffusers_available()):
         raise OptionalDependencyNotAvailable()

From 94a990f2d08e1b68a2f78ccd1fe437236309caf2 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Fri, 8 Mar 2024 11:36:42 +0100
Subject: [PATCH 10/11] fix test expected int8

---
 tests/openvino/test_quantization.py | 20 ++++++++++----------
 tests/openvino/test_training.py     | 12 ++++++------
 tests/openvino/utils_tests.py       |  6 +++---
 3 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index 0ef89ec8b8..a33e0339f3 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -154,16 +154,16 @@ class OVWeightCompressionTest(unittest.TestCase):
     # TODO : add models
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = (
         (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 70, 70),
-        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 46),
+        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 44),
     )
 
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 64, 365),)
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 388),)
+    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 62, 365),)
+    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 385),)
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTO_COMPRESSED_MATMULS = (
-        (OVModelForCausalLM, "hf-internal-testing/tiny-random-OPTForCausalLM", 16, 136),
+        (OVModelForCausalLM, "hf-internal-testing/tiny-random-OPTForCausalLM", 14, 136),
     )
     SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = (
-        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 46),
+        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 44),
     )
 
     LOAD_IN_4_BITS_SCOPE = (
@@ -171,7 +171,7 @@ class OVWeightCompressionTest(unittest.TestCase):
             OVModelForCausalLM,
             "hf-internal-testing/tiny-random-gpt2",
             dict(bits=4, sym=False, group_size=-1, ratio=0.8),
-            16,
+            14,
         ),
         (
             OVModelForCausalLM,
@@ -182,13 +182,13 @@ class OVWeightCompressionTest(unittest.TestCase):
                 group_size=32,
                 ignored_scope={"names": ["__module.model.transformer.h.2.mlp.c_fc/aten::addmm/MatMul"]},
             ),
-            6,
+            4,
         ),
         (
             OVModelForCausalLM,
             "hf-internal-testing/tiny-random-gpt2",
             dict(bits=4, sym=False, group_size=-1, ratio=0.8, all_layers=True),
-            22,
+            18,
         ),
         (
             OVModelForCausalLM,
@@ -201,7 +201,7 @@ class OVWeightCompressionTest(unittest.TestCase):
                 sensitivity_metric="mean_activation_magnitude",
                 dataset="ptb",
             ),
-            16,
+            14,
         ),
         (
             OVModelForCausalLM,
@@ -215,7 +215,7 @@ class OVWeightCompressionTest(unittest.TestCase):
                 dataset="ptb",
                 awq=True,
             ),
-            16,
+            14,
         ),
     )
 
diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py
index 937c0bf3f5..4f55b23e71 100644
--- a/tests/openvino/test_training.py
+++ b/tests/openvino/test_training.py
@@ -365,7 +365,7 @@ def tearDown(self):
     "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=44,
+        expected_fake_quantize=34,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss"],
@@ -376,7 +376,7 @@ def tearDown(self):
             CUSTOMIZED_QUANTIZATION_CONFIG,
             STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
         ],
-        expected_fake_quantize=44,
+        expected_fake_quantize=34,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss"],
@@ -385,7 +385,7 @@ def tearDown(self):
         model_id="hf-internal-testing/tiny-random-bert",
         teacher_model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=44,
+        expected_fake_quantize=34,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -397,7 +397,7 @@ def tearDown(self):
             CUSTOMIZED_QUANTIZATION_CONFIG,
             STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
         ],
-        expected_fake_quantize=44,
+        expected_fake_quantize=34,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -574,7 +574,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG],
         expected_fake_quantize=28,
         expected_int8=28,
-        expected_binary_masks=48,
+        expected_binary_masks=40,
         compression_metrics=["compression_loss"],
     ),
     "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
@@ -591,7 +591,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG],
         expected_fake_quantize=28,
         expected_int8=28,
-        expected_binary_masks=48,
+        expected_binary_masks=40,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
     ),
     "distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index 8fabb34e38..04049172d3 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -102,12 +102,12 @@
 SEED = 42
 
 _ARCHITECTURES_TO_EXPECTED_INT8 = {
-    "bert": (70,),
+    "bert": (68,),
     "roberta": (68,),
     "albert": (84,),
     "vit": (64,),
     "blenderbot": (70,),
-    "gpt2": (46,),
+    "gpt2": (44,),
     "wav2vec2": (34,),
     "distilbert": (66,),
     "t5": (64, 104, 84),
@@ -116,7 +116,7 @@
     "stable-diffusion-xl-refiner": (366, 34, 42, 66),
 }
 
-_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (64, 477)}
+_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (62, 477)}
 
 
 def get_num_quantized_nodes(ov_model):

From b12a53692b58d002de1369cad34d7eff3a0b7ba3 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Fri, 8 Mar 2024 13:13:01 +0100
Subject: [PATCH 11/11] Fix expected values in OpenVINO training tests

---
 tests/openvino/test_training.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py
index 4f55b23e71..80298faf2b 100644
--- a/tests/openvino/test_training.py
+++ b/tests/openvino/test_training.py
@@ -574,7 +574,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG],
         expected_fake_quantize=28,
         expected_int8=28,
-        expected_binary_masks=40,
+        expected_binary_masks=48,
         compression_metrics=["compression_loss"],
     ),
     "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
@@ -591,7 +591,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG],
         expected_fake_quantize=28,
         expected_int8=28,
-        expected_binary_masks=40,
+        expected_binary_masks=48,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
     ),
     "distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
@@ -749,7 +749,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
     "quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
-        expected_fake_quantize=48,
+        expected_fake_quantize=40,
         expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss"],
@@ -766,7 +766,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
-        expected_fake_quantize=48,
+        expected_fake_quantize=40,
         expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],