huggingface · echarlaix · Mar 8, 2024 · Mar 6, 2024 · Mar 7, 2024 · Mar 7, 2024
diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml
@@ -32,7 +32,7 @@ jobs:
         python -m pip install --upgrade pip
         # install PyTorch CPU version to avoid installing CUDA packages on GitHub runner without GPU
         pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-        pip install .[openvino,openvino-tokenizers,nncf,tests,diffusers]
+        pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
     - name: Test with Pytest
       run: |
         pytest tests/openvino/ --ignore test_modeling_basic
diff --git a/optimum/intel/__init__.py b/optimum/intel/__init__.py
@@ -18,6 +18,7 @@
 from transformers.utils import OptionalDependencyNotAvailable, _LazyModule
 
 from .utils import (
+    is_accelerate_available,
     is_diffusers_available,
     is_ipex_available,
     is_neural_compressor_available,
@@ -29,6 +30,7 @@
 
 _import_structure = {
     "openvino": [],
+    "utils.dummy_openvino_and_nncf_objects": [],
 }
 
 try:
@@ -57,13 +59,19 @@
     if not (is_openvino_available() and is_nncf_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    _import_structure["utils.dummy_openvino_and_nncf_objects"] = [
-        "OVQuantizer",
-        "OVTrainer",
-        "OVTrainingArguments",
-    ]
+    _import_structure["utils.dummy_openvino_and_nncf_objects"].extend(["OVQuantizer", "OVTrainingArguments"])
+else:
+    _import_structure["openvino"].extend(["OVQuantizer", "OVTrainingArguments"])
+
+
+try:
+    if not (is_openvino_available() and is_nncf_available() and is_accelerate_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    _import_structure["utils.dummy_openvino_and_nncf_objects"].extend(["OVTrainer"])
 else:
-    _import_structure["openvino"].extend(["OVQuantizer", "OVTrainer", "OVTrainingArguments"])
+    _import_structure["openvino"].extend(["OVTrainer"])
+
 
 try:
     if not (is_openvino_available() and is_diffusers_available()):
@@ -145,6 +153,7 @@
         "INCSeq2SeqTrainer",
         "INCTrainer",
     ]
+
 try:
     if not (is_neural_compressor_available() and is_diffusers_available()):
         raise OptionalDependencyNotAvailable()
@@ -177,13 +186,17 @@
         if not (is_openvino_available() and is_nncf_available()):
             raise OptionalDependencyNotAvailable()
     except OptionalDependencyNotAvailable:
-        from .utils.dummy_openvino_and_nncf_objects import (
-            OVQuantizer,
-            OVTrainer,
-            OVTrainingArguments,
-        )
+        from .utils.dummy_openvino_and_nncf_objects import OVQuantizer, OVTrainingArguments
+    else:
+        from .openvino import OVQuantizer, OVTrainingArguments
+
+    try:
+        if not (is_openvino_available() and is_nncf_available() and is_accelerate_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from .utils.dummy_openvino_and_nncf_objects import OVTrainer
     else:
-        from .openvino import OVQuantizer, OVTrainer, OVTrainingArguments
+        from .openvino import OVTrainer
 
     try:
         if not (is_openvino_available() and is_diffusers_available()):

diff --git a/optimum/intel/openvino/__init__.py b/optimum/intel/openvino/__init__.py
@@ -14,7 +14,7 @@
 
 import logging
 
-from ..utils.import_utils import is_diffusers_available, is_nncf_available
+from ..utils.import_utils import is_accelerate_available, is_diffusers_available, is_nncf_available
 from .utils import (
     OV_DECODER_NAME,
     OV_DECODER_WITH_PAST_NAME,
@@ -37,9 +37,11 @@
     patch_torch_operators()
 
     from .quantization import OVQuantizer
-    from .trainer import OVTrainer
     from .training_args import OVTrainingArguments
 
+    if is_accelerate_available():
+        from .trainer import OVTrainer
+
 
 from .configuration import OVConfig, OVWeightQuantizationConfig
 from .modeling import (

diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
@@ -17,7 +17,7 @@
 import logging
 import os
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Union
+from typing import Any, Callable, Dict, Optional, Tuple, Union
 
 import nncf
 import openvino
@@ -56,8 +56,7 @@
 
 
 if is_datasets_available():
-    if TYPE_CHECKING:
-        from datasets import Dataset
+    from datasets import Dataset
 
 register_module(ignored_algorithms=[])(Conv1D)
 
@@ -147,6 +146,7 @@ def __init__(self, model: transformers.PreTrainedModel, task: Optional[str] = No
             )
         self.task = task or feature
         self.seed = seed
+        # TODO : deprecate input_names
         self.input_names = None
         signature = inspect.signature(self.model.forward)
         self._signature_columns = list(signature.parameters.keys())
@@ -526,9 +526,16 @@ def _get_calibration_dataloader(
         data_collator: Optional[DataCollator] = None,
     ) -> OVDataLoader:
         data_collator = data_collator if data_collator is not None else default_data_collator
+
+        # Columns are only pruned for `datasets.Dataset` inputs; warn only when overriding the caller's request.
+        if remove_unused_columns and not (is_datasets_available() and isinstance(calibration_dataset, Dataset)):
+            logger.warning(
+                "`remove_unused_columns` set to `False` as calibration_dataset is not an instance of `datasets.Dataset`"
+            )
+            remove_unused_columns = False
+
         if remove_unused_columns:
             calibration_dataset = self._remove_unused_columns(calibration_dataset)
-        self.input_names = calibration_dataset.column_names
         generator = torch.Generator()
         generator.manual_seed(self.seed)
         sampler = RandomSampler(calibration_dataset, generator=generator)

diff --git a/optimum/intel/utils/__init__.py b/optimum/intel/utils/__init__.py
@@ -16,6 +16,7 @@
     _neural_compressor_version,
     _torch_version,
     compare_versions,
+    is_accelerate_available,
     is_diffusers_available,
     is_ipex_available,
     is_neural_compressor_available,

diff --git a/optimum/intel/utils/dummy_openvino_and_nncf_objects.py b/optimum/intel/utils/dummy_openvino_and_nncf_objects.py
@@ -27,14 +27,14 @@ def from_pretrained(cls, *args, **kwargs):
 
 
 class OVTrainer(metaclass=DummyObject):
-    _backends = ["openvino", "nncf"]
+    _backends = ["openvino", "nncf", "accelerate"]
 
     def __init__(self, *args, **kwargs):
-        requires_backends(self, ["openvino", "nncf"])
+        requires_backends(self, ["openvino", "nncf", "accelerate"])
 
     @classmethod
     def from_pretrained(cls, *args, **kwargs):
-        requires_backends(cls, ["openvino", "nncf"])
+        requires_backends(cls, ["openvino", "nncf", "accelerate"])
 
 
 class OVQuantizer(metaclass=DummyObject):

diff --git a/optimum/intel/utils/import_utils.py b/optimum/intel/utils/import_utils.py
@@ -156,6 +156,16 @@
         _datasets_available = False
 
 
+_accelerate_available = importlib.util.find_spec("accelerate") is not None
+_accelerate_version = "N/A"
+
+if _accelerate_available:
+    try:
+        _accelerate_version = importlib_metadata.version("accelerate")
+    except importlib_metadata.PackageNotFoundError:
+        _accelerate_available = False
+
+
 def is_transformers_available():
     return _transformers_available
 
@@ -196,6 +206,10 @@ def is_datasets_available():
     return _datasets_available
 
 
+def is_accelerate_available():
+    return _accelerate_available
+
+
 # This function was copied from: https://github.com/huggingface/accelerate/blob/874c4967d94badd24f893064cc3bef45f57cadf7/src/accelerate/utils/versions.py#L319
 def compare_versions(library_or_version: Union[str, Version], operation: str, requirement_version: str):
     """
@@ -317,13 +331,19 @@ def is_timm_version(operation: str, version: str):
 `pip install datasets`. Please note that you may need to restart your runtime after installation.
 """
 
+ACCELERATE_IMPORT_ERROR = """
+{0} requires the accelerate library but it was not found in your environment. You can install it with pip:
+`pip install accelerate`. Please note that you may need to restart your runtime after installation.
+"""
+
 BACKENDS_MAPPING = OrderedDict(
     [
         ("diffusers", (is_diffusers_available, DIFFUSERS_IMPORT_ERROR)),
         ("ipex", (is_ipex_available, IPEX_IMPORT_ERROR)),
         ("nncf", (is_nncf_available, NNCF_IMPORT_ERROR)),
         ("openvino", (is_openvino_available, OPENVINO_IMPORT_ERROR)),
         ("neural_compressor", (is_neural_compressor_available, NEURAL_COMPRESSOR_IMPORT_ERROR)),
+        ("accelerate", (is_accelerate_available, ACCELERATE_IMPORT_ERROR)),
     ]
 )
 

diff --git a/setup.py b/setup.py
@@ -18,10 +18,11 @@
     "datasets>=1.4.0",
     "sentencepiece",
     "scipy",
-    "accelerate",  # transformers 4.29 require accelerate for PyTorch
+    "onnx",
 ]
 
 TESTS_REQUIRE = [
+    "accelerate",
     "pytest",
     "parameterized",
     "Pillow",
@@ -39,11 +40,11 @@
 QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"]
 
 EXTRAS_REQUIRE = {
-    "neural-compressor": ["neural-compressor>=2.2.0", "onnx", "onnxruntime<1.15.0"],
-    "openvino": ["openvino>=2023.3", "onnx", "onnxruntime", "nncf>=2.8.1"],
+    "neural-compressor": ["neural-compressor>=2.2.0", "onnxruntime<1.15.0", "accelerate"],
+    "openvino": ["openvino>=2023.3", "nncf>=2.8.1"],
     "openvino-tokenizers": ["openvino-tokenizers[transformers]"],
     "nncf": ["nncf>=2.8.1"],
-    "ipex": ["intel-extension-for-pytorch", "onnx"],
+    "ipex": ["intel-extension-for-pytorch"],
     "diffusers": ["diffusers"],
     "quality": QUALITY_REQUIRE,
     "tests": TESTS_REQUIRE,

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
@@ -154,24 +154,24 @@ class OVWeightCompressionTest(unittest.TestCase):
     # TODO : add models
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = (
         (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 70, 70),
-        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 46),
+        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 44),
     )
 
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 64, 365),)
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 388),)
+    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 62, 365),)
+    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 385),)
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTO_COMPRESSED_MATMULS = (
-        (OVModelForCausalLM, "hf-internal-testing/tiny-random-OPTForCausalLM", 16, 136),
+        (OVModelForCausalLM, "hf-internal-testing/tiny-random-OPTForCausalLM", 14, 136),
     )
     SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = (
-        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 46),
+        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 44),
     )
 
     LOAD_IN_4_BITS_SCOPE = (
         (
             OVModelForCausalLM,
             "hf-internal-testing/tiny-random-gpt2",
             dict(bits=4, sym=False, group_size=-1, ratio=0.8),
-            16,
+            14,
         ),
         (
             OVModelForCausalLM,
@@ -182,13 +182,13 @@ class OVWeightCompressionTest(unittest.TestCase):
                 group_size=32,
                 ignored_scope={"names": ["__module.model.transformer.h.2.mlp.c_fc/aten::addmm/MatMul"]},
             ),
-            6,
+            4,
         ),
         (
             OVModelForCausalLM,
             "hf-internal-testing/tiny-random-gpt2",
             dict(bits=4, sym=False, group_size=-1, ratio=0.8, all_layers=True),
-            22,
+            18,
         ),
         (
             OVModelForCausalLM,
@@ -201,7 +201,7 @@ class OVWeightCompressionTest(unittest.TestCase):
                 sensitivity_metric="mean_activation_magnitude",
                 dataset="ptb",
             ),
-            16,
+            14,
         ),
         (
             OVModelForCausalLM,
@@ -215,7 +215,7 @@ class OVWeightCompressionTest(unittest.TestCase):
                 dataset="ptb",
                 awq=True,
             ),
-            16,
+            14,
         ),
     )