diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
index 6f739e254..faef3d1d0 100644
--- a/optimum/intel/openvino/quantization.py
+++ b/optimum/intel/openvino/quantization.py
@@ -43,8 +43,6 @@
 from transformers import AutoProcessor, AutoTokenizer, DataCollator, PreTrainedModel, default_data_collator
 from transformers.pytorch_utils import Conv1D
 from transformers.utils import is_accelerate_available
-
-from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed
 from optimum.exporters.tasks import TasksManager
 from optimum.quantization_base import OptimumQuantizer
 
@@ -524,6 +522,7 @@ def _quantize_torchmodel(
 
         quantization_config = ov_config.quantization_config
         if isinstance(quantization_config, OVWeightQuantizationConfig):
+            from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed
             if stateful:
                 # patch model before weight compression
                 model = patch_model_with_bettertransformer(model)