diff --git a/optimum/onnxruntime/subpackage/commands/quantize.py b/optimum/onnxruntime/subpackage/commands/quantize.py
index 6f6d843cc70..45df903e0c2 100644
--- a/optimum/onnxruntime/subpackage/commands/quantize.py
+++ b/optimum/onnxruntime/subpackage/commands/quantize.py
@@ -77,6 +77,7 @@ def run(self):
 
         save_dir = self.args.output
         quantizers = []
+        use_external_data_format = False
 
         quantizers = [
             ORTQuantizer.from_pretrained(self.args.onnx_model, file_name=model.name)
@@ -96,7 +97,11 @@
                 "TensorRT quantization relies on static quantization that requires calibration, which is currently not supported through optimum-cli. Please adapt Optimum static quantization examples to run static quantization for TensorRT: https://github.com/huggingface/optimum/tree/main/examples/onnxruntime/quantization"
             )
         else:
-            qconfig = ORTConfig.from_pretrained(self.args.config).quantization
+            config = ORTConfig.from_pretrained(self.args.config)
+            qconfig = config.quantization
+            use_external_data_format = config.use_external_data_format
 
         for q in quantizers:
-            q.quantize(save_dir=save_dir, quantization_config=qconfig)
+            q.quantize(
+                save_dir=save_dir, quantization_config=qconfig, use_external_data_format=use_external_data_format
+            )