From 3d3f8988704e2844dbede67a6b61c98803db5c2b Mon Sep 17 00:00:00 2001
From: Ilya Druker
Date: Fri, 21 Jun 2024 11:07:35 -0400
Subject: [PATCH] Read use_external_data_format from ORTConfig file

When quantizing models larger than 2 GB, the use_external_data_format
flag must be set to true; otherwise quantization fails with
`ValueError: Message onnx.ModelProto exceeds maximum protobuf size of 2GB`.

Currently there is no way to set this parameter through optimum-cli,
because no such command option exists. In principle it could be set via
an ORTConfig file passed with the -c command flag, since
use_external_data_format is one of the configuration parameters defined
there. In practice, however, the optimum code ignores that value and does
not pass it to the quantize() function. The goal of this change is to
close that gap.
---
 optimum/onnxruntime/subpackage/commands/quantize.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/optimum/onnxruntime/subpackage/commands/quantize.py b/optimum/onnxruntime/subpackage/commands/quantize.py
index 6f6d843cc70..55c139873e1 100644
--- a/optimum/onnxruntime/subpackage/commands/quantize.py
+++ b/optimum/onnxruntime/subpackage/commands/quantize.py
@@ -77,6 +77,7 @@ def run(self):
 
         save_dir = self.args.output
         quantizers = []
+        use_external_data_format = False
 
         quantizers = [
             ORTQuantizer.from_pretrained(self.args.onnx_model, file_name=model.name)
@@ -96,7 +97,9 @@
                 "TensorRT quantization relies on static quantization that requires calibration, which is currently not supported through optimum-cli. Please adapt Optimum static quantization examples to run static quantization for TensorRT: https://github.com/huggingface/optimum/tree/main/examples/onnxruntime/quantization"
             )
         else:
-            qconfig = ORTConfig.from_pretrained(self.args.config).quantization
+            config = ORTConfig.from_pretrained(self.args.config)
+            qconfig = config.quantization
+            use_external_data_format = config.use_external_data_format
 
         for q in quantizers:
-            q.quantize(save_dir=save_dir, quantization_config=qconfig)
+            q.quantize(save_dir=save_dir, quantization_config=qconfig, use_external_data_format=use_external_data_format)
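
For reference, the sketch below shows one way to produce an ORTConfig file
with use_external_data_format enabled and point optimum-cli at it via the
-c flag mentioned above. The quantization preset (avx512_vnni), the
directory names, and the example paths are illustrative assumptions, not
part of this patch; verify exact flag names with
`optimum-cli onnxruntime quantize --help`.

    # build_ort_config.py -- writes an ORTConfig with external data format enabled
    from optimum.onnxruntime.configuration import AutoQuantizationConfig, ORTConfig

    # Dynamic avx512_vnni quantization is used here purely as an example preset.
    qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)

    # Enable external data format so models larger than 2 GB can be serialized.
    ort_config = ORTConfig(quantization=qconfig, use_external_data_format=True)

    # Typically writes ort_config.json into the given directory (assumed name).
    ort_config.save_pretrained("ort_config_dir")

Then pass the saved configuration to the quantize command, for example:

    optimum-cli onnxruntime quantize --onnx_model ./large_onnx_model -o ./quantized_model -c ort_config_dir

With this patch applied, the use_external_data_format value read from the
config is forwarded to each quantizer's quantize() call, so models larger
than 2 GB can be quantized without hitting the protobuf size limit
described above.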