From 3d3f8988704e2844dbede67a6b61c98803db5c2b Mon Sep 17 00:00:00 2001
From: Ilya Druker
Date: Fri, 21 Jun 2024 11:07:35 -0400
Subject: [PATCH] Read use_external_data_format from ORTConfig file

When quantizing models larger than 2 GB, the use_external_data_format
flag must be set to true; otherwise quantization fails with
`ValueError: Message onnx.ModelProto exceeds maximum protobuf size of 2GB`.

Currently there is no way to set this parameter through optimum-cli,
because no such command option exists. In principle it could be set via
an ORTConfig file passed with the -c command flag, since
use_external_data_format is one of the configuration parameters defined
there. In practice, however, the optimum code ignores that value and does
not pass it to the quantize() function. The goal of this change is to
close that gap.
---
 optimum/onnxruntime/subpackage/commands/quantize.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/optimum/onnxruntime/subpackage/commands/quantize.py b/optimum/onnxruntime/subpackage/commands/quantize.py
index 6f6d843cc70..55c139873e1 100644
--- a/optimum/onnxruntime/subpackage/commands/quantize.py
+++ b/optimum/onnxruntime/subpackage/commands/quantize.py
@@ -77,6 +77,7 @@ def run(self):
 
         save_dir = self.args.output
         quantizers = []
+        use_external_data_format = False
 
         quantizers = [
             ORTQuantizer.from_pretrained(self.args.onnx_model, file_name=model.name)
@@ -96,7 +97,9 @@
                 "TensorRT quantization relies on static quantization that requires calibration, which is currently not supported through optimum-cli. Please adapt Optimum static quantization examples to run static quantization for TensorRT: https://github.com/huggingface/optimum/tree/main/examples/onnxruntime/quantization"
             )
         else:
-            qconfig = ORTConfig.from_pretrained(self.args.config).quantization
+            config = ORTConfig.from_pretrained(self.args.config)
+            qconfig = config.quantization
+            use_external_data_format = config.use_external_data_format
 
         for q in quantizers:
-            q.quantize(save_dir=save_dir, quantization_config=qconfig)
+            q.quantize(save_dir=save_dir, quantization_config=qconfig, use_external_data_format=use_external_data_format)
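
For reference, the sketch below shows one way to produce an ORTConfig file
with use_external_data_format enabled and point optimum-cli at it via the
-c flag mentioned above. The quantization preset (avx512_vnni), the
directory names, and the example paths are illustrative assumptions, not
part of this patch; verify exact flag names with
`optimum-cli onnxruntime quantize --help`.

    # build_ort_config.py -- writes an ORTConfig with external data format enabled
    from optimum.onnxruntime.configuration import AutoQuantizationConfig, ORTConfig

    # Dynamic avx512_vnni quantization is used here purely as an example preset.
    qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)

    # Enable external data format so models larger than 2 GB can be serialized.
    ort_config = ORTConfig(quantization=qconfig, use_external_data_format=True)

    # Typically writes ort_config.json into the given directory (assumed name).
    ort_config.save_pretrained("ort_config_dir")

Then pass the saved configuration to the quantize command, for example:

    optimum-cli onnxruntime quantize --onnx_model ./large_onnx_model -o ./quantized_model -c ort_config_dir

With this patch applied, the use_external_data_format value read from the
config is forwarded to each quantizer's quantize() call, so models larger
than 2 GB can be quantized without hitting the protobuf size limit
described above.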