Skip to content

Commit

Permalink
Add ONNX export support for granite models (#2043)
Browse files Browse the repository at this point in the history
* feat(exporters/onnx): Add GraniteOnnxConfig and task support list

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <[email protected]>

* feat: Add granite's normalized config for inference

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <[email protected]>

* feat(onnx opt): Add onnx optimization support for granite

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <[email protected]>

* fix(onnx/granite): Use LlamaOnnxConfig as the base for GraniteOnnxConfig

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <[email protected]>

* fix(onnxruntime): Add "granite" to list of model types with grouped attention

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <[email protected]>

* fix: Add granite to the list of models that require position_ids

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <[email protected]>

* fix(granite): Add MIN_TORCH_VERSION for recently fixed torch bug

#2043 (comment)

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <[email protected]>

* test(granite): Add tiny random granite test for onnx exporter

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <[email protected]>

* tests(onnxruntime): Add granite to onnxruntime tests

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <[email protected]>

---------

Signed-off-by: Gabe Goodhart <[email protected]>
  • Loading branch information
gabe-l-hart authored Oct 31, 2024
1 parent 6802a0c commit 7e8d857
Show file tree
Hide file tree
Showing 9 changed files with 19 additions and 1 deletion.
5 changes: 5 additions & 0 deletions optimum/exporters/onnx/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,11 @@ class GemmaOnnxConfig(LlamaOnnxConfig):
pass


class GraniteOnnxConfig(LlamaOnnxConfig):
    """ONNX export configuration for IBM Granite decoder models.

    Granite is exported with the same inputs/outputs and dummy-input
    generation as Llama (this commit uses ``LlamaOnnxConfig`` as the base
    deliberately); only minimum dependency versions are pinned here.
    """

    # Granite architecture support first shipped in transformers 4.45.0.
    MIN_TRANSFORMERS_VERSION = version.parse("4.45.0")
    # Export needs torch >= 2.5.0 — a torch bug affecting Granite was only
    # recently fixed there (see the PR #2043 review discussion).
    MIN_TORCH_VERSION = version.parse("2.5.0")


class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
DEFAULT_ONNX_OPSET = 14 # Phi now uses F.scaled_dot_product_attention by default for torch>=2.1.1.
NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
Expand Down
1 change: 1 addition & 0 deletions optimum/exporters/onnx/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
"phi",
"phi3",
"qwen2",
"granite",
}


Expand Down
7 changes: 7 additions & 0 deletions optimum/exporters/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -915,6 +915,13 @@ class TasksManager:
"text-classification",
onnx="LlamaOnnxConfig",
),
"granite": supported_tasks_mapping(
"feature-extraction",
"feature-extraction-with-past",
"text-generation",
"text-generation-with-past",
onnx="GraniteOnnxConfig",
),
"pegasus": supported_tasks_mapping(
"feature-extraction",
"feature-extraction-with-past",
Expand Down
2 changes: 1 addition & 1 deletion optimum/onnxruntime/modeling_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ def prepare_past_key_values(
if self.model_type == "gemma":
num_attention_heads = self.normalized_config.num_key_value_heads
embed_size_per_head = self.normalized_config.head_dim
elif self.model_type in {"mistral", "llama", "qwen2"}:
elif self.model_type in {"mistral", "llama", "qwen2", "granite"}:
num_attention_heads = self.normalized_config.num_key_value_heads
else:
num_attention_heads = self.normalized_config.num_attention_heads
Expand Down
1 change: 1 addition & 0 deletions optimum/onnxruntime/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ class ORTConfigManager:
"gpt-neo": "gpt2",
"gpt-neox": "gpt2",
"gptj": "gpt2",
"granite": "gpt2",
# longt5 with O4 results in segmentation fault
"longt5": "bert",
"llama": "gpt2",
Expand Down
1 change: 1 addition & 0 deletions optimum/utils/normalized_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ class NormalizedConfigManager:
"xlm-roberta": NormalizedTextConfig,
"yolos": NormalizedVisionConfig,
"qwen2": NormalizedTextConfig,
"granite": NormalizedTextConfigWithGQA,
}

@classmethod
Expand Down
1 change: 1 addition & 0 deletions tests/exporters/exporters_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
"gpt-neo": "hf-internal-testing/tiny-random-GPTNeoModel",
"gpt-neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
"gptj": "hf-internal-testing/tiny-random-GPTJModel",
"granite": "hf-internal-testing/tiny-random-GraniteForCausalLM",
"groupvit": "hf-internal-testing/tiny-random-groupvit",
"ibert": "hf-internal-testing/tiny-random-IBertModel",
"imagegpt": "hf-internal-testing/tiny-random-ImageGPTModel",
Expand Down
1 change: 1 addition & 0 deletions tests/onnxruntime/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -2324,6 +2324,7 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin):
"gpt_neo",
"gpt_neox",
"gptj",
"granite",
"llama",
"mistral",
"mpt",
Expand Down
1 change: 1 addition & 0 deletions tests/onnxruntime/utils_onnxruntime_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
"gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
"gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
"gptj": "hf-internal-testing/tiny-random-GPTJForCausalLM",
"granite": "hf-internal-testing/tiny-random-GraniteForCausalLM",
"groupvit": "hf-internal-testing/tiny-random-groupvit",
"hubert": "hf-internal-testing/tiny-random-HubertModel",
"ibert": "hf-internal-testing/tiny-random-IBertModel",
Expand Down

0 comments on commit 7e8d857

Please sign in to comment.