From c7b7d21ae9be8bf8ffb1704c9475451dd207f912 Mon Sep 17 00:00:00 2001
From: tedasdf <teedsingyau@gmail.com>
Date: Thu, 7 Nov 2024 10:50:57 +1100
Subject: [PATCH 1/6] feat: add ONNX export config for VisualBERT

---
 optimum/exporters/onnx/model_configs.py | 18 ++++++++++++++++++
 optimum/exporters/tasks.py              |  6 ++++++
 tests/exporters/exporters_utils.py      |  2 ++
 3 files changed, 26 insertions(+)

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index cc752779d30..0b0edd04a99 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -117,6 +117,24 @@ def inputs(self) -> Dict[str, Dict[int, str]]:
             "token_type_ids": dynamic_axis,
         }
 
+class VisualBertOnnxConfig(TextAndVisionOnnxConfig):
+    DEFAULT_ONNX_OPSET = 11
+
+    @property
+    def inputs(self) -> Dict[str, Dict[int, str]]:
+        return {
+            "input_ids": {0: "batch_size", 1: "sequence_length"},
+            "attention_mask": {0: "batch_size", 1: "sequence_length"},
+            "pixel_values": {0: "batch_size", 1: "num_channels", 2: "height", 3: "width"},
+        }
+    
+    @property
+    def outputs(self) -> Dict[str, Dict[int, str]]:
+        return {
+            "last_hidden_state": {0: "batch_size", 1: "sequence_length"},
+            "pooler_output": {0: "batch_size"},
+        }
+
 
 class AlbertOnnxConfig(BertOnnxConfig):
     DEFAULT_ONNX_OPSET = 14  # now uses F.scaled_dot_product_attention by default for torch>=2.1.1.
diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py
index fdc8bfcb539..3630e5fa95a 100644
--- a/optimum/exporters/tasks.py
+++ b/optimum/exporters/tasks.py
@@ -1108,6 +1108,12 @@ class TasksManager:
             "text-to-audio",
             onnx="VitsOnnxConfig",
         ),
+        "visualbert": supported_tasks_mapping(
+            "multiple-choice",
+            "question-answering",
+            "image-to-text",
+            onnx="VisualBertOnnxConfig",
+        ),
         "wavlm": supported_tasks_mapping(
             "feature-extraction",
             "automatic-speech-recognition",
diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
index ccccb5510bf..87cbed4f331 100644
--- a/tests/exporters/exporters_utils.py
+++ b/tests/exporters/exporters_utils.py
@@ -197,6 +197,7 @@
             "document-question-answering-with-past",
         ],
     },
+     "visualbert": "hf-internal-testing/tiny-random-VisualBertModel",
 }
 
 
@@ -286,6 +287,7 @@
     "speech-to-text": "codenamewei/speech-to-text",
     "xlm": "xlm-clm-ende-1024",
     "xlm-roberta": "Unbabel/xlm-roberta-comet-small",
+    "visualbert": "unc-nlp/visualbert-uncased",
 }
 
 TENSORFLOW_EXPORT_MODELS = {

From 5aae9cbfb8b0848cb45d35297b6fc84bb2920064 Mon Sep 17 00:00:00 2001
From: tedasdf <teedsingyau@gmail.com>
Date: Thu, 7 Nov 2024 10:58:52 +1100
Subject: [PATCH 2/6] make style

---
 optimum/exporters/onnx/model_configs.py | 3 ++-
 tests/exporters/exporters_utils.py      | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index 0b0edd04a99..321f18ad692 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -117,6 +117,7 @@ def inputs(self) -> Dict[str, Dict[int, str]]:
             "token_type_ids": dynamic_axis,
         }
 
+
 class VisualBertOnnxConfig(TextAndVisionOnnxConfig):
     DEFAULT_ONNX_OPSET = 11
 
@@ -127,7 +128,7 @@ def inputs(self) -> Dict[str, Dict[int, str]]:
             "attention_mask": {0: "batch_size", 1: "sequence_length"},
             "pixel_values": {0: "batch_size", 1: "num_channels", 2: "height", 3: "width"},
         }
-    
+
     @property
     def outputs(self) -> Dict[str, Dict[int, str]]:
         return {
diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
index 87cbed4f331..b8160d65673 100644
--- a/tests/exporters/exporters_utils.py
+++ b/tests/exporters/exporters_utils.py
@@ -197,7 +197,7 @@
             "document-question-answering-with-past",
         ],
     },
-     "visualbert": "hf-internal-testing/tiny-random-VisualBertModel",
+    "visualbert": "hf-internal-testing/tiny-random-VisualBertModel",
 }
 
 

From 2d48a9b02e6f01479ad931b6422586c0b190a329 Mon Sep 17 00:00:00 2001
From: tedasdf <154312357+tedasdf@users.noreply.github.com>
Date: Tue, 3 Dec 2024 10:00:05 +0800
Subject: [PATCH 3/6] fix NORMALIZED_CONFIG_CLASS and tasks.py

---
 optimum/exporters/onnx/model_configs.py | 2 ++
 optimum/exporters/tasks.py              | 4 ++--
 tests/exporters/exporters_utils.py      | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index 321f18ad692..a3c44dbd065 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -121,6 +121,8 @@ def inputs(self) -> Dict[str, Dict[int, str]]:
 class VisualBertOnnxConfig(TextAndVisionOnnxConfig):
     DEFAULT_ONNX_OPSET = 11
 
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
+    
     @property
     def inputs(self) -> Dict[str, Dict[int, str]]:
         return {
diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py
index 3630e5fa95a..78976b9a042 100644
--- a/optimum/exporters/tasks.py
+++ b/optimum/exporters/tasks.py
@@ -1108,10 +1108,10 @@ class TasksManager:
             "text-to-audio",
             onnx="VitsOnnxConfig",
         ),
-        "visualbert": supported_tasks_mapping(
+        "visual_bert": supported_tasks_mapping(
             "multiple-choice",
             "question-answering",
-            "image-to-text",
+            "feature-extraction",
             onnx="VisualBertOnnxConfig",
         ),
         "wavlm": supported_tasks_mapping(
diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
index b8160d65673..6cf65e982a4 100644
--- a/tests/exporters/exporters_utils.py
+++ b/tests/exporters/exporters_utils.py
@@ -197,7 +197,7 @@
             "document-question-answering-with-past",
         ],
     },
-    "visualbert": "hf-internal-testing/tiny-random-VisualBertModel",
+    "visual_bert": "hf-internal-testing/tiny-random-VisualBertModel",
 }
 
 
@@ -287,7 +287,7 @@
     "speech-to-text": "codenamewei/speech-to-text",
     "xlm": "xlm-clm-ende-1024",
     "xlm-roberta": "Unbabel/xlm-roberta-comet-small",
-    "visualbert": "unc-nlp/visualbert-uncased",
+    "visual_bert": "uclanlp/visualbert-vqa-coco-pre",
 }
 
 TENSORFLOW_EXPORT_MODELS = {

From 710e2e2b0a0314cf4c77cdc57a1196057ddb99de Mon Sep 17 00:00:00 2001
From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
Date: Tue, 3 Dec 2024 17:57:28 +0100
Subject: [PATCH 4/6] fix: rename tiny-random VisualBert test model key to "visual-bert"

---
 tests/exporters/exporters_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
index 6cf65e982a4..06a5246d247 100644
--- a/tests/exporters/exporters_utils.py
+++ b/tests/exporters/exporters_utils.py
@@ -197,7 +197,7 @@
             "document-question-answering-with-past",
         ],
     },
-    "visual_bert": "hf-internal-testing/tiny-random-VisualBertModel",
+    "visual-bert": "hf-internal-testing/tiny-random-VisualBertModel",
 }
 
 

From fb5bab8f2de7fc11a3d6dccd52195b9aa4769cd8 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
Date: Tue, 3 Dec 2024 17:57:38 +0100
Subject: [PATCH 5/6] fix: rename uclanlp/visualbert-vqa-coco-pre test model key to "visual-bert"

---
 tests/exporters/exporters_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
index 06a5246d247..22996dbe81e 100644
--- a/tests/exporters/exporters_utils.py
+++ b/tests/exporters/exporters_utils.py
@@ -287,7 +287,7 @@
     "speech-to-text": "codenamewei/speech-to-text",
     "xlm": "xlm-clm-ende-1024",
     "xlm-roberta": "Unbabel/xlm-roberta-comet-small",
-    "visual_bert": "uclanlp/visualbert-vqa-coco-pre",
+    "visual-bert": "uclanlp/visualbert-vqa-coco-pre",
 }
 
 TENSORFLOW_EXPORT_MODELS = {

From 335022d55b00e347139110a94dc16589dfc37ac3 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
Date: Tue, 3 Dec 2024 17:57:49 +0100
Subject: [PATCH 6/6] fix: register supported tasks under the "visual-bert" key in tasks.py

---
 optimum/exporters/tasks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py
index 78976b9a042..e1162593082 100644
--- a/optimum/exporters/tasks.py
+++ b/optimum/exporters/tasks.py
@@ -1108,7 +1108,7 @@ class TasksManager:
             "text-to-audio",
             onnx="VitsOnnxConfig",
         ),
-        "visual_bert": supported_tasks_mapping(
+        "visual-bert": supported_tasks_mapping(
             "multiple-choice",
             "question-answering",
             "feature-extraction",