From c7b7d21ae9be8bf8ffb1704c9475451dd207f912 Mon Sep 17 00:00:00 2001 From: tedasdf Date: Thu, 7 Nov 2024 10:50:57 +1100 Subject: [PATCH 1/6] feat: add onnx for visualbert --- optimum/exporters/onnx/model_configs.py | 18 ++++++++++++++++++ optimum/exporters/tasks.py | 6 ++++++ tests/exporters/exporters_utils.py | 2 ++ 3 files changed, 26 insertions(+) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index cc752779d30..0b0edd04a99 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -117,6 +117,24 @@ def inputs(self) -> Dict[str, Dict[int, str]]: "token_type_ids": dynamic_axis, } +class VisualBertOnnxConfig(TextAndVisionOnnxConfig): + DEFAULT_ONNX_OPSET = 11 + + @property + def inputs(self) -> Dict[str, Dict[int, str]]: + return { + "input_ids": {0: "batch_size", 1: "sequence_length"}, + "attention_mask": {0: "batch_size", 1: "sequence_length"}, + "pixel_values": {0: "batch_size", 1: "num_channels", 2: "height", 3: "width"}, + } + + @property + def outputs(self) -> Dict[str, Dict[int, str]]: + return { + "last_hidden_state": {0: "batch_size", 1: "sequence_length"}, + "pooler_output": {0: "batch_size"}, + } + class AlbertOnnxConfig(BertOnnxConfig): DEFAULT_ONNX_OPSET = 14 # now uses F.scaled_dot_product_attention by default for torch>=2.1.1. diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index fdc8bfcb539..3630e5fa95a 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -1108,6 +1108,12 @@ class TasksManager: "text-to-audio", onnx="VitsOnnxConfig", ), + "visualbert": supported_tasks_mapping( + "multiple-choice", + "question-answering", + "image-to-text", + onnx="VisualBertOnnxConfig", + ), "wavlm": supported_tasks_mapping( "feature-extraction", "automatic-speech-recognition", diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index ccccb5510bf..87cbed4f331 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -197,6 +197,7 @@ "document-question-answering-with-past", ], }, + "visualbert": "hf-internal-testing/tiny-random-VisualBertModel", } @@ -286,6 +287,7 @@ "speech-to-text": "codenamewei/speech-to-text", "xlm": "xlm-clm-ende-1024", "xlm-roberta": "Unbabel/xlm-roberta-comet-small", + "visualbert": "unc-nlp/visualbert-uncased", } TENSORFLOW_EXPORT_MODELS = { From 5aae9cbfb8b0848cb45d35297b6fc84bb2920064 Mon Sep 17 00:00:00 2001 From: tedasdf Date: Thu, 7 Nov 2024 10:58:52 +1100 Subject: [PATCH 2/6] make style --- optimum/exporters/onnx/model_configs.py | 3 ++- tests/exporters/exporters_utils.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 0b0edd04a99..321f18ad692 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -117,6 +117,7 @@ def inputs(self) -> Dict[str, Dict[int, str]]: "token_type_ids": dynamic_axis, } + class VisualBertOnnxConfig(TextAndVisionOnnxConfig): DEFAULT_ONNX_OPSET = 11 @@ -127,7 +128,7 @@ def inputs(self) -> Dict[str, Dict[int, str]]: "attention_mask": {0: "batch_size", 1: "sequence_length"}, "pixel_values": {0: "batch_size", 1: "num_channels", 2: "height", 3: "width"}, } - + @property def outputs(self) -> Dict[str, Dict[int, str]]: return { diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 87cbed4f331..b8160d65673 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -197,7 +197,7 @@ "document-question-answering-with-past", ], }, - "visualbert": "hf-internal-testing/tiny-random-VisualBertModel", + "visualbert": "hf-internal-testing/tiny-random-VisualBertModel", } From 2d48a9b02e6f01479ad931b6422586c0b190a329 Mon Sep 17 00:00:00 2001 From: tedasdf <154312357+tedasdf@users.noreply.github.com> Date: Tue, 3 Dec 2024 10:00:05 +0800 Subject: [PATCH 3/6] fix NORMALIZED_CONFIG_CLASS and task.py --- optimum/exporters/onnx/model_configs.py | 2 ++ optimum/exporters/tasks.py | 4 ++-- tests/exporters/exporters_utils.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 321f18ad692..a3c44dbd065 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -121,6 +121,8 @@ def inputs(self) -> Dict[str, Dict[int, str]]: class VisualBertOnnxConfig(TextAndVisionOnnxConfig): DEFAULT_ONNX_OPSET = 11 + NORMALIZED_CONFIG_CLASS = NormalizedTextConfig + @property def inputs(self) -> Dict[str, Dict[int, str]]: return { diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 3630e5fa95a..78976b9a042 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -1108,10 +1108,10 @@ class TasksManager: "text-to-audio", onnx="VitsOnnxConfig", ), - "visualbert": supported_tasks_mapping( + "visual_bert": supported_tasks_mapping( "multiple-choice", "question-answering", - "image-to-text", + "feature-extraction", onnx="VisualBertOnnxConfig", ), "wavlm": supported_tasks_mapping( diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index b8160d65673..6cf65e982a4 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -197,7 +197,7 @@ "document-question-answering-with-past", ], }, - "visualbert": "hf-internal-testing/tiny-random-VisualBertModel", + "visual_bert": "hf-internal-testing/tiny-random-VisualBertModel", } @@ -287,7 +287,7 @@ "speech-to-text": "codenamewei/speech-to-text", "xlm": "xlm-clm-ende-1024", "xlm-roberta": "Unbabel/xlm-roberta-comet-small", - "visualbert": "unc-nlp/visualbert-uncased", + "visual_bert": "uclanlp/visualbert-vqa-coco-pre", } TENSORFLOW_EXPORT_MODELS = { From 710e2e2b0a0314cf4c77cdc57a1196057ddb99de Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:57:28 +0100 Subject: [PATCH 4/6] fix --- tests/exporters/exporters_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 6cf65e982a4..06a5246d247 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -197,7 +197,7 @@ "document-question-answering-with-past", ], }, - "visual_bert": "hf-internal-testing/tiny-random-VisualBertModel", + "visual-bert": "hf-internal-testing/tiny-random-VisualBertModel", } From fb5bab8f2de7fc11a3d6dccd52195b9aa4769cd8 Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:57:38 +0100 Subject: [PATCH 5/6] fix --- tests/exporters/exporters_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 06a5246d247..22996dbe81e 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -287,7 +287,7 @@ "speech-to-text": "codenamewei/speech-to-text", "xlm": "xlm-clm-ende-1024", "xlm-roberta": "Unbabel/xlm-roberta-comet-small", - "visual_bert": "uclanlp/visualbert-vqa-coco-pre", + "visual-bert": "uclanlp/visualbert-vqa-coco-pre", } TENSORFLOW_EXPORT_MODELS = { From 335022d55b00e347139110a94dc16589dfc37ac3 Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:57:49 +0100 Subject: [PATCH 6/6] fix --- optimum/exporters/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 78976b9a042..e1162593082 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -1108,7 +1108,7 @@ class TasksManager: "text-to-audio", onnx="VitsOnnxConfig", ), - "visual_bert": supported_tasks_mapping( + "visual-bert": supported_tasks_mapping( "multiple-choice", "question-answering", "feature-extraction",