From 31093a20d016cd30484fd7bbe0970bb72d2a58a6 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 19 Dec 2024 13:20:47 +0000 Subject: [PATCH 1/6] Introduce `DisableCompileContextManager` --- optimum/exporters/onnx/__main__.py | 41 +++++++++++++++++------------ optimum/exporters/onnx/constants.py | 4 +++ 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/optimum/exporters/onnx/__main__.py b/optimum/exporters/onnx/__main__.py index 6a2cc6834a6..6728f3e6261 100644 --- a/optimum/exporters/onnx/__main__.py +++ b/optimum/exporters/onnx/__main__.py @@ -15,6 +15,7 @@ """Entry point to the optimum.exporters.onnx command line.""" import argparse +import contextlib import warnings from pathlib import Path @@ -29,7 +30,8 @@ from ...utils import DEFAULT_DUMMY_SHAPES, logging from ...utils.save_utils import maybe_load_preprocessors from ..tasks import TasksManager -from .constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED +from ..utils import DisableCompileContextManager +from .constants import COMPILE_ARCHS_ONNX_EXPORT_NOT_SUPPORTED, SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED from .convert import onnx_export_from_model @@ -265,6 +267,8 @@ def main_export( f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}" ) + get_model_context_manager = contextlib.nullcontext() + custom_architecture = False loading_kwargs = {} if library_name == "transformers": @@ -299,23 +303,26 @@ def main_export( # TODO: Fix in Transformers so that SdpaAttention class can be exported to ONNX. `attn_implementation` is introduced in Transformers 4.36. if model_type in SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED and _transformers_version >= version.parse("4.35.99"): loading_kwargs["attn_implementation"] = "eager" + elif model_type in COMPILE_ARCHS_ONNX_EXPORT_NOT_SUPPORTED: + get_model_context_manager = DisableCompileContextManager() - model = TasksManager.get_model_from_task( - task, - model_name_or_path, - subfolder=subfolder, - revision=revision, - cache_dir=cache_dir, - token=token, - local_files_only=local_files_only, - force_download=force_download, - trust_remote_code=trust_remote_code, - framework=framework, - torch_dtype=torch_dtype, - device=device, - library_name=library_name, - **loading_kwargs, - ) + with get_model_context_manager: + model = TasksManager.get_model_from_task( + task, + model_name_or_path, + subfolder=subfolder, + revision=revision, + cache_dir=cache_dir, + token=token, + local_files_only=local_files_only, + force_download=force_download, + trust_remote_code=trust_remote_code, + framework=framework, + torch_dtype=torch_dtype, + device=device, + library_name=library_name, + **loading_kwargs, + ) needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None diff --git a/optimum/exporters/onnx/constants.py b/optimum/exporters/onnx/constants.py index 0a6f9f9b363..438b98b2ebd 100644 --- a/optimum/exporters/onnx/constants.py +++ b/optimum/exporters/onnx/constants.py @@ -39,3 +39,7 @@ "musicgen", "whisper", ] + +COMPILE_ARCHS_ONNX_EXPORT_NOT_SUPPORTED = [ + "modernbert", +] From d33710d341140500c42af92d286741c5922b8638 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 19 Dec 2024 13:32:29 +0000 Subject: [PATCH 2/6] DisableCompileContextManager definition --- optimum/exporters/utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/optimum/exporters/utils.py b/optimum/exporters/utils.py index 60de169de5e..51a90e3a2be 100644 --- a/optimum/exporters/utils.py +++ b/optimum/exporters/utils.py @@ -675,3 +675,15 @@ def _get_submodels_and_export_configs( export_config = next(iter(models_and_export_configs.values()))[1] return export_config, models_and_export_configs + + +class DisableCompileContextManager: + def __init__(self): + self._original_compile = torch.compile + + def __enter__(self): + # Turn torch.compile into a no-op + torch.compile = lambda *args, **kwargs: lambda x: x + + def __exit__(self, exc_type, exc_val, exc_tb): + torch.compile = self._original_compile From 282177aeff426ac68c629999f994317ce211b034 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 19 Dec 2024 13:33:02 +0000 Subject: [PATCH 3/6] Add ONNX export support for modernbert --- optimum/exporters/onnx/model_configs.py | 4 ++++ optimum/exporters/tasks.py | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 4c5a727a183..564e98b55bf 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -179,6 +179,10 @@ def inputs(self) -> Dict[str, Dict[int, str]]: return {"input_ids": dynamic_axis, "attention_mask": dynamic_axis} +class ModernBertOnnxConfig(DistilBertOnnxConfig): + pass + + class MPNetOnnxConfig(DistilBertOnnxConfig): DEFAULT_ONNX_OPSET = 12 # For lower opsets, results in: Type 'tensor(int64)' of input parameter (/0/auto_model/encoder/Add_1_output_0) of operator (Min) in node (/0/auto_model/encoder/Min) is invalid. diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 7cb5a31d2d5..59c066ac389 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -891,6 +891,15 @@ class TasksManager: "image-classification", onnx="MobileNetV2OnnxConfig", ), + "modernbert": supported_tasks_mapping( + "feature-extraction", + "fill-mask", + "text-classification", + "multiple-choice", + "token-classification", + "question-answering", + onnx="ModernBertOnnxConfig", + ), "mpnet": supported_tasks_mapping( "feature-extraction", "fill-mask", From 197c985af532f18bdcd588961e4d6e7e6d4e3c3d Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Fri, 20 Dec 2024 15:10:20 +0000 Subject: [PATCH 4/6] Always use `DisableCompileContextManager` during export --- optimum/exporters/onnx/__main__.py | 9 ++------- optimum/exporters/onnx/constants.py | 4 ---- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/optimum/exporters/onnx/__main__.py b/optimum/exporters/onnx/__main__.py index 6728f3e6261..280c6fc6554 100644 --- a/optimum/exporters/onnx/__main__.py +++ b/optimum/exporters/onnx/__main__.py @@ -15,7 +15,6 @@ """Entry point to the optimum.exporters.onnx command line.""" import argparse -import contextlib import warnings from pathlib import Path @@ -31,7 +30,7 @@ from ...utils.save_utils import maybe_load_preprocessors from ..tasks import TasksManager from ..utils import DisableCompileContextManager -from .constants import COMPILE_ARCHS_ONNX_EXPORT_NOT_SUPPORTED, SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED +from .constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED from .convert import onnx_export_from_model @@ -267,8 +266,6 @@ def main_export( f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}" ) - get_model_context_manager = contextlib.nullcontext() - custom_architecture = False loading_kwargs = {} if library_name == "transformers": @@ -303,10 +300,8 @@ def main_export( # TODO: Fix in Transformers so that SdpaAttention class can be exported to ONNX. `attn_implementation` is introduced in Transformers 4.36. if model_type in SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED and _transformers_version >= version.parse("4.35.99"): loading_kwargs["attn_implementation"] = "eager" - elif model_type in COMPILE_ARCHS_ONNX_EXPORT_NOT_SUPPORTED: - get_model_context_manager = DisableCompileContextManager() - with get_model_context_manager: + with DisableCompileContextManager(): model = TasksManager.get_model_from_task( task, model_name_or_path, diff --git a/optimum/exporters/onnx/constants.py b/optimum/exporters/onnx/constants.py index 438b98b2ebd..0a6f9f9b363 100644 --- a/optimum/exporters/onnx/constants.py +++ b/optimum/exporters/onnx/constants.py @@ -39,7 +39,3 @@ "musicgen", "whisper", ] - -COMPILE_ARCHS_ONNX_EXPORT_NOT_SUPPORTED = [ - "modernbert", -] From 0c267a56ca8e6e1a1ae6fadc5e91cdeb6a8a9cca Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Fri, 20 Dec 2024 15:16:31 +0000 Subject: [PATCH 5/6] Add modernbert to listed models --- docs/source/exporters/onnx/overview.mdx | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index b5129c23f21..18c75953c3a 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -74,6 +74,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra - MobileVit - MobileNet v1 - MobileNet v2 +- ModernBert - MPNet - MT5 - Musicgen (text-conditional only) From baaca83e7c0da5a4b79573768a5922b32a5f8cad Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Fri, 20 Dec 2024 15:16:42 +0000 Subject: [PATCH 6/6] Add modernbert unit tests --- tests/exporters/exporters_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 900b5f3b5ce..d256e16dd4f 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -125,6 +125,7 @@ "mobilenet-v2": "hf-internal-testing/tiny-random-MobileNetV2Model", "mobilenet-v1": "google/mobilenet_v1_0.75_192", "mobilevit": "hf-internal-testing/tiny-random-mobilevit", + "modernbert": "hf-internal-testing/tiny-random-ModernBertForMaskedLM", "mpnet": "hf-internal-testing/tiny-random-MPNetModel", "mpt": "hf-internal-testing/tiny-random-MptForCausalLM", "mt5": "lewtun/tiny-random-mt5", @@ -266,6 +267,7 @@ # "mobilenet_v1": "google/mobilenet_v1_0.75_192", # "mobilenet_v2": "google/mobilenet_v2_0.35_96", "mobilevit": "apple/mobilevit-small", + "modernbert": "answerdotai/ModernBERT-base", "mpt": "mosaicml/mpt-7b", "mt5": "lewtun/tiny-random-mt5", # Not using google/mt5-small because it takes too much time for testing. "musicgen": "facebook/musicgen-small",