From 809329167ede24b488bbf3f971ea0918c7e9b923 Mon Sep 17 00:00:00 2001
From: Felix Marty <9808326+fxmarty@users.noreply.github.com>
Date: Wed, 13 Dec 2023 14:57:17 +0000
Subject: [PATCH 1/2] clearer error for sdpa

---
 src/transformers/modeling_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 3247c323685815..922566b3d9b6c1 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -1462,8 +1462,8 @@ def _check_and_enable_sdpa(cls, config, hard_check_only: bool = False) -> Pretra
         if hard_check_only:
             if not cls._supports_sdpa:
                 raise ValueError(
-                    f"{cls.__name__} does not support an attention implementation through torch.nn.functional.scaled_dot_product_attention yet. Please open an issue on GitHub to "
-                    "request support for this architecture: https://github.com/huggingface/transformers/issues/new"
+                    f"{cls.__name__} does not support an attention implementation through torch.nn.functional.scaled_dot_product_attention yet. Please request the"
+                    ' support for this architecture: https://github.com/huggingface/transformers/issues/28005. If you believe this error is a bug, please open an issue in Transformers GitHub repository and load your model with the argument `attn_implementation="eager"` meanwhile.'
                 )
             if not is_torch_sdpa_available():
                 raise ImportError(

From 37909049208d230f714690d4eae61add3bdc9308 Mon Sep 17 00:00:00 2001
From: Felix Marty <9808326+fxmarty@users.noreply.github.com>
Date: Wed, 13 Dec 2023 16:27:57 +0000
Subject: [PATCH 2/2] better sdpa error message with usage example

---
 src/transformers/modeling_utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 922566b3d9b6c1..50f1ca408fda04 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -1462,8 +1462,9 @@ def _check_and_enable_sdpa(cls, config, hard_check_only: bool = False) -> Pretra
         if hard_check_only:
             if not cls._supports_sdpa:
                 raise ValueError(
-                    f"{cls.__name__} does not support an attention implementation through torch.nn.functional.scaled_dot_product_attention yet. Please request the"
-                    ' support for this architecture: https://github.com/huggingface/transformers/issues/28005. If you believe this error is a bug, please open an issue in Transformers GitHub repository and load your model with the argument `attn_implementation="eager"` meanwhile.'
+                    f"{cls.__name__} does not support an attention implementation through torch.nn.functional.scaled_dot_product_attention yet."
+                    " Please request the support for this architecture: https://github.com/huggingface/transformers/issues/28005. If you believe"
+                    ' this error is a bug, please open an issue in Transformers GitHub repository and load your model with the argument `attn_implementation="eager"` meanwhile. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="eager")`'
                 )
             if not is_torch_sdpa_available():
                 raise ImportError(