diff --git a/src/transformers/pipelines/image_text_to_text.py b/src/transformers/pipelines/image_text_to_text.py
index 09ecc1e92ee4ec..e7438ae2dffd05 100644
--- a/src/transformers/pipelines/image_text_to_text.py
+++ b/src/transformers/pipelines/image_text_to_text.py
@@ -278,7 +278,6 @@ def __call__(
             text[0], (list, tuple, dict)
         ):  # We have one or more prompts in list-of-dicts format, so this is chat mode
-
             if isinstance(text[0], dict):
                 return super().__call__(Chat(text, images), **kwargs)
             else:
@@ -287,6 +286,13 @@ def __call__(
                 chats = [Chat(chat, image) for chat, image in zip(text, images)]  # 🐈 🐈 🐈
                 return super().__call__(chats, **kwargs)
 
+        # encourage the user to use the chat format if supported
+        if hasattr(self.processor, "chat_template") and self.processor.chat_template is not None:
+            logger.warning_once(
+                "The pipeline detected no chat format in the prompt, but this model supports chat format. "
+                "Consider using the chat format for better results. For more information, see https://huggingface.co/docs/transformers/en/chat_templating"
+            )
+
         # support text only generation
         if images is None:
             return super().__call__(text, **kwargs)
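
A quick illustration of when the new warning fires (not part of the patch). The checkpoint name and image URL below are placeholders chosen for the example; any image-text-to-text model whose processor defines a chat_template behaves the same way.

    from transformers import pipeline

    # Placeholder checkpoint for illustration; any image-text-to-text model whose
    # processor has a chat_template triggers the same code path.
    pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")

    url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"

    # Plain-string prompt: no chat format detected, so with this patch the pipeline
    # logs the warning_once added above, then proceeds as before.
    print(pipe(images=url, text="Describe this image."))

    # Chat-format prompt (list of dicts with "role"/"content"): handled by the chat
    # branch earlier in __call__, so no warning is emitted.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "url": url},
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]
    print(pipe(text=messages))

The warning uses logger.warning_once, so users passing many plain-string prompts in a loop see the nudge only once per process rather than on every call.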