Add support for text only inference
yonigozlan committed Oct 17, 2024
1 parent 6071fc3 commit 17903d1
Showing 1 changed file with 34 additions and 23 deletions.
src/transformers/pipelines/image_text_to_text.py (+34 −23)
@@ -287,9 +287,11 @@ def __call__(
                 chats = [Chat(chat, image) for chat, image in zip(text, images)]  # 🐈 🐈 🐈
                 return super().__call__(chats, **kwargs)
 
-        # If we are not in chat mode, we need both images and text
-        if images is None or text is None:
-            raise ValueError("You must provide both images and text when not using chat templates.")
+        # support text only generation
+        if images is None:
+            return super().__call__(text, **kwargs)
+        if text is None:
+            raise ValueError("You must provide text for this pipeline.")
 
         if not isinstance(images, (list, tuple)):
             images = [images]
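
For context, a minimal usage sketch of what this first hunk enables (not part of the commit; the checkpoint name is only an illustrative choice, and generation kwargs may vary by model):

from transformers import pipeline

pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")

# Before this commit, omitting `images` raised a ValueError; with this
# change the call is forwarded to the parent pipeline with text alone.
output = pipe(text="What is the capital of France?", max_new_tokens=20)
print(output)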
@@ -386,28 +388,37 @@ def preprocess(self, inputs=None, timeout=None, continue_final_message=None, pro
         processing_kwargs["legacy"] = False
         processing_kwargs = {k: v for k, v in processing_kwargs.items() if v is not None}
 
-        images = inputs.images
-
-        if isinstance(inputs, Chat):
-            # If the user passes a chat that ends in an assistant message, we treat it as a prefill by default
-            # because very few models support multiple separate, consecutive assistant messages
-            if continue_final_message is None:
-                continue_final_message = inputs.messages[-1]["role"] == "assistant"
-            text = self.processor.apply_chat_template(
-                inputs.messages,
-                add_generation_prompt=not continue_final_message,
-                continue_final_message=continue_final_message,
-                return_tensors=self.framework,
-            )
+        # In case we only have text inputs
+        if isinstance(inputs, (list, tuple, str)):
+            images = None
+            text = inputs
+            inputs_text = inputs
         else:
-            text = inputs.text
-            inputs_text = inputs.text
-
-        if not isinstance(images, (list, tuple)):
-            images = load_image(images, timeout=timeout)
-        else:
-            images = [load_image(image, timeout=timeout) for image in images]
+            # We have an ImageText or Chat inputs
+            images = inputs.images
+            if len(images) > 0:
+                if not isinstance(images, (list, tuple)):
+                    images = load_image(images, timeout=timeout)
+                else:
+                    images = [load_image(image, timeout=timeout) for image in images]
+            else:
+                images = None
+
+            if isinstance(inputs, Chat):
+                # If the user passes a chat that ends in an assistant message, we treat it as a prefill by default
+                # because very few models support multiple separate, consecutive assistant messages
+                if continue_final_message is None:
+                    continue_final_message = inputs.messages[-1]["role"] == "assistant"
+                text = self.processor.apply_chat_template(
+                    inputs.messages,
+                    add_generation_prompt=not continue_final_message,
+                    continue_final_message=continue_final_message,
+                    return_tensors=self.framework,
+                )
+                inputs_text = inputs
+            else:
+                text = inputs.text
+                inputs_text = inputs.text
 
         # if batched text inputs, we set padding to True unless specified otherwise
         if isinstance(text, (list, tuple)) and len(text) > 1:
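To illustrate the preprocess dispatch above (a sketch, not part of the commit; it reuses the hypothetical `pipe` from the earlier example): plain strings or lists are now treated as text-only input, while Chat inputs still go through the processor's chat template, with a chat ending in an assistant message treated as a prefill by default.

# Hypothetical chat whose last message comes from the assistant.
messages = [
    {"role": "user", "content": [{"type": "text", "text": "Write a haiku."}]},
    {"role": "assistant", "content": [{"type": "text", "text": "Autumn leaves are"}]},
]

# Because messages[-1]["role"] == "assistant", preprocess defaults
# continue_final_message to True and applies the chat template with
# add_generation_prompt=False, so the model continues "Autumn leaves are"
# instead of opening a new assistant turn.
output = pipe(text=messages, max_new_tokens=20)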
