diff --git a/src/transformers/models/llava/modeling_llava.py b/src/transformers/models/llava/modeling_llava.py index 3a7dbc198e3732..65dee86ae234f8 100644 --- a/src/transformers/models/llava/modeling_llava.py +++ b/src/transformers/models/llava/modeling_llava.py @@ -369,12 +369,12 @@ def forward( >>> url = "https://www.ilankelman.org/stopsigns/australia.jpg" >>> image = Image.open(requests.get(url, stream=True).raw) - >>> inputs = processor(text=text, images=image, return_tensors="pt") + >>> inputs = processor(text=prompt, images=image, return_tensors="pt") >>> # Generate >>> generate_ids = model.generate(**inputs, max_length=30) - >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] - "There seems to be a stop sign" + >>> processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] + "\nUSER: What's the content of the image?\nASSISTANT: The image features a stop sign on a street corner" ```""" output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions