diff --git a/docs/source/en/tasks/video_text_to_text.md b/docs/source/en/tasks/video_text_to_text.md
index fcc1c86e8bd7ac..3929f7994bdafb 100644
--- a/docs/source/en/tasks/video_text_to_text.md
+++ b/docs/source/en/tasks/video_text_to_text.md
@@ -47,7 +47,7 @@
 model_id = "llava-hf/llava-interleave-qwen-0.5b-hf"
 processor = LlavaProcessor.from_pretrained(model_id)
 model = LlavaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.float16)
-model.to("cuda")
+model.to("cuda") # can also be xpu, mps, npu etc. depending on your hardware accelerator
 ```
 
 Some models directly consume the `
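
For readers on non-CUDA hardware, a minimal sketch of how the target device could be chosen dynamically instead of hard-coding `"cuda"` (assuming a recent PyTorch build; `pick_device` is a hypothetical helper and not part of this change):

```python
import torch

# Hypothetical helper (not part of the docs change): return the first
# available accelerator, falling back to CPU.
def pick_device() -> torch.device:
    if torch.cuda.is_available():                           # NVIDIA/AMD GPUs
        return torch.device("cuda")
    if hasattr(torch, "xpu") and torch.xpu.is_available():  # Intel GPUs, newer PyTorch builds
        return torch.device("xpu")
    if torch.backends.mps.is_available():                   # Apple Silicon
        return torch.device("mps")
    return torch.device("cpu")

# Usage with the snippet above:
# model.to(pick_device())
```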