From 138ff00e708075c43a20b8cec68aac6d7acf9a87 Mon Sep 17 00:00:00 2001
From: ydshieh
Date: Thu, 18 Apr 2024 16:15:03 +0200
Subject: [PATCH] fix

---
 .../models/clip/convert_clip_original_pytorch_to_hf.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/transformers/models/clip/convert_clip_original_pytorch_to_hf.py b/src/transformers/models/clip/convert_clip_original_pytorch_to_hf.py
index 41b45d50209974..8aeea004268726 100644
--- a/src/transformers/models/clip/convert_clip_original_pytorch_to_hf.py
+++ b/src/transformers/models/clip/convert_clip_original_pytorch_to_hf.py
@@ -125,7 +125,14 @@ def convert_clip_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_pa
     hf_model.logit_scale = pt_model.logit_scale
 
     # Use a `eos_token` so the example is more meaningful
-    input_ids = torch.tensor([[config.text_config.bos_token_id] + list(range(3, 77)) + [config.text_config.eos_token_id] + [config.text_config.pad_token_id]])
+    input_ids = torch.tensor(
+        [
+            [config.text_config.bos_token_id]
+            + list(range(3, 77))
+            + [config.text_config.eos_token_id]
+            + [config.text_config.pad_token_id]
+        ]
+    )
     pixel_values = torch.randn(1, 3, 224, 224)
 
     hf_outputs = hf_model(input_ids=input_ids, pixel_values=pixel_values, return_dict=True)
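
Notes: the hunk only reflows a long expression, so behavior is unchanged. As a
sanity check, here is a minimal standalone sketch of the tensor the
reformatted code builds. The token ids below are assumed CLIP text-config
defaults (bos=49406, eos=49407, pad=1), not values read from an actual
checkpoint; the converter itself takes them from `config.text_config`:

    import torch

    # Assumed special-token ids (CLIP text-config defaults); the real
    # converter reads these from `config.text_config`.
    bos_token_id, eos_token_id, pad_token_id = 49406, 49407, 1

    # <bos> + token ids 3..76 + <eos> + <pad> gives 1 + 74 + 1 + 1 = 77
    # positions, i.e. CLIP's maximum text sequence length.
    input_ids = torch.tensor(
        [[bos_token_id] + list(range(3, 77)) + [eos_token_id] + [pad_token_id]]
    )
    print(input_ids.shape)  # torch.Size([1, 77])

Including a real `eos_token_id` matters because the HF CLIP text model takes
its pooled embedding from the hidden state at the eos position, which is what
the comment above the changed line is getting at.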