From b58676fe3c5af8bf9845dbe6875a8cbcf4d4b376 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Thu, 18 Apr 2024 16:32:31 +0200 Subject: [PATCH] fix --- src/transformers/models/clipseg/modeling_clipseg.py | 1 + src/transformers/models/groupvit/modeling_groupvit.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/transformers/models/clipseg/modeling_clipseg.py b/src/transformers/models/clipseg/modeling_clipseg.py index 06e4c83e7e532b..317f136af713cd 100644 --- a/src/transformers/models/clipseg/modeling_clipseg.py +++ b/src/transformers/models/clipseg/modeling_clipseg.py @@ -736,6 +736,7 @@ def forward( pooled_output = last_hidden_state[ torch.arange(last_hidden_state.shape[0], device=last_hidden_state.device), # We need to get the first position of `eos_token_id` value (`pad_token_ids` might equal to `eos_token_id`) + # Note: we assume the input always has an eos token in each text (i.e. always prepared by the clipseg tokenizer) (input_ids.to(dtype=torch.int, device=last_hidden_state.device) == self.eos_token_id) .int() .argmax(dim=-1), diff --git a/src/transformers/models/groupvit/modeling_groupvit.py b/src/transformers/models/groupvit/modeling_groupvit.py index ec383b0fcfa6cb..89b7722cf05c51 100644 --- a/src/transformers/models/groupvit/modeling_groupvit.py +++ b/src/transformers/models/groupvit/modeling_groupvit.py @@ -1118,6 +1118,7 @@ def forward( pooled_output = last_hidden_state[ torch.arange(last_hidden_state.shape[0], device=last_hidden_state.device), # We need to get the first position of `eos_token_id` value (`pad_token_ids` might equal to `eos_token_id`) + # Note: we assume the input always has an eos token in each text (i.e. always prepared by the clip tokenizer) (input_ids.to(dtype=torch.int, device=last_hidden_state.device) == self.eos_token_id) .int() .argmax(dim=-1),