Skip to content

Commit

Permalink
Cohere: Fix copied from (#31213)
Browse files — Browse the repository at this point in the history
Update modeling_cohere.py
Loading branch information
younesbelkada authored Jun 3, 2024
1 parent 98dd842 commit 924c46d
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/transformers/models/cohere/modeling_cohere.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def forward(
return attn_output, attn_weights, past_key_value


# Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2 Llama->Cohere
# Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2 with Llama->Cohere
class CohereFlashAttention2(CohereAttention):
"""
Cohere flash attention module. This module inherits from `CohereAttention` as the weights of the module stays
Expand All @@ -326,6 +326,7 @@ def __init__(self, *args, **kwargs):
# Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10()

# Ignore copy
def forward(
self,
hidden_states: torch.Tensor,
Expand Down

0 comments on commit 924c46d

Please sign in to comment.