
Commit

fix
Cyrilvallez committed Dec 22, 2024
1 parent 8f38f58 commit 9f4de68
Showing 2 changed files with 6 additions and 3 deletions.
3 changes: 3 additions & 0 deletions src/transformers/integrations/flash_attention.py
@@ -44,6 +44,9 @@ def flash_attention_forward(
         else:
             target_dtype = next(layer for layer in module.modules() if isinstance(layer, torch.nn.Linear)).weight.dtype
 
+    # FA2 always relies on the value set in the module, so remove it if present in kwargs
+    kwargs.pop("is_causal", None)
+
     attn_output = _flash_attention_forward(
         query,
         key,
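
For context, a minimal standalone sketch (hypothetical names, not the transformers call chain) of the failure mode that popping "is_causal" guards against: if the keyword arrives via **kwargs while the wrapper also passes it explicitly from the module's own setting, Python raises a duplicate keyword argument error.

# Illustrative sketch only -- made-up helpers, not the transformers API.
def _inner_attention(query, is_causal=True, **kwargs):
    return f"is_causal={is_causal}, extra={kwargs}"

def wrapper(query, module_is_causal, **kwargs):
    # Without this pop, a caller-supplied is_causal in **kwargs would collide with
    # the explicit is_causal below and raise:
    # TypeError: _inner_attention() got multiple values for keyword argument 'is_causal'
    kwargs.pop("is_causal", None)  # the module-level setting wins
    return _inner_attention(query, is_causal=module_is_causal, **kwargs)

print(wrapper("q", True, is_causal=False, dropout=0.0))
# -> is_causal=True, extra={'dropout': 0.0}
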
6 changes: 3 additions & 3 deletions src/transformers/models/gpt2/modeling_gpt2.py
@@ -295,9 +295,9 @@ def forward(
         shape_q = (*query_states.shape[:-1], -1, self.head_dim)
         shape_kv = (*key_states.shape[:-1], -1, self.head_dim)
 
-        query_states = query_states.reshape(shape_q).transpose(1, 2)
-        key_states = key_states.reshape(shape_kv).transpose(1, 2)
-        value_states = value_states.reshape(shape_kv).transpose(1, 2)
+        query_states = query_states.view(shape_q).transpose(1, 2)
+        key_states = key_states.view(shape_kv).transpose(1, 2)
+        value_states = value_states.view(shape_kv).transpose(1, 2)
 
         if layer_past is not None:
             past_key, past_value = layer_past
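
As background (not part of the commit), a small sketch of the general contract difference between .view() and .reshape() in PyTorch, assuming torch is installed: view never copies and requires strides compatible with the requested shape, while reshape silently falls back to a contiguous copy when needed.

import torch

x = torch.arange(24).reshape(2, 3, 4)          # contiguous

# On a contiguous tensor, view() and reshape() both return a no-copy alias.
assert x.view(2, 12).data_ptr() == x.data_ptr()
assert x.reshape(2, 12).data_ptr() == x.data_ptr()

y = x.transpose(1, 2)                          # (2, 4, 3), non-contiguous
try:
    y.view(2, 12)                              # view() refuses: strides are incompatible
except RuntimeError as err:
    print("view failed:", err)
print(y.reshape(2, 12).shape)                  # reshape() silently makes a contiguous copy

Which call is appropriate at a given site depends on the layout of the tensors involved; the sketch only shows the general behavior the diff is switching between.
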
