Skip to content

Commit

Permalink
Make GPT2 traceable in meta state (#28054)
Browse files Browse the repository at this point in the history
* Put device in tensor constructor instead of to()

* Fix copy
  • Loading branch information
kwen2501 authored Dec 15, 2023
1 parent e2b6df7 commit 74cae67
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def _attn(self, query, key, value, attention_mask=None, head_mask=None):
mask_value = torch.finfo(attn_weights.dtype).min
# Need to be a tensor, otherwise we get error: `RuntimeError: expected scalar type float but found double`.
# Need to be on the same device, otherwise `RuntimeError: ..., x and y to be on the same device`
mask_value = torch.full([], mask_value, dtype=attn_weights.dtype).to(attn_weights.device)
mask_value = torch.full([], mask_value, dtype=attn_weights.dtype, device=attn_weights.device)
attn_weights = torch.where(causal_mask, attn_weights.to(attn_weights.dtype), mask_value)

if attention_mask is not None:
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/gpt2/modeling_gpt2.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def _attn(self, query, key, value, attention_mask=None, head_mask=None):
mask_value = torch.finfo(attn_weights.dtype).min
# Need to be a tensor, otherwise we get error: `RuntimeError: expected scalar type float but found double`.
# Need to be on the same device, otherwise `RuntimeError: ..., x and y to be on the same device`
mask_value = torch.full([], mask_value, dtype=attn_weights.dtype).to(attn_weights.device)
mask_value = torch.full([], mask_value, dtype=attn_weights.dtype, device=attn_weights.device)
attn_weights = torch.where(causal_mask, attn_weights.to(attn_weights.dtype), mask_value)

if attention_mask is not None:
Expand Down

0 comments on commit 74cae67

Please sign in to comment.