
Commit e370dfd

update attention
ArthurZucker committed Dec 7, 2023
1 parent 698935f commit e370dfd
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions src/transformers/models/chatglm/modeling_chatglm.py
@@ -650,12 +650,12 @@ def _upad_input(self, query_layer, key_layer, value_layer, attention_mask, query
         )
 
 
-# Copied from transformers.models.llama.modeling_llama.LlamaDecoderLayer with Llama->ChatGlm
+# Copied from transformers.models.llama.modeling_llama.LlamaDecoderLayer with Llama->ChatGlm, self_attn->self_attention
 class ChatGlmDecoderLayer(nn.Module):
     def __init__(self, config: ChatGlmConfig):
         super().__init__()
         self.hidden_size = config.hidden_size
-        self.self_attn = (
+        self.self_attention = (
             ChatGlmAttention(config=config)
             if not getattr(config, "_flash_attn_2_enabled", False)
             else ChatGlmFlashAttention2(config=config)
@@ -698,7 +698,7 @@ def forward(
         hidden_states = self.input_layernorm(hidden_states)
 
         # Self Attention
-        hidden_states, self_attn_weights, present_key_value = self.self_attn(
+        hidden_states, self_attention_weights, present_key_value = self.self_attention(
             hidden_states=hidden_states,
             attention_mask=attention_mask,
             position_ids=position_ids,
@@ -718,7 +718,7 @@ def forward(
         outputs = (hidden_states,)
 
         if output_attentions:
-            outputs += (self_attn_weights,)
+            outputs += (self_attention_weights,)
 
         if use_cache:
             outputs += (present_key_value,)
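For context, the renamed attribute is the one that picks between the eager attention module and the FlashAttention-2 variant in the decoder layer's constructor. Below is a minimal, self-contained sketch of that selection pattern; the config class and attention modules are stand-ins invented for illustration, not the real ChatGlmConfig, ChatGlmAttention, or ChatGlmFlashAttention2.

# Illustrative sketch only: stand-in classes, not the actual ChatGlm implementation.
import torch
from torch import nn


class DummyConfig:
    hidden_size = 64
    # Set by the model-loading code when Flash Attention 2 is requested/available.
    _flash_attn_2_enabled = False


class EagerAttention(nn.Module):  # stand-in for ChatGlmAttention
    def __init__(self, config):
        super().__init__()
        self.proj = nn.Linear(config.hidden_size, config.hidden_size)

    def forward(self, hidden_states):
        return self.proj(hidden_states)


class FlashAttention2(EagerAttention):  # stand-in for ChatGlmFlashAttention2
    pass


class DecoderLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        # Same selection pattern as in the diff: choose the attention class from
        # the config flag and expose it under the renamed attribute `self_attention`.
        self.self_attention = (
            EagerAttention(config)
            if not getattr(config, "_flash_attn_2_enabled", False)
            else FlashAttention2(config)
        )

    def forward(self, hidden_states):
        return self.self_attention(hidden_states)


layer = DecoderLayer(DummyConfig())
out = layer(torch.randn(1, 4, DummyConfig.hidden_size))
print(type(layer.self_attention).__name__, out.shape)

The extra replacement added to the "Copied from" comment (self_attn->self_attention) is what lets transformers' copy-consistency check keep passing: the checker regenerates the copied block from LlamaDecoderLayer and applies each listed substitution, so the rename has to be declared there for the copied code to still match.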
