checking if attention mask present for ignoring pad tokens in ffn (#1188)
ShashankMosaicML authored May 9, 2024
1 parent ac563e6 · commit 0c7bc2a
Showing 1 changed file with 2 additions and 2 deletions.
llmfoundry/models/layers/blocks.py (2 additions, 2 deletions)
@@ -221,11 +221,11 @@ def apply_ffn(
         """
         batch_size, seq_len = m.size()[:2]
         indices = None
-        if not self.use_pad_tok_in_ffn:
+        if not self.use_pad_tok_in_ffn and attention_mask is not None:
             assert unpad_input is not None
             m, indices, _, _ = unpad_input(m, attention_mask)
         n = self.ffn(m)
-        if not self.use_pad_tok_in_ffn:
+        if not self.use_pad_tok_in_ffn and attention_mask is not None:
             assert pad_input is not None
             n = pad_input(n, indices, batch_size, seq_len)
         return n
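For context, below is a minimal sketch of the pad/unpad round trip this guard protects. It uses plain PyTorch; toy_unpad and toy_pad are hypothetical stand-ins for the unpad_input/pad_input helpers referenced in blocks.py, not the actual implementations. The point of the added attention_mask is not None check is that the unpad step needs a mask to know which positions are padding, so when no mask is supplied the FFN is simply applied to the full padded tensor.

import torch

def toy_unpad(x, attention_mask):
    # Drop padded positions: (batch, seq, d) -> (num_valid_tokens, d),
    # keeping the flat indices so outputs can be scattered back later.
    batch, seq, d = x.shape
    flat_mask = attention_mask.bool().reshape(-1)
    indices = torch.nonzero(flat_mask, as_tuple=False).squeeze(-1)
    return x.reshape(batch * seq, d)[indices], indices

def toy_pad(y, indices, batch, seq):
    # Scatter FFN outputs back into a zero-padded (batch, seq, d) tensor.
    d = y.shape[-1]
    out = torch.zeros(batch * seq, d, dtype=y.dtype)
    out[indices] = y
    return out.reshape(batch, seq, d)

# Two sequences of length 4; the second one ends in a pad token.
m = torch.randn(2, 4, 8)
attention_mask = torch.tensor([[1, 1, 1, 1],
                               [1, 1, 1, 0]])
ffn = torch.nn.Linear(8, 8)

use_pad_tok_in_ffn = False
if not use_pad_tok_in_ffn and attention_mask is not None:   # guard from this commit
    m_unpadded, indices = toy_unpad(m, attention_mask)       # shape (7, 8)
    n = toy_pad(ffn(m_unpadded), indices, 2, 4)              # back to (2, 4, 8)
else:
    n = ffn(m)   # no mask supplied: every token is treated as valid, nothing to unpad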