From 6dc1bf8d8737f39cd94df2d4672aee98c9f97cea Mon Sep 17 00:00:00 2001
From: fxmarty <9808326+fxmarty@users.noreply.github.com>
Date: Fri, 28 Jun 2024 15:51:56 +0200
Subject: [PATCH 1/2] use bitwise or

---
 src/transformers/models/mistral/modeling_mistral.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/mistral/modeling_mistral.py b/src/transformers/models/mistral/modeling_mistral.py
index 475dda72c59295..9747cbe6fb98ae 100644
--- a/src/transformers/models/mistral/modeling_mistral.py
+++ b/src/transformers/models/mistral/modeling_mistral.py
@@ -1089,8 +1089,9 @@ def _update_causal_mask(
             exclude_mask = torch.arange(target_length, device=device) > cache_position.reshape(-1, 1)
             if self.config.sliding_window is not None:
                 if not using_sliding_window_cache or sequence_length > self.config.sliding_window:
-                    exclude_mask |= torch.arange(target_length, device=device) <= (
-                        cache_position.reshape(-1, 1) - self.config.sliding_window
+                    exclude_mask.bitwise_or_(
+                        torch.arange(target_length, device=device)
+                        <= (cache_position.reshape(-1, 1) - self.config.sliding_window)
                     )
             causal_mask *= exclude_mask
             causal_mask = causal_mask[None, None, :, :].expand(input_tensor.shape[0], 1, -1, -1)

From 90c9e1350f60ff3e91c4c5ce33d8a1186723b042 Mon Sep 17 00:00:00 2001
From: fxmarty <9808326+fxmarty@users.noreply.github.com>
Date: Tue, 2 Jul 2024 12:29:32 +0200
Subject: [PATCH 2/2] why is the CI not triggered?
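
A minimal standalone sketch (not part of the patch; the tensor shapes and values below are assumed for illustration) showing that the in-place Tensor.bitwise_or_ call introduced by PATCH 1/2 builds the same boolean exclude mask as the |= operator it replaces:

# Hypothetical stand-ins for the config/cache values used in _update_causal_mask.
import torch

target_length = 6
sliding_window = 2
cache_position = torch.arange(4)

# Mask as built before the patch, via the |= operator (out-of-place here for comparison).
exclude_mask = torch.arange(target_length) > cache_position.reshape(-1, 1)
expected = exclude_mask | (
    torch.arange(target_length) <= (cache_position.reshape(-1, 1) - sliding_window)
)

# In-place variant used by the patch.
exclude_mask.bitwise_or_(
    torch.arange(target_length) <= (cache_position.reshape(-1, 1) - sliding_window)
)
assert torch.equal(exclude_mask, expected)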