diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml index c4157797e74..291a3b08335 100644 --- a/.github/workflows/test_onnxruntime.yml +++ b/.github/workflows/test_onnxruntime.yml @@ -42,7 +42,6 @@ jobs: run: | pip install --upgrade pip pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu - pip install git+https://github.com/huggingface/transformers pip install .[tests,onnxruntime] - name: Test with pytest (in series) diff --git a/optimum/bettertransformer/models/attention.py b/optimum/bettertransformer/models/attention.py index 53e6a676e6f..63f0275464f 100644 --- a/optimum/bettertransformer/models/attention.py +++ b/optimum/bettertransformer/models/attention.py @@ -207,9 +207,10 @@ def codegen_wrapped_scaled_dot_product( # causal_mask is always [True, ..., True] otherwise, so executing this # is unnecessary if query_length > 1: - if not check_if_transformers_greater("4.44.99"): - causal_mask = self.causal_mask[:, :, key_length - query_length : key_length, :key_length].to(torch.bool) + causal_mask = self.causal_mask[:, :, key_length - query_length : key_length, :key_length].to( + torch.bool + ) causal_mask = torch.where(causal_mask, 0, mask_value) @@ -219,7 +220,6 @@ def codegen_wrapped_scaled_dot_product( # we use torch.min to avoid having tensor(-inf) attention_mask = torch.min(causal_mask, attention_mask) else: - attention_mask = attention_mask[:, :, :, : key.shape[-2]] sdpa_result = torch.nn.functional.scaled_dot_product_attention( @@ -229,7 +229,6 @@ def codegen_wrapped_scaled_dot_product( return sdpa_result, None - # Adapted from transformers.models.opt.modeling_opt.OPTAttention.forward def opt_forward( self, diff --git a/optimum/onnxruntime/modeling_decoder.py b/optimum/onnxruntime/modeling_decoder.py index 378fdbb6dbb..bda3ec98d9a 100644 --- a/optimum/onnxruntime/modeling_decoder.py +++ b/optimum/onnxruntime/modeling_decoder.py @@ -724,6 +724,7 @@ def _save_pretrained(self, save_directory: Union[str, Path]): super()._save_pretrained(save_directory) self.generation_config.save_pretrained(save_directory) + class ORTGPTBigCodeForCausalLM(ORTModelForCausalLM): # Adapted from transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBigCodeForCausalLM.prepare_inputs_for_generation def prepare_inputs_for_generation(self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs): diff --git a/setup.py b/setup.py index 231dc9110e3..c961cf973cb 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,8 @@ REQUIRED_PKGS = [ "coloredlogs", "sympy", - "transformers[sentencepiece]>=4.29,<4.46.0", + "transformers @ git+https://github.com/huggingface/transformers.git", + # "transformers[sentencepiece]>=4.29,<4.46.0", "torch>=1.11", "packaging", "numpy<2.0", # transformers requires numpy<2.0 https://github.com/huggingface/transformers/pull/31569