diff --git a/optimum/intel/ipex/modeling_base.py b/optimum/intel/ipex/modeling_base.py
index 00fe3de115..a125194250 100644
--- a/optimum/intel/ipex/modeling_base.py
+++ b/optimum/intel/ipex/modeling_base.py
@@ -506,12 +506,6 @@ def forward(
             "attention_mask": attention_mask,
         }
 
-        if "position_ids" in self.input_names and position_ids is None:
-            position_ids = attention_mask.long().cumsum(-1) - 1
-            position_ids.masked_fill_(attention_mask == 0, 1)
-            if past_key_values:
-                position_ids = position_ids[:, -1].unsqueeze(-1)
-
         if "position_ids" in self.input_names or not self.input_names:
             inputs["position_ids"] = position_ids
 
diff --git a/tests/ipex/test_modeling.py b/tests/ipex/test_modeling.py
index 68119287d8..c46ce1cdc2 100644
--- a/tests/ipex/test_modeling.py
+++ b/tests/ipex/test_modeling.py
@@ -32,7 +32,6 @@
     set_seed,
 )
 
-from optimum.exporters.onnx import MODEL_TYPES_REQUIRING_POSITION_IDS
 from optimum.intel import (
     IPEXModel,
     IPEXModelForAudioClassification,
@@ -236,11 +235,8 @@ def test_compare_to_transformers(self, model_arch):
             return_tensors="pt",
             return_token_type_ids=False if model_arch in ("llama", "llama2") else None,
         )
-        position_ids = None
-        if model_arch.replace("_", "-") in MODEL_TYPES_REQUIRING_POSITION_IDS:
-            input_shape = tokens["input_ids"].shape
-            position_ids = torch.arange(0, input_shape[-1], dtype=torch.long).unsqueeze(0).view(-1, input_shape[-1])
-        outputs = ipex_model(**tokens, position_ids=position_ids)
+        inputs = ipex_model.prepare_inputs_for_generation(**tokens)
+        outputs = ipex_model(**inputs)
         self.assertIsInstance(outputs.logits, torch.Tensor)
         self.assertIsInstance(outputs.past_key_values, (tuple, list))
 
@@ -263,6 +259,22 @@ def test_pipeline(self, model_arch):
         self.assertEqual(pipe.device, model.device)
         self.assertTrue(all("This is a sample" in item["generated_text"] for item in outputs))
 
+    @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    def test_assisted_decoding(self, model_arch):
+        model_id = MODEL_NAMES[model_arch]
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        ipex_model = IPEXModelForCausalLM.from_pretrained(model_id, export=True)
+        transformers_model = AutoModelForCausalLM.from_pretrained(model_id)
+        tokens = tokenizer("This is a sample input", return_tensors="pt")
+        ipex_output = ipex_model.generate(**tokens, do_sample=False)
+        ipex_output_assisted = ipex_model.generate(**tokens, do_sample=False, assistant_model=transformers_model)
+        transformers_output = transformers_model.generate(**tokens, do_sample=False)
+        transformers_output_assisted = transformers_model.generate(
+            **tokens, do_sample=False, assistant_model=ipex_model
+        )
+        self.assertTrue(torch.equal(ipex_output, ipex_output_assisted))
+        self.assertTrue(torch.equal(transformers_output, transformers_output_assisted))
+
     @parameterized.expand(
         grid_parameters(
             {