From ea507987cfdd6ce367883fc9ea13cb76578899ae Mon Sep 17 00:00:00 2001
From: jiqing-feng <jiqing.feng@intel.com>
Date: Tue, 5 Mar 2024 08:21:13 -0500
Subject: [PATCH 1/3] change llama test model to cover MQA

---
 tests/generation/test_modeling.py | 2 +-
 tests/ipex/test_inference.py      | 2 +-
 tests/ipex/test_modeling.py       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/generation/test_modeling.py b/tests/generation/test_modeling.py
index b97fd66a83..0043baa99b 100644
--- a/tests/generation/test_modeling.py
+++ b/tests/generation/test_modeling.py
@@ -30,7 +30,7 @@
     "gpt2": "hf-internal-testing/tiny-random-gpt2",
     "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
     "mistral": "echarlaix/tiny-random-mistral",
-    "llama": "fxmarty/tiny-llama-fast-tokenizer",
+    "llama": "Jiqing/tiny_random_llama2",
     "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
 }
 
diff --git a/tests/ipex/test_inference.py b/tests/ipex/test_inference.py
index bc1890453d..0d6e2e1554 100644
--- a/tests/ipex/test_inference.py
+++ b/tests/ipex/test_inference.py
@@ -41,7 +41,7 @@
     "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
     "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
     "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
-    "llama": "fxmarty/tiny-llama-fast-tokenizer",
+    "llama": "Jiqing/tiny_random_llama2",
     "opt": "hf-internal-testing/tiny-random-OPTModel",
     "mpt": "hf-internal-testing/tiny-random-MptForCausalLM",
 }
diff --git a/tests/ipex/test_modeling.py b/tests/ipex/test_modeling.py
index ffc2ca6a89..2bb5299a2d 100644
--- a/tests/ipex/test_modeling.py
+++ b/tests/ipex/test_modeling.py
@@ -66,7 +66,7 @@
     "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
     "gptj": "hf-internal-testing/tiny-random-GPTJModel",
     "levit": "hf-internal-testing/tiny-random-LevitModel",
-    "llama": "fxmarty/tiny-llama-fast-tokenizer",
+    "llama": "Jiqing/tiny_random_llama2",
     "marian": "sshleifer/tiny-marian-en-de",
     "mbart": "hf-internal-testing/tiny-random-mbart",
     "mistral": "echarlaix/tiny-random-mistral",

From 2102c259443bf58ab8fb4cba1e03a7083173bcb6 Mon Sep 17 00:00:00 2001
From: jiqing-feng <jiqing.feng@intel.com>
Date: Wed, 6 Mar 2024 04:11:19 -0500
Subject: [PATCH 2/3] keep llama and llama2 in tests

---
 tests/generation/test_modeling.py | 4 +++-
 tests/ipex/test_inference.py      | 4 +++-
 tests/ipex/test_modeling.py       | 6 ++++--
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/tests/generation/test_modeling.py b/tests/generation/test_modeling.py
index 0043baa99b..9b637d322d 100644
--- a/tests/generation/test_modeling.py
+++ b/tests/generation/test_modeling.py
@@ -30,7 +30,8 @@
     "gpt2": "hf-internal-testing/tiny-random-gpt2",
     "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
     "mistral": "echarlaix/tiny-random-mistral",
-    "llama": "Jiqing/tiny_random_llama2",
+    "llama": "fxmarty/tiny-llama-fast-tokenizer",
+    "llama2": "Jiqing/tiny_random_llama2",
     "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
 }
 
@@ -54,6 +55,7 @@ class ModelingIntegrationTest(unittest.TestCase):
         "gpt_neo",
         "mistral",
         "llama",
+        "llama2",
         # "gpt_bigcode",
     )
 
diff --git a/tests/ipex/test_inference.py b/tests/ipex/test_inference.py
index 0d6e2e1554..e120514506 100644
--- a/tests/ipex/test_inference.py
+++ b/tests/ipex/test_inference.py
@@ -41,7 +41,8 @@
     "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
     "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
     "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
-    "llama": "Jiqing/tiny_random_llama2",
+    "llama": "fxmarty/tiny-llama-fast-tokenizer",
+    "llama2": "Jiqing/tiny_random_llama2",
     "opt": "hf-internal-testing/tiny-random-OPTModel",
     "mpt": "hf-internal-testing/tiny-random-MptForCausalLM",
 }
@@ -66,6 +67,7 @@ class IPEXIntegrationTest(unittest.TestCase):
         "gpt_neo",
         # "gpt_bigcode",
         "llama",
+        "llama2",
         "opt",
         "mpt",
     )
diff --git a/tests/ipex/test_modeling.py b/tests/ipex/test_modeling.py
index 2bb5299a2d..fdba164487 100644
--- a/tests/ipex/test_modeling.py
+++ b/tests/ipex/test_modeling.py
@@ -66,7 +66,8 @@
     "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
     "gptj": "hf-internal-testing/tiny-random-GPTJModel",
     "levit": "hf-internal-testing/tiny-random-LevitModel",
-    "llama": "Jiqing/tiny_random_llama2",
+    "llama": "fxmarty/tiny-llama-fast-tokenizer",
+    "llama2": "Jiqing/tiny_random_llama2",
     "marian": "sshleifer/tiny-marian-en-de",
     "mbart": "hf-internal-testing/tiny-random-mbart",
     "mistral": "echarlaix/tiny-random-mistral",
@@ -209,6 +210,7 @@ class IPEXModelForCausalLMTest(unittest.TestCase):
         "gpt_neo",
         "gpt_neox",
         "llama",
+        "llama2",
         "mistral",
         # "phi",
         "mpt",
@@ -226,7 +228,7 @@ def test_compare_to_transformers(self, model_arch):
         self.assertTrue(ipex_model.use_cache)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokens = tokenizer(
-            "This is a sample", return_tensors="pt", return_token_type_ids=False if model_arch == "llama" else None
+            "This is a sample", return_tensors="pt", return_token_type_ids=False if model_arch in ("llama", "llama2") else None
         )
         position_ids = None
         if model_arch.replace("_", "-") in MODEL_TYPES_REQUIRING_POSITION_IDS:

From b8f5d41d92289e496994e69f52486aa812c483b3 Mon Sep 17 00:00:00 2001
From: jiqing-feng <jiqing.feng@intel.com>
Date: Wed, 6 Mar 2024 09:42:20 -0500
Subject: [PATCH 3/3] fix code style

---
 tests/ipex/test_modeling.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/ipex/test_modeling.py b/tests/ipex/test_modeling.py
index fdba164487..03b7d015d1 100644
--- a/tests/ipex/test_modeling.py
+++ b/tests/ipex/test_modeling.py
@@ -228,7 +228,9 @@ def test_compare_to_transformers(self, model_arch):
         self.assertTrue(ipex_model.use_cache)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokens = tokenizer(
-            "This is a sample", return_tensors="pt", return_token_type_ids=False if model_arch in ("llama", "llama2") else None
+            "This is a sample",
+            return_tensors="pt",
+            return_token_type_ids=False if model_arch in ("llama", "llama2") else None,
         )
         position_ids = None
         if model_arch.replace("_", "-") in MODEL_TYPES_REQUIRING_POSITION_IDS: