From f7a6f93a778521a1ecb495428ea819d12116bd77 Mon Sep 17 00:00:00 2001
From: root
Date: Fri, 22 Mar 2024 18:42:46 -0700
Subject: [PATCH 01/17] added red-pajama to model list

---
 text_generation/causal_lm/cpp/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/text_generation/causal_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md
index 0ad2ffe928..9d67aba1af 100644
--- a/text_generation/causal_lm/cpp/README.md
+++ b/text_generation/causal_lm/cpp/README.md
@@ -141,6 +141,6 @@ To enable Unicode characters for Windows cmd open `Region` settings from `Contro
 2. https://huggingface.co/microsoft/phi-1_5
 9. [notus-7b-v1](https://huggingface.co/argilla/notus-7b-v1)
 10. [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
-
+11. [redpajama-3b-chat](https://huggingface.co/ikala/redpajama-3b-chat)
 
 This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature.

From fdf71566c85acdd84a0e81fa949bb89768ca5a6c Mon Sep 17 00:00:00 2001
From: root
Date: Fri, 22 Mar 2024 19:24:01 -0700
Subject: [PATCH 02/17] added github workflow for redpajama-3b-chat

---
 .github/workflows/causal_lm_cpp.yml | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index e58d4e67ee..0653641db6 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -351,3 +351,29 @@ jobs:
             predictions = predictions[:idx] + predictions[idx + len(ref):]
         "
         echo Phi-1_5 passed
+  cpp-beam_search_causal_lm-redpajama-3b-chat:
+    runs-on: ubuntu-22.04-8-cores
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        submodules: recursive
+    - uses: actions/setup-python@v4
+      with:
+        python-version: 3.10
+    - name: Install OpenVINO
+      run: |
+        mkdir ./ov/
+        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.0/linux/l_openvino_toolkit_ubuntu22_2024.0.0.14509.34caeefd078_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
+        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
+    - name: Download, convert and build
+      run: |
+        source ./ov/setupvars.sh
+        python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id ikala/redpajama-3b-chat --output_dir ./redpajama-3b-chat/ --precision FP16 &
+        cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
+        cmake --build ./build/ --config Release -j 15
+        wait
+    - name: Run Generation
+      run: |
+        source ./ov/setupvars.sh
+        convert_tokenizer ./redpajama-3b-chat/pytorch/dldt/FP16/ --output ./redpajama-3b-chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
+        timeout 50s ./build/beam_search_causal_lm ./redpajama-3b-chat/pytorch/dldt/FP16/ 69 > ./pred.txt
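A note on the job added in PATCH 02: the "Download, convert and build" step backgrounds the pip-install-plus-convert pipeline with a trailing `&` and joins it with `wait`, so the CMake build runs concurrently and a broken model export only surfaces later, when `beam_search_causal_lm` first reads the files. When reproducing this job locally, loading the produced IR fails much earlier. The sketch below assumes the converter writes `openvino_model.xml` under the FP16 output directory shown above; that file name is the converter's usual layout, not something the patch itself guarantees:

    # Sanity-check sketch: confirm the exported FP16 IR loads before running the C++ sample.
    import openvino as ov

    core = ov.Core()
    model = core.read_model('./redpajama-3b-chat/pytorch/dldt/FP16/openvino_model.xml')  # path assumed
    print([port.get_any_name() for port in model.inputs])  # expect input_ids, attention_mask, ...
    core.compile_model(model, 'CPU')  # raises early if the IR is malformed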
From d2f345b5c84a2f585b08e5f5ed9a7767b50a9509 Mon Sep 17 00:00:00 2001
From: Ngoc Chung Tran <90120932+tranchung163@users.noreply.github.com>
Date: Fri, 5 Apr 2024 11:45:25 -0700
Subject: [PATCH 03/17] Update causal_lm_cpp.yml

Added comparison with Hugging Face
---
 .github/workflows/causal_lm_cpp.yml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 0653641db6..4771b17069 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -377,3 +377,19 @@ jobs:
         source ./ov/setupvars.sh
         convert_tokenizer ./redpajama-3b-chat/pytorch/dldt/FP16/ --output ./redpajama-3b-chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
         timeout 50s ./build/beam_search_causal_lm ./redpajama-3b-chat/pytorch/dldt/FP16/ 69 > ./pred.txt
+    - name: Compare
+      run: |
+        python -c "
+        import transformers
+        with open('pred.txt', 'r') as file:
+            predictions = file.read()
+        tokenizer = transformers.LlamaTokenizer.from_pretrained('ikala/redpajama-3b-chat')
+        tokenized = tokenizer('69', return_tensors='pt')
+        for beam in transformers.LlamaForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
+            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+            idx = predictions.find(ref)
+            if -1 == idx:
+                raise RuntimeError(f'Missing "{ref=}" from predictions')
+            predictions = predictions[:idx] + predictions[idx + len(ref):]
+        "
+        echo 69 passed
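The Compare step in PATCH 03 encodes a strict contract: each of the 15 beams decoded on the Hugging Face side must appear verbatim in pred.txt, and every matched span is cut out of the text so that duplicated beams have to be found separately. A standalone sketch of that rule (the function name and toy strings are illustrative, not from the patch):

    def check_predictions(predictions, references):
        # Every reference must occur somewhere in the sample's output ...
        for ref in references:
            idx = predictions.find(ref)
            if idx == -1:
                raise RuntimeError(f'Missing {ref!r} from predictions')
            # ... and each occurrence is consumed, so one printed beam
            # cannot satisfy two identical references.
            predictions = predictions[:idx] + predictions[idx + len(ref):]

    check_predictions('a: foo\nb: bar\n', [': foo\n', ': bar\n'])  # passes silently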
From 6878081a0099130704114f77dba6cce42046b73b Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Mon, 8 Apr 2024 15:08:20 +0400
Subject: [PATCH 04/17] Apply suggestions from code review

---
 .github/workflows/causal_lm_cpp.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 56d73a8b9f..70e3d46119 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -378,7 +378,7 @@ jobs:
         "
         echo Phi-1_5 passed
   cpp-beam_search_causal_lm-redpajama-3b-chat:
-    runs-on: ubuntu-22.04-8-cores
+    runs-on: ubuntu-20.04-8-cores
     steps:
     - uses: actions/checkout@v4
       with:
@@ -389,14 +389,14 @@ jobs:
     - name: Install OpenVINO
       run: |
         mkdir ./ov/
-        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.0/linux/l_openvino_toolkit_ubuntu22_2024.0.0.14509.34caeefd078_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
+        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.0/linux/l_openvino_toolkit_ubuntu20_2024.0.0.14509.34caeefd078_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
         sudo ./ov/install_dependencies/install_openvino_dependencies.sh
     - name: Download, convert and build
       run: |
         source ./ov/setupvars.sh
         python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id ikala/redpajama-3b-chat --output_dir ./redpajama-3b-chat/ --precision FP16 &
         cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
-        cmake --build ./build/ --config Release -j 15
+        cmake --build ./build/ --config Release -j
         wait
     - name: Run Generation
       run: |

From 576bb7aa1e629907a0c39f6d87873a276059bef6 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Mon, 8 Apr 2024 15:15:39 +0400
Subject: [PATCH 05/17] Apply suggestions from code review

---
 .github/workflows/causal_lm_cpp.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 70e3d46119..a17a70c723 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -385,7 +385,7 @@ jobs:
         submodules: recursive
     - uses: actions/setup-python@v4
       with:
-        python-version: 3.10
+        python-version: 3.8
     - name: Install OpenVINO
       run: |
         mkdir ./ov/

From d8e38a0de7d349c58fb5c92c49edd47f2caa77b7 Mon Sep 17 00:00:00 2001
From: Ngoc Chung Tran <90120932+tranchung163@users.noreply.github.com>
Date: Fri, 12 Apr 2024 18:30:24 -0700
Subject: [PATCH 06/17] Change to GPTNeoXTokenizer

---
 .github/workflows/causal_lm_cpp.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index a17a70c723..f91b2309e5 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -409,9 +409,9 @@ jobs:
         import transformers
         with open('pred.txt', 'r') as file:
             predictions = file.read()
-        tokenizer = transformers.LlamaTokenizer.from_pretrained('ikala/redpajama-3b-chat')
+        tokenizer = transformers.GPTNeoXTokenizer.from_pretrained('ikala/redpajama-3b-chat')
         tokenized = tokenizer('69', return_tensors='pt')
-        for beam in transformers.LlamaForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
+        for beam in transformers.GPTNeoXForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
             ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
             idx = predictions.find(ref)
             if -1 == idx:

From 5e02cee241868d2c3b4fa24b36731e74393b25c0 Mon Sep 17 00:00:00 2001
From: Ngoc Chung Tran <90120932+tranchung163@users.noreply.github.com>
Date: Mon, 15 Apr 2024 17:13:51 -0700
Subject: [PATCH 07/17] Update causal_lm_cpp.yml

---
 .github/workflows/causal_lm_cpp.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 2fec602392..00d52ad226 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -454,13 +454,13 @@ jobs:
         import transformers
         with open('pred.txt', 'r') as file:
             predictions = file.read()
-        tokenizer = transformers.GPTNeoXTokenizer.from_pretrained('ikala/redpajama-3b-chat')
+        tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
         tokenized = tokenizer('69', return_tensors='pt')
-        for beam in transformers.GPTNeoXForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
+        for beam in transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
             ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
             idx = predictions.find(ref)
             if -1 == idx:
                 raise RuntimeError(f'Missing "{ref=}" from predictions')
             predictions = predictions[:idx] + predictions[idx + len(ref):]
         "
-        echo redpajama-3b passed
\ No newline at end of file
+        echo redpajama-3b passed
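On the class churn in PATCHES 06 and 07: as far as released transformers versions go, the library ships only a fast tokenizer for GPT-NeoX checkpoints (GPTNeoXTokenizerFast) and has no GPTNeoXTokenizer attribute, so the name used in PATCH 06 would raise AttributeError at run time, which is presumably why PATCH 07 settles on the Auto classes. Those resolve the concrete classes from the checkpoint's config, so the workflow no longer hard-codes an architecture. (PATCH 05's python-version change also sidesteps a classic YAML pitfall: an unquoted 3.10 is parsed as the float 3.1, while 3.8 round-trips cleanly.) A sketch of what the Auto resolution yields for this checkpoint; the printed class names are expectations, not taken from the patches:

    import transformers

    tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
    model = transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat')
    print(type(tokenizer).__name__)  # expected: GPTNeoXTokenizerFast
    print(type(model).__name__)      # expected: GPTNeoXForCausalLM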
From 1a5c57ef64e8fd16991c47c4a660850359301e82 Mon Sep 17 00:00:00 2001
From: Ngoc Chung Tran <90120932+tranchung163@users.noreply.github.com>
Date: Tue, 16 Apr 2024 00:09:09 -0700
Subject: [PATCH 08/17] Update causal_lm_cpp.yml

---
 .github/workflows/causal_lm_cpp.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 00d52ad226..bf07f88515 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -452,12 +452,12 @@ jobs:
       run: |
         python -c "
         import transformers
-        with open('pred.txt', 'r') as file:
+        with open('pred_greedy.txt', 'r') as file:
             predictions = file.read()
         tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
-        tokenized = tokenizer('69', return_tensors='pt')
-        for beam in transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
-            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+        tokenized = tokenizer('Alan Turing was a', return_tensors='pt')
+        for output in transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, max_length=100, do_sample=False):
+            ref = tokenizer.decode(output[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
             idx = predictions.find(ref)
             if -1 == idx:
                 raise RuntimeError(f'Missing "{ref=}" from predictions')

From 0d6222246ce87a0d9701ec13bc4b60421eb21dbe Mon Sep 17 00:00:00 2001
From: Ngoc Chung Tran <90120932+tranchung163@users.noreply.github.com>
Date: Tue, 16 Apr 2024 09:46:57 -0700
Subject: [PATCH 09/17] Fixed causal_lm_cpp.yml

---
 .github/workflows/causal_lm_cpp.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index bf07f88515..8213fb6c44 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -452,7 +452,7 @@ jobs:
       run: |
         python -c "
         import transformers
-        with open('pred_greedy.txt', 'r') as file:
+        with open('pred.txt', 'r') as file:
             predictions = file.read()
         tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
         tokenized = tokenizer('Alan Turing was a', return_tensors='pt')
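Two things are worth noting around PATCHES 08 and 09. First, PATCH 08 pointed the Compare step at pred_greedy.txt while the Run Generation step still wrote ./pred.txt; that mismatch is what PATCH 09 repairs. Second, the Hugging Face reference moved from diverse beam search to plain greedy decoding (do_sample=False with no beam arguments), so the reference continuation is deterministic for a fixed model revision. A standalone sketch of the greedy reference generation, with the prompt and length taken from PATCH 08:

    import transformers

    tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
    model = transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat')
    tokenized = tokenizer('Alan Turing was a', return_tensors='pt')
    output = model.generate(**tokenized, max_length=100, do_sample=False)[0]
    # Decode only the continuation, slicing off the prompt tokens as the workflow does.
    print(tokenizer.decode(output[tokenized['input_ids'].numel():], skip_special_tokens=True))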
From f91e7a8a0e444ed986fba5a121f5dae6abe36b51 Mon Sep 17 00:00:00 2001
From: Ngoc Chung Tran <90120932+tranchung163@users.noreply.github.com>
Date: Thu, 18 Apr 2024 08:48:22 -0700
Subject: [PATCH 10/17] Update causal_lm_cpp.yml

---
 .github/workflows/causal_lm_cpp.yml | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 8213fb6c44..78b69ee1a1 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -452,15 +452,24 @@ jobs:
       run: |
         python -c "
         import transformers
-        with open('pred.txt', 'r') as file:
+        with open('pred_greedy.txt', 'r') as file:
             predictions = file.read()
         tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
-        tokenized = tokenizer('Alan Turing was a', return_tensors='pt')
-        for output in transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, max_length=100, do_sample=False):
-            ref = tokenizer.decode(output[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
-            idx = predictions.find(ref)
-            if -1 == idx:
-                raise RuntimeError(f'Missing "{ref=}" from predictions')
-            predictions = predictions[:idx] + predictions[idx + len(ref):]
+        model = transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat')
+        tokenized_input = tokenizer("Alan Turing was a", return_tensors='pt')
+        generated_texts = []
+        for output in model.generate(**tokenized_input, max_length=100, do_sample=False):
+            generated_text = tokenizer.decode(output[tokenized_input['input_ids'].numel():], skip_special_tokens=True).strip()
+            generated_texts.append(generated_text)
+        found_match = False
+        for sentence in predictions.split('.'):
+            for generated_text in generated_texts:
+                if sentence.strip() in generated_text:
+                    found_match = True
+                    break
+            if found_match:
+                break
+        if not found_match:
+            raise RuntimeError(f'Missing a sentence from predictions')
         "
         echo redpajama-3b passed

From f34c91d46abc89e5e74a09adab700245db887304 Mon Sep 17 00:00:00 2001
From: Ngoc Chung Tran <90120932+tranchung163@users.noreply.github.com>
Date: Mon, 22 Apr 2024 16:42:46 -0700
Subject: [PATCH 11/17] Update causal_lm_cpp.yml

---
 .github/workflows/causal_lm_cpp.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 78b69ee1a1..9ace507b9a 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -456,7 +456,8 @@ jobs:
             predictions = file.read()
         tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
         model = transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat')
-        tokenized_input = tokenizer("Alan Turing was a", return_tensors='pt')
+        input_text = 'Alan Turin was a'
+        tokenized_input = tokenizer(input_text, return_tensors='pt')
         generated_texts = []
         for output in model.generate(**tokenized_input, max_length=100, do_sample=False):
             generated_text = tokenizer.decode(output[tokenized_input['input_ids'].numel():], skip_special_tokens=True).strip()

From 5fdbb86a3029b2ee447750b7fd870ae124498ab7 Mon Sep 17 00:00:00 2001
From: Ngoc Chung Tran <90120932+tranchung163@users.noreply.github.com>
Date: Mon, 22 Apr 2024 22:40:40 -0700
Subject: [PATCH 12/17] Update causal_lm_cpp.yml

---
 .github/workflows/causal_lm_cpp.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 3e3c6e5d3a..24e0c66893 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -427,7 +427,7 @@ jobs:
       run: |
         python -c "
         import transformers
-        with open('pred_greedy.txt', 'r') as file:
+        with open('pred.txt', 'r') as file:
             predictions = file.read()
         tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
         model = transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat')
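The sentence-level check introduced in PATCH 10 passes as soon as any single '.'-delimited fragment of the C++ output is a substring of any greedy continuation. That is far more tolerant of tokenizer or whitespace drift than the exact-match rule it replaced, and, as written, too tolerant: any fragment of predictions.split('.') that strips to the empty string (for example a trailing newline after the final period) matches every candidate, since '' is a substring of every string, making the check pass vacuously. A sketch of the rule with a guard added for that case; the guard is my addition and the toy strings are illustrative:

    def any_sentence_matches(predictions, generated_texts):
        return any(
            sentence.strip() and sentence.strip() in generated_text  # guard out empty fragments
            for sentence in predictions.split('.')
            for generated_text in generated_texts
        )

    print(any_sentence_matches('He was a pioneer. Unrelated tail.',
                               ['He was a pioneer of computer science.']))  # True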
From d6dc09624ae70e57843c1853678231cdf66f4b0d Mon Sep 17 00:00:00 2001
From: Ngoc Chung Tran <90120932+tranchung163@users.noreply.github.com>
Date: Mon, 22 Apr 2024 23:31:08 -0700
Subject: [PATCH 13/17] Update causal_lm_cpp.yml

---
 .github/workflows/causal_lm_cpp.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 24e0c66893..6d8c2555f6 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -431,7 +431,7 @@ jobs:
             predictions = file.read()
         tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
         model = transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat')
-        input_text = 'Alan Turin was a'
+        input_text = '69'
         tokenized_input = tokenizer(input_text, return_tensors='pt')
         generated_texts = []
         for output in model.generate(**tokenized_input, max_length=100, do_sample=False):
@@ -448,4 +448,4 @@ jobs:
         if not found_match:
             raise RuntimeError(f'Missing a sentence from predictions')
         "
-        echo redpajama-3b passed
+        echo "69" passed

From fab6966e12819688a288ce5fec2a4269cfdc4d7d Mon Sep 17 00:00:00 2001
From: Ngoc Chung Tran <90120932+tranchung163@users.noreply.github.com>
Date: Tue, 23 Apr 2024 00:38:11 -0700
Subject: [PATCH 14/17] Update causal_lm_cpp.yml

---
 .github/workflows/causal_lm_cpp.yml | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 6d8c2555f6..22c6b167d7 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -430,22 +430,12 @@ jobs:
         with open('pred.txt', 'r') as file:
             predictions = file.read()
         tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
-        model = transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat')
-        input_text = '69'
-        tokenized_input = tokenizer(input_text, return_tensors='pt')
-        generated_texts = []
-        for output in model.generate(**tokenized_input, max_length=100, do_sample=False):
-            generated_text = tokenizer.decode(output[tokenized_input['input_ids'].numel():], skip_special_tokens=True).strip()
-            generated_texts.append(generated_text)
-        found_match = False
-        for sentence in predictions.split('.'):
-            for generated_text in generated_texts:
-                if sentence.strip() in generated_text:
-                    found_match = True
-                    break
-            if found_match:
-                break
-        if not found_match:
-            raise RuntimeError(f'Missing a sentence from predictions')
+        tokenized = tokenizer('69', return_tensors='pt')
+        for output in transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, max_length=100, do_sample=False):
+            ref = tokenizer.decode(output[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+            idx = predictions.find(ref)
+            if -1 == idx:
+                raise RuntimeError(f'Missing "{ref}" from predictions')
+            predictions = predictions[:idx] + predictions[idx + len(ref):]
         "
         echo "69" passed

From 9684adf1619bf8ff1d91ea982b4b3fe795eefb24 Mon Sep 17 00:00:00 2001
From: Ngoc Chung Tran <90120932+tranchung163@users.noreply.github.com>
Date: Tue, 23 Apr 2024 00:52:42 -0700
Subject: [PATCH 15/17] Update causal_lm_cpp.yml

---
 .github/workflows/causal_lm_cpp.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 22c6b167d7..1f2cd34492 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -422,15 +422,15 @@ jobs:
       run: |
         source ./ov/setupvars.sh
         convert_tokenizer ./redpajama-3b-chat/pytorch/dldt/FP16/ --output ./redpajama-3b-chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
-        timeout 50s ./build/beam_search_causal_lm ./redpajama-3b-chat/pytorch/dldt/FP16/ 69 > ./pred.txt
+        timeout 50s ./build/greedy_causal_lm ./redpajama-3b-chat/pytorch/dldt/FP16/ "Alan Turing was a" > ./pred_greedy.txt
     - name: Compare
       run: |
         python -c "
         import transformers
-        with open('pred.txt', 'r') as file:
+        with open('pred_greedy', 'r') as file:
             predictions = file.read()
         tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
-        tokenized = tokenizer('69', return_tensors='pt')
+        tokenized = tokenizer('Alan Turing was a', return_tensors='pt')
         for output in transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, max_length=100, do_sample=False):
             ref = tokenizer.decode(output[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
             idx = predictions.find(ref)
@@ -438,4 +438,4 @@ jobs:
                 raise RuntimeError(f'Missing "{ref}" from predictions')
             predictions = predictions[:idx] + predictions[idx + len(ref):]
         "
-        echo "69" passed
+        echo "Alan Turing was a" passed
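As of PATCH 15 the Compare step opens 'pred_greedy' while Run Generation writes './pred_greedy.txt', so the job would die inside open() before comparing anything; the next patch restores the extension. When iterating on the job locally, a fail-fast existence check avoids downloading the reference model only to hit that kind of mismatch. A sketch (the file name mirrors the Run Generation step above; the guard itself is a suggestion, not part of the series):

    from pathlib import Path

    out = Path('pred_greedy.txt')  # the name the Run Generation step writes
    if not out.is_file() or out.stat().st_size == 0:
        raise RuntimeError(f'{out} is missing or empty; check the generation step first')
    print(out.read_text()[:200])  # eyeball the start of the continuation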
From 70537417024633aa4dc07e72d473b193e4177927 Mon Sep 17 00:00:00 2001
From: Ngoc Chung Tran <90120932+tranchung163@users.noreply.github.com>
Date: Tue, 23 Apr 2024 01:02:59 -0700
Subject: [PATCH 16/17] Update causal_lm_cpp.yml

---
 .github/workflows/causal_lm_cpp.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 1f2cd34492..b200ef2995 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -397,7 +397,7 @@ jobs:
         "
         echo Phi-1_5 passed
 
-  cpp-beam_search_causal_lm-redpajama-3b-chat:
+  cpp-greedy_causal_lm-redpajama-3b-chat:
    runs-on: ubuntu-20.04-8-cores
     steps:
     - uses: actions/checkout@v4
@@ -427,7 +427,7 @@ jobs:
       run: |
         python -c "
         import transformers
-        with open('pred_greedy', 'r') as file:
+        with open('pred_greedy.txt', 'r') as file:
             predictions = file.read()
         tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
         tokenized = tokenizer('Alan Turing was a', return_tensors='pt')

From d3c7936754a0e1f37a19666fcaa660a9c75290ff Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Tue, 23 Apr 2024 14:44:00 +0400
Subject: [PATCH 17/17] Update .github/workflows/causal_lm_cpp.yml

Co-authored-by: Zlobin Vladimir
---
 .github/workflows/causal_lm_cpp.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index b200ef2995..a34d6e4013 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -398,7 +398,7 @@ jobs:
         echo Phi-1_5 passed
 
   cpp-greedy_causal_lm-redpajama-3b-chat:
-    runs-on: ubuntu-20.04-8-cores
+    runs-on: ubuntu-20.04
     steps:
     - uses: actions/checkout@v4
       with: