From 56f8ebc709f84087c8ab5d1f26478b6885673164 Mon Sep 17 00:00:00 2001 From: "Yao, Qing" Date: Wed, 23 Oct 2024 14:47:05 +0800 Subject: [PATCH 1/5] [Codegen] Replace codegen default Model to Qwen/Qwen2.5-Coder-7B-Instruct. Signed-off-by: Yao, Qing --- CodeGen/README.md | 6 +++--- CodeGen/docker_compose/intel/cpu/xeon/README.md | 2 +- CodeGen/docker_compose/intel/hpu/gaudi/README.md | 2 +- CodeGen/docker_compose/set_env.sh | 2 +- CodeGen/kubernetes/intel/README.md | 2 +- CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml | 2 +- CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml | 2 +- CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml | 2 +- CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml | 2 +- ProductivitySuite/docker_compose/intel/cpu/xeon/README.md | 2 +- ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh | 2 +- .../kubernetes/intel/cpu/xeon/manifest/codegen.yaml | 2 +- supported_examples.md | 2 +- 13 files changed, 15 insertions(+), 15 deletions(-) diff --git a/CodeGen/README.md b/CodeGen/README.md index 03288fb2d..03955ff00 100644 --- a/CodeGen/README.md +++ b/CodeGen/README.md @@ -87,10 +87,10 @@ By default, the LLM model is set to a default value as listed below: | Service | Model | | ------------ | ------------------------------------------------------------------------------- | -| LLM_MODEL_ID | [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf) | +| LLM_MODEL_ID | [Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) | -[meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf) is a gated model that requires submitting an access request through Hugging Face. You can replace it with another model. -Change the `LLM_MODEL_ID` below for your needs, such as: [Qwen/CodeQwen1.5-7B-Chat](https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat), [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct) +[Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) may be a gated model that requires submitting an access request through Hugging Face. You can replace it with another model. +Change the `LLM_MODEL_ID` below for your needs, such as: [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct) If you choose to use `meta-llama/CodeLlama-7b-hf` as LLM model, you will need to visit [here](https://huggingface.co/meta-llama/CodeLlama-7b-hf), click the `Expand to review and access` button to ask for model access. 
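For reference only (not part of this patch), a minimal sketch of overriding the new default model when bringing up the CodeGen stack on Xeon. It assumes the set_env.sh and compose.yaml paths shown in the diffs above and the usual Docker Compose workflow; adjust paths to your checkout, and note that the Hugging Face token is only required for gated checkpoints:

```bash
# Illustrative sketch, not part of the patch series.
export host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN="your_hf_api_token"          # only needed for gated models
source CodeGen/docker_compose/set_env.sh                     # sets LLM_MODEL_ID to the new Qwen default
export LLM_MODEL_ID="deepseek-ai/deepseek-coder-6.7b-instruct"   # optional override of the default
docker compose -f CodeGen/docker_compose/intel/cpu/xeon/compose.yaml up -d
```
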
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index 8bdde1f75..5332d719a 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -105,7 +105,7 @@ export your_no_proxy=${your_no_proxy},"External_Public_IP" export no_proxy=${your_no_proxy} export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} -export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf" +export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:8028" export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/README.md b/CodeGen/docker_compose/intel/hpu/gaudi/README.md index 2a5040ea0..31cfad292 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md @@ -85,7 +85,7 @@ Since the `compose.yaml` will consume some environment variables, you need to se export no_proxy=${your_no_proxy} export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} -export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf" +export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:8028" export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index d66a120af..dba717b64 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 -export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf" +export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:8028" export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} diff --git a/CodeGen/kubernetes/intel/README.md b/CodeGen/kubernetes/intel/README.md index be18003b8..a4bb44681 100644 --- a/CodeGen/kubernetes/intel/README.md +++ b/CodeGen/kubernetes/intel/README.md @@ -14,7 +14,7 @@ ``` cd GenAIExamples/CodeGen/kubernetes/intel/cpu/xeon/manifests export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" -export MODEL_ID="meta-llama/CodeLlama-7b-hf" +export MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml sed -i "s/meta-llama\/CodeLlama-7b-hf/${MODEL_ID}/g" codegen.yaml kubectl apply -f codegen.yaml diff --git a/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml b/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml index dd1675ce3..8dd3c2b57 100644 --- a/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml +++ b/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml @@ -29,6 +29,6 @@ spec: internalService: serviceName: tgi-service config: - MODEL_ID: meta-llama/CodeLlama-7b-hf + MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct endpoint: /generate isDownstreamService: true diff --git a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml index 96cc68266..4e6d8f91c 100644 --- a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml +++ b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml @@ -64,7 +64,7 @@ metadata: app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm data: - MODEL_ID: "meta-llama/CodeLlama-7b-hf" + MODEL_ID: "Qwen/Qwen2.5-Coder-7B-Instruct" PORT: "2080" HF_TOKEN: "insert-your-huggingface-token-here" http_proxy: "" diff --git 
a/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml b/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml index 2e3782057..d9a927e5c 100644 --- a/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml +++ b/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml @@ -29,6 +29,6 @@ spec: internalService: serviceName: tgi-gaudi-svc config: - MODEL_ID: meta-llama/CodeLlama-7b-hf + MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct endpoint: /generate isDownstreamService: true diff --git a/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml b/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml index c4a43a7c3..b506d17d4 100644 --- a/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml +++ b/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml @@ -64,7 +64,7 @@ metadata: app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm data: - MODEL_ID: "meta-llama/CodeLlama-7b-hf" + MODEL_ID: "Qwen/Qwen2.5-Coder-7B-Instruct" PORT: "2080" HF_TOKEN: "insert-your-huggingface-token-here" http_proxy: "" diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md index c5463ad10..892014a5e 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md @@ -137,7 +137,7 @@ export COLLECTION_NAME="Conversations" export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export LLM_MODEL_ID_CODEGEN="meta-llama/CodeLlama-7b-hf" +export LLM_MODEL_ID_CODEGEN="Qwen/Qwen2.5-Coder-7B-Instruct" export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" export TGI_LLM_ENDPOINT="http://${host_ip}:9009" diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh b/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh index 0139b532a..ca369b5ee 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh @@ -8,7 +8,7 @@ export COLLECTION_NAME="Conversations" export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export LLM_MODEL_ID_CODEGEN="meta-llama/CodeLlama-7b-hf" +export LLM_MODEL_ID_CODEGEN="Qwen/Qwen2.5-Coder-7B-Instruct" export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" export TGI_LLM_ENDPOINT="http://${host_ip}:9009" diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml index 6c52c5d92..05788f61a 100644 --- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml +++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml @@ -39,7 +39,7 @@ metadata: app.kubernetes.io/version: "1.4" app.kubernetes.io/managed-by: Helm data: - MODEL_ID: "meta-llama/CodeLlama-7b-hf" + MODEL_ID: "Qwen/Qwen2.5-Coder-7B-Instruct" PORT: "2080" HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here" HF_TOKEN: "insert-your-huggingface-token-here" diff --git a/supported_examples.md b/supported_examples.md index e913fd22e..66efb401a 100644 --- a/supported_examples.md +++ b/supported_examples.md @@ -65,7 +65,7 @@ This document introduces the supported examples of GenAIExamples. 
The supported | Framework | LLM | Serving | HW | Description | | ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------- | --------------------------------------------------------------- | ----------- | ----------- | -| [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf) | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon/Gaudi2 | Copilot | +| [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon/Gaudi2 | Copilot | ### CodeTrans From a2ecf0ac777c86fbac0faf814180c6c46937f18e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 06:49:59 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- CodeGen/README.md | 6 +++--- supported_examples.md | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CodeGen/README.md b/CodeGen/README.md index 03955ff00..013c31d37 100644 --- a/CodeGen/README.md +++ b/CodeGen/README.md @@ -85,12 +85,12 @@ Currently we support two ways of deploying ChatQnA services with docker compose: By default, the LLM model is set to a default value as listed below: -| Service | Model | -| ------------ | ------------------------------------------------------------------------------- | +| Service | Model | +| ------------ | --------------------------------------------------------------------------------------- | | LLM_MODEL_ID | [Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) | [Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) may be a gated model that requires submitting an access request through Hugging Face. You can replace it with another model. -Change the `LLM_MODEL_ID` below for your needs, such as: [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct) +Change the `LLM_MODEL_ID` below for your needs, such as: [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct) If you choose to use `meta-llama/CodeLlama-7b-hf` as LLM model, you will need to visit [here](https://huggingface.co/meta-llama/CodeLlama-7b-hf), click the `Expand to review and access` button to ask for model access. diff --git a/supported_examples.md b/supported_examples.md index 66efb401a..00ce346f5 100644 --- a/supported_examples.md +++ b/supported_examples.md @@ -63,8 +63,8 @@ This document introduces the supported examples of GenAIExamples. The supported [CodeGen](./CodeGen/README.md) is an example of copilot designed for code generation in Visual Studio Code. 
-| Framework | LLM | Serving | HW | Description | -| ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------- | --------------------------------------------------------------- | ----------- | ----------- | +| Framework | LLM | Serving | HW | Description | +| ------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------- | --------------------------------------------------------------- | ----------- | ----------- | | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon/Gaudi2 | Copilot | ### CodeTrans From c58ea38e1545cab86d2591b64ce8b7476adc379b Mon Sep 17 00:00:00 2001 From: "Yao, Qing" Date: Thu, 24 Oct 2024 11:24:32 +0800 Subject: [PATCH 3/5] Fix docker compose yaml in Productivity Suite. Signed-off-by: Yao, Qing --- .../docker_compose/intel/cpu/xeon/compose.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml index 2e56d6584..00ef93d3b 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml @@ -165,10 +165,10 @@ services: https_proxy: ${https_proxy} http_proxy: ${http_proxy} MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} - EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP} + EMBEDDING_SERVER_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP} RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP} - RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP} - LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA} + RERANK_SERVER_HOST_IP: ${RERANK_SERVICE_HOST_IP} + LLM_SERVER_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA} ipc: host restart: always tgi_service_codegen: From 902e15111debf22a73427f097098f4c50eaf3d26 Mon Sep 17 00:00:00 2001 From: "Yao, Qing" Date: Thu, 24 Oct 2024 13:50:46 +0800 Subject: [PATCH 4/5] Fix PORT env in docker compose yaml for Productivity Suite. Signed-off-by: Yao, Qing --- ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml index 00ef93d3b..f6882d9e8 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml @@ -166,9 +166,12 @@ services: http_proxy: ${http_proxy} MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} EMBEDDING_SERVER_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP} + EMBEDDING_SERVER_PORT: 6006 RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP} RERANK_SERVER_HOST_IP: ${RERANK_SERVICE_HOST_IP} + RERANK_SERVER_PORT: 8008 LLM_SERVER_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA} + LLM_SERVER_PORT: 9009 ` ipc: host restart: always tgi_service_codegen: From 6f13da0c3b0b0d63a42774463e417fa20786a631 Mon Sep 17 00:00:00 2001 From: "Yao, Qing" Date: Mon, 28 Oct 2024 08:40:08 +0800 Subject: [PATCH 5/5] Revert changes in Productivity Suite. 
Signed-off-by: Yao, Qing --- .../docker_compose/intel/cpu/xeon/README.md | 2 +- .../docker_compose/intel/cpu/xeon/compose.yaml | 9 +++------ .../docker_compose/intel/cpu/xeon/set_env.sh | 2 +- .../kubernetes/intel/cpu/xeon/manifest/codegen.yaml | 2 +- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md index 892014a5e..c5463ad10 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md @@ -137,7 +137,7 @@ export COLLECTION_NAME="Conversations" export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export LLM_MODEL_ID_CODEGEN="Qwen/Qwen2.5-Coder-7B-Instruct" +export LLM_MODEL_ID_CODEGEN="meta-llama/CodeLlama-7b-hf" export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" export TGI_LLM_ENDPOINT="http://${host_ip}:9009" diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml index f6882d9e8..2e56d6584 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml @@ -165,13 +165,10 @@ services: https_proxy: ${https_proxy} http_proxy: ${http_proxy} MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} - EMBEDDING_SERVER_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP} - EMBEDDING_SERVER_PORT: 6006 + EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP} RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP} - RERANK_SERVER_HOST_IP: ${RERANK_SERVICE_HOST_IP} - RERANK_SERVER_PORT: 8008 - LLM_SERVER_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA} - LLM_SERVER_PORT: 9009 ` + RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA} ipc: host restart: always tgi_service_codegen: diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh b/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh index ca369b5ee..0139b532a 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh @@ -8,7 +8,7 @@ export COLLECTION_NAME="Conversations" export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export LLM_MODEL_ID_CODEGEN="Qwen/Qwen2.5-Coder-7B-Instruct" +export LLM_MODEL_ID_CODEGEN="meta-llama/CodeLlama-7b-hf" export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" export TGI_LLM_ENDPOINT="http://${host_ip}:9009" diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml index 05788f61a..6c52c5d92 100644 --- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml +++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml @@ -39,7 +39,7 @@ metadata: app.kubernetes.io/version: "1.4" app.kubernetes.io/managed-by: Helm data: - MODEL_ID: "Qwen/Qwen2.5-Coder-7B-Instruct" + MODEL_ID: "meta-llama/CodeLlama-7b-hf" PORT: "2080" HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here" HF_TOKEN: "insert-your-huggingface-token-here"
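
As a possible follow-up check, not part of these patches, one might confirm that the TGI service is actually serving the new default model once the CodeGen stack is up. The sketch below assumes TGI is exposed on port 8028, matching TGI_LLM_ENDPOINT in the environment files above, and uses TGI's standard `/info` and `/generate` endpoints:

```bash
# Hypothetical smoke test, assuming TGI listens on ${host_ip}:8028 as configured above.
# /info reports the loaded model id; /generate runs a short completion against it.
curl http://${host_ip}:8028/info
curl http://${host_ip}:8028/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "def fibonacci(n):", "parameters": {"max_new_tokens": 64}}'
```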