From 6acefae785290d67b0c7993b4d5300376b4e1ee0 Mon Sep 17 00:00:00 2001
From: Letong Han <106566639+letonghan@users.noreply.github.com>
Date: Wed, 11 Dec 2024 11:35:05 +0800
Subject: [PATCH] [LLM] Modify Params to Support Falcon3 Model (#1027)

* modify params to support falcon3 model

---------

Signed-off-by: letonghan
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Zhenzhong Xu
---
 comps/llms/text-generation/native/langchain/README.md | 5 +++--
 comps/llms/text-generation/native/langchain/llm.py    | 7 ++++++-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/comps/llms/text-generation/native/langchain/README.md b/comps/llms/text-generation/native/langchain/README.md
index 3ce428aa6..ca3ef4c83 100644
--- a/comps/llms/text-generation/native/langchain/README.md
+++ b/comps/llms/text-generation/native/langchain/README.md
@@ -10,6 +10,8 @@ If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file
 
 In order to start Native LLM service, you need to setup the following environment variables first.
 
+For LLM model, both `Qwen` and `Falcon3` models are supported. Users can set different models by changing the `LLM_NATIVE_MODEL` below.
+
 ```bash
 export LLM_NATIVE_MODEL="Qwen/Qwen2-7B-Instruct"
 export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
@@ -19,8 +21,7 @@ export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
 
 ```bash
 cd ../../../../../
-docker build -t opea/llm-native:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/native/langchain
-Dockerfile .
+docker build -t opea/llm-native:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/native/langchain/Dockerfile .
 ```
 
 To start a docker container, you have two options:
diff --git a/comps/llms/text-generation/native/langchain/llm.py b/comps/llms/text-generation/native/langchain/llm.py
index 6008a91b8..f04b1db9d 100644
--- a/comps/llms/text-generation/native/langchain/llm.py
+++ b/comps/llms/text-generation/native/langchain/llm.py
@@ -73,7 +73,12 @@ def generate(
     encode_t0 = time.perf_counter()
 
     # Tokenization
-    input_tokens = tokenizer.batch_encode_plus(input_query, return_tensors="pt", padding=True)
+    input_tokens = tokenizer.batch_encode_plus(
+        input_query,
+        return_tensors="pt",
+        padding=True,
+        return_token_type_ids=False,  # token_type_ids is not needed for falcon-three model
+    )
     encode_duration = time.perf_counter() - encode_t0
     logger.info(f"[llm - generate] input tokenized: {input_tokens}")
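
For reference, the tokenizer parameter added in `llm.py` can be exercised outside the microservice with a minimal sketch like the one below, assuming a standard Hugging Face `transformers` environment; the checkpoint name, prompts, and pad-token fallback are illustrative assumptions, not values taken from this patch. The idea is that `return_token_type_ids=False` keeps `token_type_ids` out of the encoded batch, so the dict can be forwarded to generation for models whose forward pass does not accept that argument.

```python
# Minimal standalone sketch (assumptions: generic transformers setup;
# checkpoint name, prompts, and pad-token fallback are illustrative only).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "tiiuae/Falcon3-7B-Instruct"  # hypothetical example checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Some tokenizers define no pad token; reuse EOS so padding=True works on batches.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

queries = ["What is Deep Learning?", "Summarize the Falcon3 release in one line."]

# Without return_token_type_ids=False, some tokenizers also emit `token_type_ids`;
# forwarding that key through model.generate(**inputs) can fail with an
# "unexpected keyword argument" error on models whose forward() does not accept it.
inputs = tokenizer.batch_encode_plus(
    queries,
    return_tensors="pt",
    padding=True,
    return_token_type_ids=False,
)

outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```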