Merge branch 'main' into remove_example_gateway
lkk12014402 authored Dec 11, 2024
2 parents 83ae2eb + 6acefae commit 8da4cec
Showing 2 changed files with 9 additions and 3 deletions.
5 changes: 3 additions & 2 deletions comps/llms/text-generation/native/langchain/README.md
@@ -10,6 +10,8 @@ If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file
 
 In order to start the Native LLM service, you need to set up the following environment variables first.
 
+For the LLM model, both `Qwen` and `Falcon3` models are supported. Users can select a different model by changing `LLM_NATIVE_MODEL` below.
+
 ```bash
 export LLM_NATIVE_MODEL="Qwen/Qwen2-7B-Instruct"
 export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
@@ -19,8 +21,7 @@ export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
 
 ```bash
 cd ../../../../../
-docker build -t opea/llm-native:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/native/langchain
-Dockerfile .
+docker build -t opea/llm-native:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/native/langchain/Dockerfile .
 ```
 
 To start a docker container, you have two options:
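The second hunk repairs a `docker build` command that had been split across two lines, leaving `-f` pointing at the component directory instead of its Dockerfile. For orientation, a minimal usage sketch follows; the container name, port mapping, and Falcon3 model id are illustrative assumptions, not part of this commit:

```bash
# Illustrative only: container name, published port, and Falcon3 model id
# are assumptions, not taken from this commit.
export LLM_NATIVE_MODEL="tiiuae/Falcon3-7B-Instruct"  # or "Qwen/Qwen2-7B-Instruct"
export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"

docker run -d --name llm-native \
  -p 9000:9000 \
  -e LLM_NATIVE_MODEL=$LLM_NATIVE_MODEL \
  -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \
  opea/llm-native:latest
```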
7 changes: 6 additions & 1 deletion comps/llms/text-generation/native/langchain/llm.py
@@ -73,7 +73,12 @@ def generate(
     encode_t0 = time.perf_counter()
 
     # Tokenization
-    input_tokens = tokenizer.batch_encode_plus(input_query, return_tensors="pt", padding=True)
+    input_tokens = tokenizer.batch_encode_plus(
+        input_query,
+        return_tensors="pt",
+        padding=True,
+        return_token_type_ids=False,  # token_type_ids is not needed for the Falcon3 model
+    )
     encode_duration = time.perf_counter() - encode_t0
     logger.info(f"[llm - generate] input tokenized: {input_tokens}")
 
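For context on why `return_token_type_ids=False` matters: tokenizers for some checkpoints include a `token_type_ids` entry in their output, but decoder-only models such as the Falcon family do not accept that keyword, so unpacking the encoding into `generate()` raises a `TypeError`. Below is a minimal standalone sketch of the pattern, assuming a Falcon3 checkpoint and illustrative prompts; it is not the repo's actual service code:

```python
# Minimal sketch of the tokenization pattern above. The model id and prompts
# are illustrative assumptions, not taken from this commit.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "tiiuae/Falcon3-7B-Instruct"  # assumed Falcon3 checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# padding=True needs a pad token; fall back to EOS if the checkpoint lacks one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

input_query = ["What is Deep Learning?", "Summarize AI in one sentence."]

# Without return_token_type_ids=False, some tokenizers add token_type_ids to
# the encoding, and generate(**input_tokens) can fail with
# "unexpected keyword argument 'token_type_ids'" on models that don't use it.
input_tokens = tokenizer.batch_encode_plus(
    input_query,
    return_tensors="pt",
    padding=True,
    return_token_type_ids=False,
)
output_ids = model.generate(**input_tokens, max_new_tokens=32)
print(tokenizer.batch_decode(output_ids, skip_special_tokens=True))
```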
