From 6acefae785290d67b0c7993b4d5300376b4e1ee0 Mon Sep 17 00:00:00 2001
From: Letong Han <106566639+letonghan@users.noreply.github.com>
Date: Wed, 11 Dec 2024 11:35:05 +0800
Subject: [PATCH] [LLM] Modify Params to Support Falcon3 Model (#1027)

* modify params to support falcon3 model

---------

Signed-off-by: letonghan
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Zhenzhong Xu
---
 comps/llms/text-generation/native/langchain/README.md | 5 +++--
 comps/llms/text-generation/native/langchain/llm.py    | 7 ++++++-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/comps/llms/text-generation/native/langchain/README.md b/comps/llms/text-generation/native/langchain/README.md
index 3ce428aa6..ca3ef4c83 100644
--- a/comps/llms/text-generation/native/langchain/README.md
+++ b/comps/llms/text-generation/native/langchain/README.md
@@ -10,6 +10,8 @@ If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file
 
 In order to start Native LLM service, you need to setup the following environment variables first.
 
+For LLM model, both `Qwen` and `Falcon3` models are supported. Users can set different models by changing the `LLM_NATIVE_MODEL` below.
+
 ```bash
 export LLM_NATIVE_MODEL="Qwen/Qwen2-7B-Instruct"
 export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
@@ -19,8 +21,7 @@ export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
 
 ```bash
 cd ../../../../../
-docker build -t opea/llm-native:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/native/langchain
-Dockerfile .
+docker build -t opea/llm-native:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/native/langchain/Dockerfile .
 ```
 
 To start a docker container, you have two options:
diff --git a/comps/llms/text-generation/native/langchain/llm.py b/comps/llms/text-generation/native/langchain/llm.py
index 6008a91b8..f04b1db9d 100644
--- a/comps/llms/text-generation/native/langchain/llm.py
+++ b/comps/llms/text-generation/native/langchain/llm.py
@@ -73,7 +73,12 @@ def generate(
     encode_t0 = time.perf_counter()
 
     # Tokenization
-    input_tokens = tokenizer.batch_encode_plus(input_query, return_tensors="pt", padding=True)
+    input_tokens = tokenizer.batch_encode_plus(
+        input_query,
+        return_tensors="pt",
+        padding=True,
+        return_token_type_ids=False,  # token_type_ids is not needed for falcon-three model
+    )
     encode_duration = time.perf_counter() - encode_t0
     logger.info(f"[llm - generate] input tokenized: {input_tokens}")
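
For reference, the tokenizer parameter added in `llm.py` can be exercised outside the microservice with a minimal sketch like the one below, assuming a standard Hugging Face `transformers` environment; the checkpoint name, prompts, and pad-token fallback are illustrative assumptions, not values taken from this patch. The idea is that `return_token_type_ids=False` keeps `token_type_ids` out of the encoded batch, so the dict can be forwarded to generation for models whose forward pass does not accept that argument.

```python
# Minimal standalone sketch (assumptions: generic transformers setup;
# checkpoint name, prompts, and pad-token fallback are illustrative only).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "tiiuae/Falcon3-7B-Instruct"  # hypothetical example checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Some tokenizers define no pad token; reuse EOS so padding=True works on batches.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

queries = ["What is Deep Learning?", "Summarize the Falcon3 release in one line."]

# Without return_token_type_ids=False, some tokenizers also emit `token_type_ids`;
# forwarding that key through model.generate(**inputs) can fail with an
# "unexpected keyword argument" error on models whose forward() does not accept it.
inputs = tokenizer.batch_encode_plus(
    queries,
    return_tensors="pt",
    padding=True,
    return_token_type_ids=False,
)

outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```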