diff --git a/comps/llms/text-generation/native/langchain/README.md b/comps/llms/text-generation/native/langchain/README.md
index 3ce428aa6..ca3ef4c83 100644
--- a/comps/llms/text-generation/native/langchain/README.md
+++ b/comps/llms/text-generation/native/langchain/README.md
@@ -10,6 +10,8 @@ If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file
 
 In order to start Native LLM service, you need to setup the following environment variables first.
 
+For the LLM model, both `Qwen` and `Falcon3` models are supported. Users can select a different model by changing `LLM_NATIVE_MODEL` below.
+
 ```bash
 export LLM_NATIVE_MODEL="Qwen/Qwen2-7B-Instruct"
 export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
@@ -19,8 +21,7 @@ export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
 
 ```bash
 cd ../../../../../
-docker build -t opea/llm-native:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/native/langchain
-Dockerfile .
+docker build -t opea/llm-native:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/native/langchain/Dockerfile .
 ```
 
 To start a docker container, you have two options:
diff --git a/comps/llms/text-generation/native/langchain/llm.py b/comps/llms/text-generation/native/langchain/llm.py
index 6008a91b8..f04b1db9d 100644
--- a/comps/llms/text-generation/native/langchain/llm.py
+++ b/comps/llms/text-generation/native/langchain/llm.py
@@ -73,7 +73,12 @@ def generate(
     encode_t0 = time.perf_counter()
 
     # Tokenization
-    input_tokens = tokenizer.batch_encode_plus(input_query, return_tensors="pt", padding=True)
+    input_tokens = tokenizer.batch_encode_plus(
+        input_query,
+        return_tensors="pt",
+        padding=True,
+        return_token_type_ids=False,  # token_type_ids is not needed for the Falcon3 model
+    )
     encode_duration = time.perf_counter() - encode_t0
 
     logger.info(f"[llm - generate] input tokenized: {input_tokens}")
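
For context (not part of the patch), below is a minimal sketch of what the `return_token_type_ids=False` change guards against, assuming the standard Hugging Face `transformers` API; the model name, prompt, and generation settings are illustrative placeholders rather than values taken from the service.

```python
# Minimal sketch (not from the patch): why return_token_type_ids=False matters.
# Assumes the standard Hugging Face `transformers` API; the model name and
# prompt are illustrative placeholders.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2-7B-Instruct"  # a Falcon3 checkpoint would be loaded the same way
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Some tokenizers include a token_type_ids tensor by default. Forwarding it via
# **inputs to generate() can fail for decoder-only models whose forward() does
# not accept that argument, so it is dropped at tokenization time.
inputs = tokenizer.batch_encode_plus(
    ["What is Deep Learning?"],
    return_tensors="pt",
    padding=True,
    return_token_type_ids=False,
)

outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```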