From 15ea92723c6c5852c62095c50650594b458edc5c Mon Sep 17 00:00:00 2001 From: Weves Date: Tue, 5 Sep 2023 12:20:19 -0700 Subject: [PATCH] Try and always use at least one chunk --- backend/danswer/direct_qa/qa_utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/backend/danswer/direct_qa/qa_utils.py b/backend/danswer/direct_qa/qa_utils.py index 0a19f5c425e..47c9f6854ee 100644 --- a/backend/danswer/direct_qa/qa_utils.py +++ b/backend/danswer/direct_qa/qa_utils.py @@ -270,6 +270,14 @@ def _get_usable_chunks( usable_chunks.append(chunk) + # try and return at least one chunk if possible. This chunk will + # get truncated later on in the pipeline. This would only occur if + # the first chunk is larger than the token limit (usually due to character + # count -> token count mismatches caused by special characters / non-ascii + # languages) + if not usable_chunks and chunks: + usable_chunks = [chunks[0]] + return usable_chunks