From 7d80685d44d2c42d22ee2ee097fdf07571fff1c6 Mon Sep 17 00:00:00 2001 From: Felix Fehse <155464791+FelixFehse@users.noreply.github.com> Date: Thu, 27 Jun 2024 17:51:15 +0200 Subject: [PATCH] fix: correct max highlight range (#936) --- CHANGELOG.md | 1 + .../examples/qa/multiple_chunk_retriever_qa.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c52c5b5c..6d634f373 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ - `PromptTemplate.to_rich_prompt` now always returns an empty list for prompt ranges that are empty. - `SingleChunkQa` no longer crashes if given an empty input and a specific prompt template. This did not affect users who used models provided in `core`. - Added default values for `labels` and `metadata` for `EvaluationOverview` and `RunOverview` +- In the `MultipleChunkRetrieverQa`, text-highlight start and end points are now restricted to within the text length of the respective chunk. ### Deprecations ... diff --git a/src/intelligence_layer/examples/qa/multiple_chunk_retriever_qa.py b/src/intelligence_layer/examples/qa/multiple_chunk_retriever_qa.py index 4d0ef25de..9b4ada941 100644 --- a/src/intelligence_layer/examples/qa/multiple_chunk_retriever_qa.py +++ b/src/intelligence_layer/examples/qa/multiple_chunk_retriever_qa.py @@ -156,10 +156,15 @@ def _combine_input_texts( start_indices: list[int] = [] combined_text = "" for i, chunk in enumerate(chunks): - combined_text += source_appendix.format(i=i + 1) start_indices.append(len(combined_text)) - combined_text += chunk + "\n\n" - return (TextChunk(combined_text.strip()), start_indices) + + c = source_appendix.format(i=i + 1) + c += chunk + "\n\n" + c = c.strip() + if i != 0: + c = " " + c + combined_text += c + return (TextChunk(combined_text), start_indices) @staticmethod def _get_highlights_per_chunk( @@ -182,7 +187,10 @@ def _get_highlights_per_chunk( end=( highlight.end - current_start if isinstance(next_start, float) - else min(next_start, highlight.end - current_start) + else min( + next_start - current_start, + highlight.end - current_start, + ) ), score=highlight.score, )