fix: correct max highlight range (#936)

Aleph-Alpha · Jun 27, 2024 · 7d80685 · 7d80685
1 parent 8c0b396
commit 7d80685
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 4 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -14,6 +14,7 @@
 - `PromptTemplate.to_rich_prompt` now always returns an empty list for prompt ranges that are empty.
 - `SingleChunkQa` no longer crashes if given an empty input and a specific prompt template. This did not affect users who used models provided in `core`.
 - Added default values for `labels` and `metadata` in `EvaluationOverview` and `RunOverview`.
+- In `MultipleChunkRetrieverQa`, the start and end points of text highlights are now clamped to the text length of their respective chunk.
 
 ### Deprecations 
 ...

diff --git a/src/intelligence_layer/examples/qa/multiple_chunk_retriever_qa.py b/src/intelligence_layer/examples/qa/multiple_chunk_retriever_qa.py
@@ -156,10 +156,15 @@ def _combine_input_texts(
         start_indices: list[int] = []
         combined_text = ""
         for i, chunk in enumerate(chunks):
-            combined_text += source_appendix.format(i=i + 1)
             start_indices.append(len(combined_text))
-            combined_text += chunk + "\n\n"
-        return (TextChunk(combined_text.strip()), start_indices)
+
+            c = source_appendix.format(i=i + 1)
+            c += chunk + "\n\n"
+            c = c.strip()
+            if i != 0:
+                c = " " + c
+            combined_text += c
+        return (TextChunk(combined_text), start_indices)
 
     @staticmethod
     def _get_highlights_per_chunk(
@@ -182,7 +187,10 @@ def _get_highlights_per_chunk(
                         end=(
                             highlight.end - current_start
                             if isinstance(next_start, float)
-                            else min(next_start, highlight.end - current_start)
+                            else min(
+                                next_start - current_start,
+                                highlight.end - current_start,
+                            )
                         ),
                         score=highlight.score,
                     )