Skip to content

Commit

Permalink
Actually use k parameter in new task
Browse files Browse the repository at this point in the history
  • Loading branch information
NickyHavoc committed Apr 12, 2024
1 parent b1b4bce commit 8177567
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 16 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
- The implementation of the HuggingFace repository creation and deletion got moved to `HuggingFaceRepository`
### New Features
- feature: HuggingFaceDataset- & AggregationRepositories now have an explicit `create_repository` function.
- feature: Add `MultipleChunkRetrieverBasedQa`, a task that performs better and faster on retriever-QA, especially with longer context models

### Fixes
...

Expand Down
5 changes: 4 additions & 1 deletion src/intelligence_layer/use_cases/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@
from .qa.multiple_chunk_qa import MultipleChunkQaOutput as MultipleChunkQaOutput
from .qa.multiple_chunk_qa import Subanswer as Subanswer
from .qa.multiple_chunk_retriever_qa import (
MultipleChunkRetrieverBasedQa as MultipleChunkRetrieverBasedQa,
MulMultipleChunkRetrieverQaOutput as MulMultipleChunkRetrieverQaOutput,
)
from .qa.multiple_chunk_retriever_qa import (
MultipleChunkRetrieverQa as MultipleChunkRetrieverQa,
)
from .qa.retriever_based_qa import EnrichedSubanswer as EnrichedSubanswer
from .qa.retriever_based_qa import RetrieverBasedQa as RetrieverBasedQa
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ class AnswerSource(BaseModel, Generic[ID]):
highlights: Sequence[ScoredTextHighlight]


class MultipleChunkRetrieverBasedQaOutput(BaseModel, Generic[ID]):
class MulMultipleChunkRetrieverQaOutput(BaseModel, Generic[ID]):
answer: Optional[str]
sources: Sequence[AnswerSource[ID]]


class MultipleChunkRetrieverBasedQa(
Task[RetrieverBasedQaInput, MultipleChunkRetrieverBasedQaOutput[ID]], Generic[ID]
class MultipleChunkRetrieverQa(
Task[RetrieverBasedQaInput, MulMultipleChunkRetrieverQaOutput[ID]], Generic[ID]
):
"""Answer a question based on documents found by a retriever.
Expand Down Expand Up @@ -56,8 +56,8 @@ class MultipleChunkRetrieverBasedQa(
>>> token = os.getenv("AA_TOKEN")
>>> document_index = DocumentIndexClient(token)
>>> retriever = DocumentIndexRetriever(document_index, "aleph-alpha", "wikipedia-de", 10)
>>> task = MultipleChunkRetrieverQa(retriever)
>>> retriever = DocumentIndexRetriever(document_index, "aleph-alpha", "wikipedia-de", 3)
>>> task = MultipleChunkRetrieverQa(retriever, k=2)
>>> input_data = RetrieverBasedQaInput(question="When was Rome founded?")
>>> tracer = InMemoryTracer()
>>> output = task.run(input_data, tracer)
Expand Down Expand Up @@ -111,14 +111,14 @@ def _get_highlights_per_chunk(

def do_run(
self, input: RetrieverBasedQaInput, task_span: TaskSpan
) -> MultipleChunkRetrieverBasedQaOutput[ID]:
) -> MulMultipleChunkRetrieverQaOutput[ID]:
search_output = self._search.run(
SearchInput(query=input.question), task_span
).results
sorted_search_output = sorted(
search_output,
key=lambda output: output.score, # not reversing on purpose because model performs better if relevant info is at the end
)
)[-self._k:]

chunk, chunk_start_indices = self._combine_input_texts(
[output.document_chunk.text for output in sorted_search_output]
Expand All @@ -138,7 +138,7 @@ def do_run(
chunk_start_indices, single_chunk_qa_output.highlights
)

return MultipleChunkRetrieverBasedQaOutput(
return MulMultipleChunkRetrieverQaOutput(
answer=single_chunk_qa_output.answer,
sources=[
AnswerSource(
Expand Down
11 changes: 4 additions & 7 deletions tests/use_cases/qa/test_multiple_chunk_retriever_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,18 @@
QdrantInMemoryRetriever,
)
from intelligence_layer.core import NoOpTracer
from intelligence_layer.use_cases import (
MultipleChunkRetrieverBasedQa,
RetrieverBasedQaInput,
)
from intelligence_layer.use_cases import MultipleChunkRetrieverQa, RetrieverBasedQaInput


@fixture
def multiple_chunk_retriever_qa(
asymmetric_in_memory_retriever: QdrantInMemoryRetriever,
) -> MultipleChunkRetrieverBasedQa[int]:
return MultipleChunkRetrieverBasedQa(retriever=asymmetric_in_memory_retriever)
) -> MultipleChunkRetrieverQa[int]:
return MultipleChunkRetrieverQa(retriever=asymmetric_in_memory_retriever)


def test_retriever_based_qa_using_in_memory_retriever(
multiple_chunk_retriever_qa: MultipleChunkRetrieverBasedQa[int],
multiple_chunk_retriever_qa: MultipleChunkRetrieverQa[int],
no_op_tracer: NoOpTracer,
) -> None:
question = "When was Robert Moses born?"
Expand Down

0 comments on commit 8177567

Please sign in to comment.