Move QdrantSearch, adjust use-case index

Aleph-Alpha · Oct 31, 2023 · d92cbda · d92cbda
1 parent cf6b466
commit d92cbda
Show file tree

Hide file tree

Showing 6 changed files with 123 additions and 140 deletions.
diff --git a/README.md b/README.md
@@ -18,17 +18,16 @@ The key features of the Intelligence Layer are:
 
 To give you a starting point for using the Intelligence Layer, we provide some pre-configured `Task`s that are ready to use out-of-the-box, as well as an accompanying "Getting started" guide in the form of Jupyter Notebooks.
 
-| Type      | Task                                                                                              | Description                                                               |
-| --------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- |
-| Classify  | [EmbeddingBasedClassify](./src/intelligence_layer/use_cases/classify/embedding_based_classify.py) | Classify a text of limited size that fits into the models context size with a single class where each class is defined by multiple examples. |
-| Classify  | [SingleLabelClassify](./src/intelligence_layer/use_cases/classify/single_label_classify.py)       | Classify a text of limited size that fits into the models context size with a single class where each class is defined just by its name using zero-shot prompting. |
-| QA        | [LongContextQa](./src/intelligence_layer/use_cases/qa/long_context_qa.py)                         | Answer a question based on one document of any length. |
-| QA        | [MultipleChunkQa](./src/intelligence_layer/use_cases/qa/multiple_chunk_qa.py)                     | Answer a question based a list of text where each element is of limited size that fits into the models context. |
-| QA        | [RetrieverBasedQa](./src/intelligence_layer/use_cases/qa/retriever_based_qa.py)                   | Answer a question based on a document base that is accessed through a [BaseRetriever](...) implementation. |
-| QA        | [SingleChunkQa](./src/intelligence_layer/use_cases/qa/single_chunk_qa.py)                         | Answer a question based on a text of limited size that fits into the models context. |
-| Search    | [QdrantSearch](./src/intelligence_layer/use_cases/search/qdrant_search.py)                        | Search through texts given a query and some filters (Move to core?).           |
-| Search    | [Search](./src/intelligence_layer/use_cases/search/search.py)                                     | Search a document based for document chunks that fit to a given query by using a [BaseRetriever](...) implementation. |
-| Summarize | [ShortBodySummarize](./src/intelligence_layer/use_cases/summarize/summarize.py)                   | Summarize a text of limited size that fits into the models context size into a short body text. |
+| Type      | Task                                                                                                                                                            | Description                                                                                                                                                                                                        |
+| --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| Classify  | [EmbeddingBasedClassify](https://glowing-tribble-223446r.pages.github.io/intelligence_layer.use_cases.html#intelligence_layer.use_cases.EmbeddingBasedClassify) | Classify a short text by computing its similarity with example texts for each class.                                                                                                                               |
+| Classify  | [SingleLabelClassify](https://glowing-tribble-223446r.pages.github.io/intelligence_layer.use_cases.html#intelligence_layer.use_cases.SingleLabelClassify)       | Classify a short text by assessing each class' probability using zero-shot prompting.                                                                                                                              |
+| QA        | [LongContextQa](https://glowing-tribble-223446r.pages.github.io/intelligence_layer.use_cases.html#intelligence_layer.use_cases.LongContextQa)                   | Answer a question based on one document of any length.                                                                                                                                                             |
+| QA        | [MultipleChunkQa](https://glowing-tribble-223446r.pages.github.io/intelligence_layer.use_cases.html#intelligence_layer.use_cases.MultipleChunkQa)               | Answer a question based on a list of short texts.                                                                                                                                                                  |
+| QA        | [RetrieverBasedQa](https://glowing-tribble-223446r.pages.github.io/intelligence_layer.use_cases.html#intelligence_layer.use_cases.RetrieverBasedQa)             | Answer a question based on a document base using a [BaseRetriever](https://glowing-tribble-223446r.pages.github.io/intelligence_layer.connectors.html#intelligence_layer.connectors.BaseRetriever) implementation. |
+| QA        | [SingleChunkQa](https://glowing-tribble-223446r.pages.github.io/intelligence_layer.use_cases.html#intelligence_layer.use_cases.SingleChunkQa)                   | Answer a question based on a short text.                                                                                                                                                                           |
+| Search    | [Search](https://glowing-tribble-223446r.pages.github.io/intelligence_layer.use_cases.html#intelligence_layer.use_cases.Search)                                 | Search for texts in a document base using a [BaseRetriever](https://glowing-tribble-223446r.pages.github.io/intelligence_layer.connectors.html#intelligence_layer.connectors.BaseRetriever) implementation.        |
+| Summarize | [ShortBodySummarize](https://glowing-tribble-223446r.pages.github.io/intelligence_layer.use_cases.html#intelligence_layer.use_cases.ShortBodySummarize)         | Condense a short text into a brief summary.                                                                                                                                                                        |
 
 ### How to make your own
 

diff --git a/src/intelligence_layer/use_cases/__init__.py b/src/intelligence_layer/use_cases/__init__.py
@@ -9,5 +9,7 @@
 )
 from .qa.long_context_qa import LongContextQa, LongContextQaInput
 from .qa.retriever_based_qa import RetrieverBasedQa, RetrieverBasedQaInput
+from .search.search import Search
+from .summarize.summarize import ShortBodySummarize
 
 __all__ = [symbol for symbol in dir() if symbol and symbol[0].isupper()]
diff --git a/src/intelligence_layer/use_cases/classify/embedding_based_classify.py b/src/intelligence_layer/use_cases/classify/embedding_based_classify.py
@@ -17,13 +17,66 @@
     ClassifyInput,
     ClassifyOutput,
 )
-from intelligence_layer.use_cases.search.qdrant_search import (
-    QdrantSearch,
-    QdrantSearchInput,
-)
 from intelligence_layer.use_cases.search.search import SearchOutput
 
 
+class QdrantSearchInput(BaseModel):
+    """The input for a `QdrantSearch` task.
+
+    Attributes:
+        query: The text to search for.
+        filter: Conditions to filter by, expressed as a Qdrant `models.Filter`.
+    """
+
+    query: str
+    filter: models.Filter
+
+
+class QdrantSearch(Task[QdrantSearchInput, SearchOutput]):
+    """Performs search to find documents using Qdrant filtering methods.
+
+    Given a query, this task will utilize a retriever to fetch relevant text search results.
+    Contrary to `Search`, this `Task` offers the option to filter.
+
+    Args:
+        in_memory_retriever: Retriever queried with the input's query and filter.
+
+    Example:
+        >>> client = Client(os.getenv("AA_TOKEN"))
+        >>> documents = [
+        >>>     Document(
+        >>>         text="West and East Germany reunited in 1990.",
+        >>>         metadata={"title": "Germany"},
+        >>>     )
+        >>> ]
+        >>> retriever = QdrantInMemoryRetriever(client, documents)
+        >>> task = QdrantSearch(retriever)
+        >>> input = QdrantSearchInput(
+        >>>     query="When did East and West Germany reunite?",
+        >>>     filter=models.Filter(
+        >>>         must=[
+        >>>             models.FieldCondition(
+        >>>                 key="metadata.title",
+        >>>                 match=models.MatchValue(value="Germany"),
+        >>>             ),
+        >>>         ]
+        >>>     )
+        >>> )
+        >>> logger = InMemoryLogger(name="Qdrant Search")
+        >>> output = task.run(input, logger)
+    """
+
+    def __init__(self, in_memory_retriever: QdrantInMemoryRetriever):
+        super().__init__()
+        self._in_memory_retriever = in_memory_retriever
+
+    def run(self, input: QdrantSearchInput, logger: DebugLogger) -> SearchOutput:
+        results = self._in_memory_retriever.get_filtered_documents_with_scores(
+            input.query, input.filter
+        )
+        return SearchOutput(results=results)
+
+
 class LabelWithExamples(BaseModel):
     """Defines a label and the list of examples making it up.
 

diff --git a/src/intelligence_layer/use_cases/search/qdrant_search.py b/src/intelligence_layer/use_cases/search/qdrant_search.py
diff --git a/tests/use_cases/classify/test_embedding_based_classify.py b/tests/use_cases/classify/test_embedding_based_classify.py
@@ -1,6 +1,13 @@
+from typing import Sequence
+
 from aleph_alpha_client import Client
 from pytest import fixture, raises
+from qdrant_client.http.models import models
 
+from intelligence_layer.connectors.retrievers.base_retriever import Document
+from intelligence_layer.connectors.retrievers.qdrant_in_memory_retriever import (
+    QdrantInMemoryRetriever,
+)
 from intelligence_layer.core.logger import NoOpDebugLogger
 from intelligence_layer.core.task import Chunk
 from intelligence_layer.use_cases.classify.classify import (
@@ -11,9 +18,36 @@
 from intelligence_layer.use_cases.classify.embedding_based_classify import (
     LabelWithExamples,
     EmbeddingBasedClassify,
+    QdrantSearch,
+    QdrantSearchInput,
 )
 
 
+@fixture
+def in_memory_retriever_documents() -> Sequence[Document]:
+    return [
+        Document(
+            text="Germany reunited. I kind of fit and am of the correct type.",
+            metadata={"type": "doc"},  # the only document typed "doc"
+        ),
+        Document(
+            text="Cats are small animals. Well, I do not fit at all and I am of the correct type.",
+            metadata={"type": "no doc"},  # NOTE(review): text says "correct type" but metadata is "no doc" - confirm intent
+        ),
+        Document(
+            text="Germany reunited in 1990. This document fits perfectly but it is of the wrong type.",
+            metadata={"type": "no doc"},  # matches the query text but carries the other type
+        ),
+    ]
+
+
+@fixture
+def qdrant_search(
+    asymmetric_in_memory_retriever: QdrantInMemoryRetriever,  # NOTE(review): fixture not defined in this diff - presumably in a conftest; verify
+) -> QdrantSearch:
+    return QdrantSearch(asymmetric_in_memory_retriever)
+
+
 @fixture
 def embedding_based_classify(client: Client) -> EmbeddingBasedClassify:
     labels_with_examples = [
@@ -37,6 +71,26 @@ def embedding_based_classify(client: Client) -> EmbeddingBasedClassify:
     return EmbeddingBasedClassify(labels_with_examples, client)
 
 
+def test_qdrant_search(
+    qdrant_search: QdrantSearch,
+    no_op_debug_logger: NoOpDebugLogger,
+    in_memory_retriever_documents: Sequence[Document],
+) -> None:
+    search_input = QdrantSearchInput(
+        query="When did Germany reunite?",
+        filter=models.Filter(  # keep only documents whose metadata type is "doc"
+            must=[
+                models.FieldCondition(
+                    key="metadata.type",
+                    match=models.MatchValue(value="doc"),
+                ),
+            ]
+        ),
+    )
+    result = qdrant_search.run(search_input, no_op_debug_logger)
+    # Of the fixture documents, only the first one is typed "doc".
+    assert [r.document for r in result.results] == [in_memory_retriever_documents[0]]
+
+
 def test_embedding_based_classify_returns_score_for_all_labels(
     embedding_based_classify: EmbeddingBasedClassify,
 ) -> None:

diff --git a/tests/use_cases/search/test_qdrant_search.py b/tests/use_cases/search/test_qdrant_search.py