diff --git a/README.md b/README.md index bd8dbfee7..c41007af6 100644 --- a/README.md +++ b/README.md @@ -18,13 +18,14 @@ The key features of the Intelligence Layer are: Not sure where to start? Familiarize yourself with the Intelligence Layer using the below notebooks. -| Order | Task | Description | Notebook 📓 | -| ----- | ------------------ | --------------------------------------- | ------------------------------------------------------------- | -| 1 | Summarization | Summarize a document | [summarize.ipynb](./src/examples/summarize.ipynb) | -| 2 | Question Answering | Various approaches for QA | [qa.ipynb](./src/examples/qa.ipynb) | -| 3 | Quickstart task | Build a custom task for your use case | [quickstart_task.ipynb](./src/examples/quickstart_task.ipynb) | -| 4 | Classification | Conduct zero-shot text classification | [classify.ipynb](./src/examples/classify.ipynb) | -| 5 | Document Index | Connect your proprietary knowledge base | [document_index.ipynb](./src/examples/document_index.ipynb) | +| Order | Task | Description | Notebook 📓 | +| ----- | ------------------------------ | --------------------------------------- | ------------------------------------------------------------------------------- | +| 1 | Summarization | Summarize a document | [summarize.ipynb](./src/examples/summarize.ipynb) | +| 2 | Question Answering | Various approaches for QA | [qa.ipynb](./src/examples/qa.ipynb) | +| 3 | Quickstart task | Build a custom task for your use case | [quickstart_task.ipynb](./src/examples/quickstart_task.ipynb) | +| 4 | Single label Classification | Conduct zero-shot text classification | [single_label_classify.ipynb](./src/examples/single_label_classify.ipynb) | +| 5 | Embedding based Classification | Classify texts on the basis of examples | [embedding_based_classify.ipynb](./src/examples/embedding_based_classify.ipynb) | +| 6 | Document Index | Connect your proprietary knowledge base | [document_index.ipynb](./src/examples/document_index.ipynb) | ## 
Getting started with the Jupyter Notebooks diff --git a/src/examples/embedding_based_classify.ipynb b/src/examples/embedding_based_classify.ipynb new file mode 100644 index 000000000..14e5b1121 --- /dev/null +++ b/src/examples/embedding_based_classify.ipynb @@ -0,0 +1,120 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Embedding-Based Classification\n", + "\n", + "Large language model embeddings offer a powerful approach to text classification.\n", + "In this method, each example from various classes is transformed into a vector representation using the embeddings from the language model.\n", + "These embedded vectors capture the semantic essence of the text.\n", + "Once this is done, clusters of embeddings are formed for each class, representing the centroid or the average meaning of the examples within that class.\n", + "When a new piece of text needs to be classified, it is first embedded using the same language model.\n", + "This new embedded vector is then compared to the pre-defined clusters for each class using cosine similarity.\n", + "The class whose cluster is closest to the new text's embedding is then assigned to the text, thereby achieving classification.\n", + "This method leverages the deep semantic understanding of large language models to classify texts with high accuracy and nuance.\n", + "\n", + "### When should you use embedding-based classification?\n", + "\n", + "We recommend using this type of classification when...\n", + "- ...proper classification requires fine-grained control over the classes' definitions.\n", + "- ...the labels can be defined mostly or purely by the semantic meaning of the examples.\n", + "- ...examples for each label are readily available.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's start by instantiating a classifier for sentiment classification." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from os import getenv\n", + "\n", + "from aleph_alpha_client import Client\n", + "\n", + "from intelligence_layer.use_cases.classify.embedding_based_classify import EmbeddingBasedClassify, LabelWithExamples\n", + "\n", + "\n", + "client = Client(getenv(\"AA_TOKEN\"))\n", + "labels_with_examples = [\n", + " LabelWithExamples(\n", + " name=\"positive\",\n", + " examples=[\n", + " \"I really like this.\",\n", + " \"Wow, your hair looks great!\",\n", + " \"We're so in love.\",\n", + " \"That truly was the best day of my life!\",\n", + " \"What a great movie.\"\n", + " ],\n", + " ),\n", + " LabelWithExamples(\n", + " name=\"negative\",\n", + " examples=[\n", + " \"I really dislike this.\",\n", + " \"Ugh, Your hair looks horrible!\",\n", + " \"We're not in love anymore.\",\n", + " \"My day was very bad, I did not have a good time.\",\n", + " \"They make terrible food.\"\n", + " ],\n", + " ),\n", + "]\n", + "classify = EmbeddingBasedClassify(labels_with_examples, client)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alright, let's classify a new example!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from intelligence_layer.core.logger import InMemoryDebugLogger\n", + "from intelligence_layer.use_cases.classify.classify import ClassifyInput\n", + "\n", + "\n", + "classify_input = ClassifyInput(\n", + " chunk=\"It was very awkward with him, I did not enjoy it.\",\n", + " labels=frozenset(l.name for l in labels_with_examples)\n", + ")\n", + "logger = InMemoryDebugLogger(name=\"Classify\")\n", + "result = classify.run(classify_input, logger)\n", + "result" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "3.10-intelligence", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/examples/quickstart_task.ipynb b/src/examples/quickstart_task.ipynb index 1ff54bafb..6c2bec4f5 100644 --- a/src/examples/quickstart_task.ipynb +++ b/src/examples/quickstart_task.ipynb @@ -436,7 +436,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/src/examples/classify.ipynb b/src/examples/single_label_classify.ipynb similarity index 89% rename from src/examples/classify.ipynb rename to src/examples/single_label_classify.ipynb index a4c51e5e0..4d49d67e5 100644 --- a/src/examples/classify.ipynb +++ b/src/examples/single_label_classify.ipynb @@ -4,20 +4,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Classify\n", + "# Single Label Classification\n", "\n", - "Classification is a methodology that tries to match a text to the correct label.\n", + "Single-label classification, also known as multi-class classification (or binary classification when n = 2), 
refers to the task of categorizing data points into one of n distinct categories or classes.\n", + "In this type of classification, each input is assigned to only one class, ensuring that no overlap exists between categories.\n", + "Common applications of single-label classification include email spam detection, where emails are classified as either \"spam\" or \"not spam\", or sentiment classification, where a text can be \"positive\", \"negative\" or \"neutral\".\n", + "The primary goal is to train a model that can accurately predict the correct class for any given input based on its features.\n", "\n", "### Prompt-based classification\n", "\n", - "Prompt-based classification is a methodology that relies purely on prompting the LLM in a specific way.\n", + "Here, we'll use a purely prompt-based approach for classification.\n", "\n", "### When should you use prompt-based classification?\n", "\n", - "Some situations when you would use this methodology is when:\n", - "- The labels are easily understood (they don't require explanation or examples), for example sentiment analysis\n", - "- The labels are not recognized by their semantic meaning, e.g. 
\"reasoning\" tasks like classifying contradictions\n", - "- You don't have many examples\n", + "We recommend using this type of classification when...\n", + "- ...the labels are easily understood (they don't require explanation or examples).\n", + "- ...the labels cannot be recognized purely by their semantic meaning.\n", + "- ...many examples for each label aren't readily available.\n", "\n", "### Example snippet\n", "\n", @@ -55,8 +58,7 @@ "debug_log = InMemoryDebugLogger(name=\"classify\")\n", "output = task.run(input, debug_log)\n", "for label, score in output.scores.items():\n", - " print(f\"{label}: {round(score, 4)}\")\n", - "# debug_log\n" + " print(f\"{label}: {round(score, 4)}\")\n" ] }, { @@ -251,9 +253,9 @@ "metadata": {}, "outputs": [], "source": [ - "from intelligence_layer.use_cases.classify.single_label_classify import SingleLabelClassifyEvaluator\n", + "from intelligence_layer.use_cases.classify.classify import ClassifyEvaluator\n", "\n", - "evaluator = SingleLabelClassifyEvaluator(task)\n", + "evaluator = ClassifyEvaluator(task)\n", "classify_input = ClassifyInput(\n", " chunk=Chunk(\"This is good\"),\n", " labels=frozenset({\"positive\", \"negative\"}),\n", diff --git a/src/intelligence_layer/use_cases/classify/classify.py b/src/intelligence_layer/use_cases/classify/classify.py index 09ad96274..fee788a4f 100644 --- a/src/intelligence_layer/use_cases/classify/classify.py +++ b/src/intelligence_layer/use_cases/classify/classify.py @@ -1,9 +1,12 @@ from typing import ( Mapping, + Sequence, ) from pydantic import BaseModel -from intelligence_layer.core.task import Chunk, Probability +from intelligence_layer.core.evaluator import Evaluator +from intelligence_layer.core.logger import DebugLogger +from intelligence_layer.core.task import Chunk, Probability, Task class ClassifyInput(BaseModel): @@ -29,3 +32,74 @@ class ClassifyOutput(BaseModel): """ scores: Mapping[str, Probability] + + +class Classify(Task[ClassifyInput, ClassifyOutput]): + 
"""Placeholder class for any classifier implementation.""" + + pass + + +class ClassifyEvaluation(BaseModel): + """The evaluation of a single label classification run. + + Attributes: + correct: Was the highest scoring class from the output in the set of "correct classes" + output: The actual output from the task run + """ + + correct: bool + output: ClassifyOutput + + +class AggregatedClassifyEvaluation(BaseModel): + """The aggregated evaluation of a single label classify implementation against a dataset. + + Attributes: + percentage_correct: Percentage of answers that were considered to be correct + evaluation: The actual evaluations + """ + + percentage_correct: float + evaluations: Sequence[ClassifyEvaluation] + + +class ClassifyEvaluator( + Evaluator[ + ClassifyInput, + Sequence[str], + ClassifyEvaluation, + AggregatedClassifyEvaluation, + ] +): + def __init__(self, task: Classify): + self.task = task + + def evaluate( + self, + input: ClassifyInput, + logger: DebugLogger, + expected_output: Sequence[str], + ) -> ClassifyEvaluation: + output = self.task.run(input, logger) + sorted_classes = sorted( + output.scores.items(), key=lambda item: item[1], reverse=True + ) + if sorted_classes[0][0] in expected_output: + correct = True + else: + correct = False + return ClassifyEvaluation(correct=correct, output=output) + + def aggregate( + self, evaluations: Sequence[ClassifyEvaluation] + ) -> AggregatedClassifyEvaluation: + if len(evaluations) != 0: + correct_answers = len( + [eval.correct for eval in evaluations if eval.correct == True] + ) / len(evaluations) + else: + correct_answers = 0 + return AggregatedClassifyEvaluation( + percentage_correct=correct_answers, evaluations=evaluations + ) diff --git a/src/intelligence_layer/use_cases/classify/embedding_based_classify.py b/src/intelligence_layer/use_cases/classify/embedding_based_classify.py index 1fc34bf2f..63fa56731 100644 --- a/src/intelligence_layer/use_cases/classify/embedding_based_classify.py +++ 
b/src/intelligence_layer/use_cases/classify/embedding_based_classify.py @@ -13,7 +13,11 @@ ) from intelligence_layer.core.logger import DebugLogger from intelligence_layer.core.task import Chunk, Probability, Task -from intelligence_layer.use_cases.classify.classify import ClassifyInput, ClassifyOutput +from intelligence_layer.use_cases.classify.classify import ( + Classify, + ClassifyInput, + ClassifyOutput, +) from intelligence_layer.use_cases.search.filter_search import ( FilterSearch, FilterSearchInput, @@ -46,7 +50,7 @@ class EmbeddingBasedClassifyScoring(Enum): MEAN_TOP_5 = 5 -class EmbeddingBasedClassify(Task[ClassifyInput, ClassifyOutput]): +class EmbeddingBasedClassify(Classify): """Task that classifies a given input text based on examples. The input contains a complete set of all possible labels. The output will return a score @@ -119,7 +123,7 @@ def run(self, input: ClassifyInput, logger: DebugLogger) -> ClassifyOutput: ) unknown_labels = input.labels - available_labels if unknown_labels: - raise ValueError(f"Got unexpected labels: {unknown_labels}") + raise ValueError(f"Got unexpected labels: {', '.join(unknown_labels)}.") labels = list(input.labels) # converting to list to preserve order results_per_label = [ self._label_search(input.chunk, label, logger) for label in labels diff --git a/src/intelligence_layer/use_cases/classify/single_label_classify.py b/src/intelligence_layer/use_cases/classify/single_label_classify.py index dda3da54f..ba44097dc 100644 --- a/src/intelligence_layer/use_cases/classify/single_label_classify.py +++ b/src/intelligence_layer/use_cases/classify/single_label_classify.py @@ -12,23 +12,25 @@ PromptTemplate, Prompt, ) -from pydantic import BaseModel from intelligence_layer.core.complete import ( Complete, ) from intelligence_layer.core.echo import EchoInput, EchoTask, TokenWithProb -from intelligence_layer.core.evaluator import Evaluator from intelligence_layer.core.logger import DebugLogger -from intelligence_layer.core.task 
import Probability, Task, Token -from intelligence_layer.use_cases.classify.classify import ClassifyInput, ClassifyOutput +from intelligence_layer.core.task import Probability, Token +from intelligence_layer.use_cases.classify.classify import ( + Classify, + ClassifyInput, + ClassifyOutput, +) def to_aa_tokens_prompt(tokens: Sequence[Token]) -> Prompt: return Prompt.from_tokens([token.token_id for token in tokens]) -class SingleLabelClassify(Task[ClassifyInput, ClassifyOutput]): +class SingleLabelClassify(Classify): """Task that classifies a given input text with one of the given classes. The input contains a complete set of all possible labels. The output will return a score for @@ -208,68 +210,3 @@ def path(self, tokens: Iterable[Token]) -> Iterable[TokenWithProb]: node = child assert node.token and node.normalized_prob yield TokenWithProb(token=node.token, prob=node.normalized_prob) - - -class ClassifyEvaluation(BaseModel): - """The evaluation of a single label classification run. - - Attributes: - correct: Was the highest scoring class from the output in the set of "correct classes" - output: The actual output from the task run - """ - - correct: bool - output: ClassifyOutput - - -class AggregatedClassifyEvaluation(BaseModel): - """The aggregated evaluation of a single label classify implementation against a dataset. 
- - Attributes: - percentage_correct: Percentage of answers that were considered to be correct - evaluation: The actual evaluations - """ - - percentage_correct: float - evaluations: Sequence[ClassifyEvaluation] - - -class SingleLabelClassifyEvaluator( - Evaluator[ - ClassifyInput, - Sequence[str], - ClassifyEvaluation, - AggregatedClassifyEvaluation, - ] -): - def __init__(self, task: SingleLabelClassify): - self.task = task - - def evaluate( - self, - input: ClassifyInput, - logger: DebugLogger, - expected_output: Sequence[str], - ) -> ClassifyEvaluation: - output = self.task.run(input, logger) - sorted_classes = sorted( - output.scores.items(), key=lambda item: item[1], reverse=True - ) - if sorted_classes[0][0] in expected_output: - correct = True - else: - correct = False - return ClassifyEvaluation(correct=correct, output=output) - - def aggregate( - self, evaluations: Sequence[ClassifyEvaluation] - ) -> AggregatedClassifyEvaluation: - if len(evaluations) != 0: - correct_answers = len( - [eval.correct for eval in evaluations if eval.correct == True] - ) / len(evaluations) - else: - correct_answers = 0 - return AggregatedClassifyEvaluation( - percentage_correct=correct_answers, evaluations=evaluations - ) diff --git a/tests/use_cases/classify/test_embedding_based_classify.py b/tests/use_cases/classify/test_embedding_based_classify.py index 48bc2f684..8714859ab 100644 --- a/tests/use_cases/classify/test_embedding_based_classify.py +++ b/tests/use_cases/classify/test_embedding_based_classify.py @@ -4,6 +4,7 @@ from intelligence_layer.core.logger import NoOpDebugLogger from intelligence_layer.core.task import Chunk from intelligence_layer.use_cases.classify.classify import ( + ClassifyEvaluator, ClassifyInput, ClassifyOutput, ) @@ -60,3 +61,19 @@ def test_embedding_based_classify_raises_for_unknown_label( ) with raises(ValueError) as e: embedding_based_classify.run(classify_input, NoOpDebugLogger()) + + +def test_can_evaluate_embedding_based_classify( + 
embedding_based_classify: EmbeddingBasedClassify, +) -> None: + classify_input = ClassifyInput( + chunk=Chunk("This is good"), + labels=frozenset({"positive", "negative"}), + ) + evaluator = ClassifyEvaluator(task=embedding_based_classify) + + evaluation = evaluator.evaluate( + input=classify_input, logger=NoOpDebugLogger(), expected_output=["positive"] + ) + + assert evaluation.correct == True diff --git a/tests/use_cases/classify/test_single_label_classify.py b/tests/use_cases/classify/test_single_label_classify.py index 5f37d38ab..61192a2d6 100644 --- a/tests/use_cases/classify/test_single_label_classify.py +++ b/tests/use_cases/classify/test_single_label_classify.py @@ -9,10 +9,10 @@ from intelligence_layer.use_cases.classify.classify import ( ClassifyInput, ClassifyOutput, + ClassifyEvaluator, ) from intelligence_layer.use_cases.classify.single_label_classify import ( SingleLabelClassify, - SingleLabelClassifyEvaluator, ) @@ -109,7 +109,7 @@ def test_can_evaluate_classify(single_label_classify: SingleLabelClassify) -> No chunk=Chunk("This is good"), labels=frozenset({"positive", "negative"}), ) - evaluator = SingleLabelClassifyEvaluator(task=single_label_classify) + evaluator = ClassifyEvaluator(task=single_label_classify) evaluation = evaluator.evaluate( input=classify_input, logger=NoOpDebugLogger(), expected_output=["positive"] @@ -137,9 +137,7 @@ def test_can_aggregate_evaluations( expected_output=positive_lst, ) - single_label_classify_evaluator = SingleLabelClassifyEvaluator( - task=single_label_classify - ) + single_label_classify_evaluator = ClassifyEvaluator(task=single_label_classify) dataset = Dataset( name="classify_test", examples=[correct_example, incorrect_example] @@ -155,9 +153,7 @@ def test_can_aggregate_evaluations( def test_aggregating_evaluations_works_with_empty_list( single_label_classify: SingleLabelClassify, ) -> None: - single_label_classify_evaluator = SingleLabelClassifyEvaluator( - task=single_label_classify - ) + 
single_label_classify_evaluator = ClassifyEvaluator(task=single_label_classify) aggregated_evaluations = single_label_classify_evaluator.evaluate_dataset( Dataset(name="empty_dataset", examples=[]), logger=NoOpDebugLogger() diff --git a/tests/use_cases/search/test_filter_search.py b/tests/use_cases/search/test_filter_search.py new file mode 100644 index 000000000..7f8e09a30 --- /dev/null +++ b/tests/use_cases/search/test_filter_search.py @@ -0,0 +1,58 @@ +from pytest import fixture +from typing import Sequence + +from qdrant_client.http.models import models + +from intelligence_layer.connectors.retrievers.base_retriever import Document +from intelligence_layer.connectors.retrievers.in_memory_retriever import ( + InMemoryRetriever, +) +from intelligence_layer.core.logger import NoOpDebugLogger +from intelligence_layer.use_cases.search.filter_search import ( + FilterSearch, + FilterSearchInput, +) + + +@fixture +def in_memory_retriever_documents() -> Sequence[Document]: + return [ + Document( + text="Germany reunited. I kind of fit and am of the correct type.", + metadata={"type": "doc"}, + ), + Document( + text="Cats are small animals. Well, I do not fit at all but I am of the correct type.", + metadata={"type": "doc"}, + ), + Document( + text="Germany reunited in 1990. 
This document fits perfectly but it is of the wrong type.", + metadata={"type": "no doc"}, + ), + ] + + +@fixture +def filter_search(asymmetric_in_memory_retriever: InMemoryRetriever) -> FilterSearch: + return FilterSearch(asymmetric_in_memory_retriever) + + +def test_filter_search( + filter_search: FilterSearch, + no_op_debug_logger: NoOpDebugLogger, + in_memory_retriever_documents: Sequence[Document], +) -> None: + search_input = FilterSearchInput( + query="When did Germany reunite?", + limit=1, + filter=models.Filter( + must=[ + models.FieldCondition( + key=f"metadata.type", + match=models.MatchValue(value="doc"), + ), + ] + ), + ) + result = filter_search.run(search_input, no_op_debug_logger) + assert [r.document for r in result.results] == [in_memory_retriever_documents[0]]