diff --git a/docs/intelligence_layer.core.rst b/docs/intelligence_layer.core.rst index 2191e2518..cce640699 100644 --- a/docs/intelligence_layer.core.rst +++ b/docs/intelligence_layer.core.rst @@ -6,4 +6,4 @@ Module contents .. automodule:: intelligence_layer.core - .. autoclass:: Chunk + .. autoclass:: TextChunk diff --git a/src/examples/classification.ipynb b/src/examples/classification.ipynb index 195e29440..bb38ed63f 100644 --- a/src/examples/classification.ipynb +++ b/src/examples/classification.ipynb @@ -45,12 +45,12 @@ "from dotenv import load_dotenv\n", "\n", "from intelligence_layer.connectors import LimitedConcurrencyClient\n", - "from intelligence_layer.core import Chunk, InMemoryTracer\n", + "from intelligence_layer.core import TextChunk, InMemoryTracer\n", "from intelligence_layer.use_cases import ClassifyInput, PromptBasedClassify\n", "\n", "load_dotenv()\n", "\n", - "text_to_classify = Chunk(\n", + "text_to_classify = TextChunk(\n", " \"In the distant future, a space exploration party embarked on a thrilling journey to the uncharted regions of the galaxy. \\n\\\n", "With excitement in their hearts and the cosmos as their canvas, they ventured into the unknown, discovering breathtaking celestial wonders. \\n\\\n", "As they gazed upon distant stars and nebulas, they forged unforgettable memories that would forever bind them as pioneers of the cosmos.\"\n", diff --git a/src/examples/evaluation.ipynb b/src/examples/evaluation.ipynb index dbf49a083..815088693 100644 --- a/src/examples/evaluation.ipynb +++ b/src/examples/evaluation.ipynb @@ -99,13 +99,13 @@ "metadata": {}, "outputs": [], "source": [ - "from intelligence_layer.core import Chunk, NoOpTracer\n", + "from intelligence_layer.core import TextChunk, NoOpTracer\n", "from intelligence_layer.use_cases import ClassifyInput\n", "from intelligence_layer.evaluation import Example\n", "\n", "\n", "classify_input = ClassifyInput(\n", - " chunk=Chunk(\"This is good\"),\n", + " chunk=TextChunk(\"This is good\"),\n", " labels=frozenset({\"positive\", \"negative\"}),\n", ")\n", "\n", @@ -191,7 +191,7 @@ "dataset_id = dataset_repository.create_dataset(\n", " examples=[\n", " Example(\n", - " input=ClassifyInput(chunk=Chunk(d[\"text\"]), labels=all_labels),\n", + " input=ClassifyInput(chunk=TextChunk(d[\"text\"]), labels=all_labels),\n", " expected_output=d[\"label_name\"],\n", " )\n", " for d in data\n", @@ -391,7 +391,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.6" } }, "nbformat": 4, diff --git a/src/intelligence_layer/core/__init__.py b/src/intelligence_layer/core/__init__.py index 7b063fbcc..560d45c3e 100644 --- a/src/intelligence_layer/core/__init__.py +++ b/src/intelligence_layer/core/__init__.py @@ -7,15 +7,15 @@ from .chunk import Chunk as Chunk from .chunk import ChunkInput as ChunkInput from .chunk import ChunkOutput as ChunkOutput -from .chunk import ChunkOverlapTask as ChunkOverlapTask -from .chunk import ChunkTask as ChunkTask +from .chunk import ChunkOverlap as ChunkOverlap +from .chunk import TextChunk as TextChunk from .detect_language import DetectLanguage as DetectLanguage from .detect_language import DetectLanguageInput as DetectLanguageInput from .detect_language import DetectLanguageOutput as DetectLanguageOutput from .detect_language import Language as Language +from .echo import Echo as Echo from .echo import EchoInput as EchoInput from .echo import EchoOutput as EchoOutput -from .echo import EchoTask as EchoTask from .echo import 
TokenWithLogProb as TokenWithLogProb from .instruct import Instruct as Instruct from .instruct import InstructInput as InstructInput diff --git a/src/intelligence_layer/core/chunk.py b/src/intelligence_layer/core/chunk.py index 098b5bebe..d0528f160 100644 --- a/src/intelligence_layer/core/chunk.py +++ b/src/intelligence_layer/core/chunk.py @@ -3,11 +3,11 @@ from pydantic import BaseModel from semantic_text_splitter import HuggingFaceTextSplitter -from intelligence_layer.core.model import ControlModel +from intelligence_layer.core.model import ControlModel, LuminousControlModel from intelligence_layer.core.task import Task from intelligence_layer.core.tracer.tracer import TaskSpan -Chunk = NewType("Chunk", str) +TextChunk = NewType("TextChunk", str) """Segment of a larger text. This type indicates that the string is smaller than the context size of the model where it is used. @@ -35,10 +35,10 @@ class ChunkOutput(BaseModel): chunks: A list of smaller sections of the input text. """ - chunks: Sequence[Chunk] + chunks: Sequence[TextChunk] -class ChunkTask(Task[ChunkInput, ChunkOutput]): +class Chunk(Task[ChunkInput, ChunkOutput]): """Splits a longer text into smaller text chunks. Provide a text of any length and chunk it into smaller pieces using a @@ -50,20 +50,23 @@ class ChunkTask(Task[ChunkInput, ChunkOutput]): max_tokens_per_chunk: The maximum number of tokens to fit into one chunk. """ - def __init__(self, model: ControlModel, max_tokens_per_chunk: int): + def __init__( + self, model: ControlModel | None = None, max_tokens_per_chunk: int = 512 + ): super().__init__() + model = model or LuminousControlModel() self._splitter = HuggingFaceTextSplitter(model.get_tokenizer()) self._max_tokens_per_chunk = max_tokens_per_chunk def do_run(self, input: ChunkInput, task_span: TaskSpan) -> ChunkOutput: chunks = [ - Chunk(t) + TextChunk(t) for t in self._splitter.chunks(input.text, self._max_tokens_per_chunk) ] return ChunkOutput(chunks=chunks) -class ChunkOverlapTask(Task[ChunkInput, ChunkOutput]): +class ChunkOverlap(Task[ChunkInput, ChunkOutput]): """Splits a longer text into smaller text chunks, where every chunk overlaps with the previous chunk by `overlap_length_tokens` number of tokens.
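With this change the renamed `Chunk` task is constructible without arguments: `model` falls back to `LuminousControlModel()` and `max_tokens_per_chunk` to 512. A minimal usage sketch under those defaults (the input text is illustrative; an Aleph Alpha token is assumed to be configured in the environment):

    from intelligence_layer.core import Chunk, ChunkInput, InMemoryTracer

    # Both constructor arguments are now optional.
    task = Chunk()
    output = task.run(ChunkInput(text="A text of arbitrary length."), InMemoryTracer())
    # output.chunks is a Sequence[TextChunk], each at most 512 tokens long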
@@ -79,9 +82,9 @@ class ChunkOverlapTask(Task[ChunkInput, ChunkOutput]): def __init__( self, - model: ControlModel, - max_tokens_per_chunk: int, - overlap_length_tokens: int, + model: ControlModel | None = None, + max_tokens_per_chunk: int = 512, + overlap_length_tokens: int = 0, ): super().__init__() if overlap_length_tokens >= max_tokens_per_chunk: @@ -90,8 +93,10 @@ def __init__( overlap_length_tokens, max_tokens_per_chunk ) ) - self.chunk_task = ChunkTask(model, overlap_length_tokens // 2) + + model = model or LuminousControlModel() self.tokenizer = model.get_tokenizer() + self.chunk_task = Chunk(model, overlap_length_tokens // 2) self.max_tokens_per_chunk = max_tokens_per_chunk self.overlap_length_tokens = overlap_length_tokens diff --git a/src/intelligence_layer/core/echo.py b/src/intelligence_layer/core/echo.py index db2a82970..63ba47c69 100644 --- a/src/intelligence_layer/core/echo.py +++ b/src/intelligence_layer/core/echo.py @@ -4,7 +4,11 @@ from pydantic import BaseModel from tokenizers import Encoding # type: ignore -from intelligence_layer.core.model import CompleteInput, ControlModel +from intelligence_layer.core.model import ( + CompleteInput, + ControlModel, + LuminousControlModel, +) from intelligence_layer.core.prompt_template import PromptTemplate from intelligence_layer.core.task import Task, Token from intelligence_layer.core.tracer.tracer import TaskSpan @@ -18,7 +22,7 @@ class TokenWithLogProb(BaseModel): class EchoInput(BaseModel): - """The input for an `EchoTask`. + """The input for an `Echo` task. Attributes: prompt: The input text that serves as the starting point for the LLM. @@ -31,7 +35,7 @@ class EchoInput(BaseModel): class EchoOutput(BaseModel): - """The output of an `EchoTask`. + """The output of an `Echo` task. Attributes: tokens_with_log_probs: Every token of the `expected_completion` of the @@ -42,7 +46,7 @@ class EchoOutput(BaseModel): tokens_with_log_probs: Sequence[TokenWithLogProb] -class EchoTask(Task[EchoInput, EchoOutput]): +class Echo(Task[EchoInput, EchoOutput]): """Task that returns probabilities of a completion given a prompt. Analyzes the likelihood of generating tokens in the expected completion based on @@ -53,10 +57,10 @@ class EchoTask(Task[EchoInput, EchoOutput]): Example: >>> from aleph_alpha_client import Prompt - >>> from intelligence_layer.core import EchoTask,EchoInput, InMemoryTracer, LuminousControlModel + >>> from intelligence_layer.core import Echo, EchoInput, InMemoryTracer, LuminousControlModel >>> model = LuminousControlModel(name="luminous-base-control") - >>> task = EchoTask(model) + >>> task = Echo(model) >>> input = EchoInput( ... prompt=Prompt.from_text("This is a "), ... expected_completion="happy text", @@ -67,9 +71,9 @@ class EchoTask(Task[EchoInput, EchoOutput]): PROMPT_TEMPLATE_STR: str = "{{prompt}}{{expected_completion}}" - def __init__(self, model: ControlModel) -> None: + def __init__(self, model: ControlModel | None = None) -> None: super().__init__() - self._model = model + self._model = model or LuminousControlModel() def do_run(self, input: EchoInput, task_span: TaskSpan) -> EchoOutput: # We tokenize the prompt separately so we don't have an overlap in the tokens. 
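The renamed `Echo` task keeps the doctest above working; with the now-optional `model` parameter it can also be constructed without arguments. A sketch mirroring that doctest (assumes an Aleph Alpha token is configured in the environment):

    from aleph_alpha_client import Prompt
    from intelligence_layer.core import Echo, EchoInput, InMemoryTracer

    # Echo() now falls back to LuminousControlModel(), i.e. "luminous-base-control".
    task = Echo()
    input = EchoInput(
        prompt=Prompt.from_text("This is a "),
        expected_completion="happy text",
    )
    output = task.run(input, InMemoryTracer())
    # output.tokens_with_log_probs holds one TokenWithLogProb per completion token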
diff --git a/src/intelligence_layer/core/instruct.py b/src/intelligence_layer/core/instruct.py index 25fdd0cd8..52033b686 100644 --- a/src/intelligence_layer/core/instruct.py +++ b/src/intelligence_layer/core/instruct.py @@ -2,7 +2,12 @@ from pydantic import BaseModel -from intelligence_layer.core.model import CompleteInput, CompleteOutput, ControlModel +from intelligence_layer.core.model import ( + CompleteInput, + CompleteOutput, + ControlModel, + LuminousControlModel, +) from intelligence_layer.core.task import Task from intelligence_layer.core.tracer.tracer import TaskSpan @@ -15,9 +20,10 @@ class InstructInput(BaseModel): class Instruct(Task[InstructInput, CompleteOutput]): - def __init__(self, model: ControlModel) -> None: + + def __init__(self, model: ControlModel | None = None) -> None: super().__init__() - self._model = model + self._model = model or LuminousControlModel() def do_run(self, input: InstructInput, task_span: TaskSpan) -> CompleteOutput: prompt = self._model.to_instruct_prompt( diff --git a/src/intelligence_layer/core/model.py b/src/intelligence_layer/core/model.py index c89d9a5d0..541a82476 100644 --- a/src/intelligence_layer/core/model.py +++ b/src/intelligence_layer/core/model.py @@ -209,7 +209,7 @@ def __init__( "luminous-base-control-20240215", "luminous-extended-control-20240215", "luminous-supreme-control-20240215", - ], + ] = "luminous-base-control", client: Optional[AlephAlphaClientProtocol] = None, ) -> None: super().__init__(name, client) diff --git a/src/intelligence_layer/core/text_highlight.py b/src/intelligence_layer/core/text_highlight.py index 9d92baadb..357263775 100644 --- a/src/intelligence_layer/core/text_highlight.py +++ b/src/intelligence_layer/core/text_highlight.py @@ -9,7 +9,12 @@ from aleph_alpha_client.explanation import TextScoreWithRaw from pydantic import BaseModel -from intelligence_layer.core.model import ControlModel, ExplainInput, ExplainOutput +from intelligence_layer.core.model import ( + ControlModel, + ExplainInput, + ExplainOutput, + LuminousControlModel, +) from intelligence_layer.core.prompt_template import ( Cursor, PromptRange, @@ -97,11 +102,11 @@ class TextHighlight(Task[TextHighlightInput, TextHighlightOutput]): def __init__( self, - model: ControlModel, + model: ControlModel | None = None, granularity: PromptGranularity = PromptGranularity.Sentence, ) -> None: super().__init__() - self._model = model + self._model = model or LuminousControlModel() self._granularity = granularity def do_run( diff --git a/src/intelligence_layer/core/tracer/composite_tracer.py b/src/intelligence_layer/core/tracer/composite_tracer.py index 8ca21ad14..ebddc7424 100644 --- a/src/intelligence_layer/core/tracer/composite_tracer.py +++ b/src/intelligence_layer/core/tracer/composite_tracer.py @@ -21,14 +21,14 @@ class CompositeTracer(Tracer, Generic[TracerVar]): tracers: tracers that will be forwarded all subsequent log and span calls. 
Example: - >>> from intelligence_layer.core import InMemoryTracer, FileTracer, CompositeTracer, Chunk + >>> from intelligence_layer.core import InMemoryTracer, FileTracer, CompositeTracer, TextChunk >>> from intelligence_layer.use_cases import PromptBasedClassify, ClassifyInput >>> tracer_1 = InMemoryTracer() >>> tracer_2 = InMemoryTracer() >>> tracer = CompositeTracer([tracer_1, tracer_2]) >>> task = PromptBasedClassify() - >>> response = task.run(ClassifyInput(chunk=Chunk("Cool"), labels=frozenset({"label", "other label"})), tracer) + >>> response = task.run(ClassifyInput(chunk=TextChunk("Cool"), labels=frozenset({"label", "other label"})), tracer) """ def __init__(self, tracers: Sequence[TracerVar]) -> None: diff --git a/src/intelligence_layer/use_cases/classify/classify.py b/src/intelligence_layer/use_cases/classify/classify.py index 4e5cd4a8c..c108ead4d 100644 --- a/src/intelligence_layer/use_cases/classify/classify.py +++ b/src/intelligence_layer/use_cases/classify/classify.py @@ -3,7 +3,7 @@ from pydantic import BaseModel -from intelligence_layer.core import Chunk +from intelligence_layer.core import TextChunk from intelligence_layer.evaluation import Example, MeanAccumulator from intelligence_layer.evaluation.base_logic import ( AggregationLogic, @@ -21,7 +21,7 @@ class ClassifyInput(BaseModel): labels: Possible labels from which the model will choose one """ - chunk: Chunk + chunk: TextChunk labels: frozenset[str] diff --git a/src/intelligence_layer/use_cases/classify/embedding_based_classify.py b/src/intelligence_layer/use_cases/classify/embedding_based_classify.py index a40aeff4b..b53eefd7a 100644 --- a/src/intelligence_layer/use_cases/classify/embedding_based_classify.py +++ b/src/intelligence_layer/use_cases/classify/embedding_based_classify.py @@ -12,7 +12,7 @@ QdrantInMemoryRetriever, RetrieverType, ) -from intelligence_layer.core import Chunk, Task, TaskSpan +from intelligence_layer.core import Task, TaskSpan, TextChunk from intelligence_layer.use_cases.classify.classify import ( ClassifyInput, MultiLabelClassifyOutput, @@ -129,7 +129,7 @@ class EmbeddingBasedClassify(Task[ClassifyInput, MultiLabelClassifyOutput]): >>> from intelligence_layer.connectors.limited_concurrency_client import ( ... LimitedConcurrencyClient, ... ) - >>> from intelligence_layer.core import Chunk, InMemoryTracer + >>> from intelligence_layer.core import TextChunk, InMemoryTracer >>> from intelligence_layer.use_cases.classify.classify import ClassifyInput >>> from intelligence_layer.use_cases.classify.embedding_based_classify import ( ... EmbeddingBasedClassify, @@ -153,7 +153,7 @@ class EmbeddingBasedClassify(Task[ClassifyInput, MultiLabelClassifyOutput]): ...
] >>> client = LimitedConcurrencyClient.from_env() >>> task = EmbeddingBasedClassify(client, labels_with_examples) - >>> input = ClassifyInput(chunk=Chunk("This is a happy text."), labels=frozenset({"positive", "negative"})) + >>> input = ClassifyInput(chunk=TextChunk("This is a happy text."), labels=frozenset({"positive", "negative"})) >>> tracer = InMemoryTracer() >>> output = task.run(input, tracer) """ @@ -212,7 +212,7 @@ def _validate_input_labels(self, input: ClassifyInput) -> None: raise ValueError(f"Got unexpected labels: {', '.join(unknown_labels)}.") def _label_search( - self, chunk: Chunk, label: str, task_span: TaskSpan + self, chunk: TextChunk, label: str, task_span: TaskSpan ) -> SearchOutput[int]: search_input = QdrantSearchInput( query=chunk, diff --git a/src/intelligence_layer/use_cases/classify/keyword_extract.py b/src/intelligence_layer/use_cases/classify/keyword_extract.py index 71d4d31d0..3d329aaf7 100644 --- a/src/intelligence_layer/use_cases/classify/keyword_extract.py +++ b/src/intelligence_layer/use_cases/classify/keyword_extract.py @@ -3,7 +3,7 @@ from pydantic import BaseModel from intelligence_layer.core import Task, TaskSpan -from intelligence_layer.core.chunk import Chunk +from intelligence_layer.core.chunk import TextChunk from intelligence_layer.core.detect_language import Language, language_config from intelligence_layer.core.model import ( CompleteInput, @@ -31,7 +31,7 @@ class KeywordExtractInput(BaseModel): - chunk: Chunk + chunk: TextChunk language: Language diff --git a/src/intelligence_layer/use_cases/classify/prompt_based_classify.py b/src/intelligence_layer/use_cases/classify/prompt_based_classify.py index 5f5c600f7..1c5fd9a83 100644 --- a/src/intelligence_layer/use_cases/classify/prompt_based_classify.py +++ b/src/intelligence_layer/use_cases/classify/prompt_based_classify.py @@ -7,8 +7,8 @@ from intelligence_layer.core import ( ControlModel, + Echo, EchoInput, - EchoTask, LuminousControlModel, RichPrompt, Task, @@ -52,14 +52,14 @@ class PromptBasedClassify(Task[ClassifyInput, SingleLabelClassifyOutput]): Example: >>> from intelligence_layer.core import InMemoryTracer - >>> from intelligence_layer.core import Chunk + >>> from intelligence_layer.core import TextChunk >>> from intelligence_layer.use_cases import ClassifyInput >>> from intelligence_layer.use_cases import PromptBasedClassify >>> task = PromptBasedClassify() >>> input = ClassifyInput( - ... chunk=Chunk("This is a happy text."), labels=frozenset({"positive", "negative"}) + ... chunk=TextChunk("This is a happy text."), labels=frozenset({"positive", "negative"}) ... 
) >>> tracer = InMemoryTracer() >>> output = task.run(input, tracer) @@ -73,7 +73,7 @@ def __init__( model: ControlModel = LuminousControlModel("luminous-base-control-20240215"), ) -> None: super().__init__() - self._echo_task = EchoTask(model) + self._echo_task = Echo(model) self._model = model def do_run( diff --git a/src/intelligence_layer/use_cases/qa/long_context_qa.py b/src/intelligence_layer/use_cases/qa/long_context_qa.py index 1c5140522..6cbf6b9f2 100644 --- a/src/intelligence_layer/use_cases/qa/long_context_qa.py +++ b/src/intelligence_layer/use_cases/qa/long_context_qa.py @@ -7,13 +7,13 @@ from intelligence_layer.core import ( Chunk, ChunkInput, - ChunkTask, ControlModel, DetectLanguage, Language, LuminousControlModel, Task, TaskSpan, + TextChunk, ) from intelligence_layer.use_cases.qa.multiple_chunk_qa import ( MultipleChunkQa, @@ -74,7 +74,7 @@ def __init__( ): super().__init__() self._model = model - self._chunk_task = ChunkTask(model, max_tokens_per_chunk) + self._chunk_task = Chunk(model, max_tokens_per_chunk) self._multi_chunk_qa = multi_chunk_qa or MultipleChunkQa(model=model) self._k = k self._language_detector = DetectLanguage(threshold=0.5) @@ -101,7 +101,8 @@ def do_run( multi_chunk_qa_input = MultipleChunkQaInput( chunks=[ - Chunk(result.document_chunk.text) for result in search_output.results + TextChunk(result.document_chunk.text) + for result in search_output.results ], question=input.question, language=input.language, diff --git a/src/intelligence_layer/use_cases/qa/multiple_chunk_qa.py b/src/intelligence_layer/use_cases/qa/multiple_chunk_qa.py index d2726eae6..9b4ba6669 100644 --- a/src/intelligence_layer/use_cases/qa/multiple_chunk_qa.py +++ b/src/intelligence_layer/use_cases/qa/multiple_chunk_qa.py @@ -3,7 +3,7 @@ from pydantic import BaseModel from intelligence_layer.core import Task, TaskSpan -from intelligence_layer.core.chunk import Chunk +from intelligence_layer.core.chunk import TextChunk from intelligence_layer.core.detect_language import Language, language_config from intelligence_layer.core.model import ( CompleteInput, @@ -28,7 +28,7 @@ class MultipleChunkQaInput(BaseModel): language: The desired language of the answer. ISO 639 str with language e.g. en, fr, etc. """ - chunks: Sequence[Chunk] + chunks: Sequence[TextChunk] question: str language: Language = Language("en") @@ -44,7 +44,7 @@ class Subanswer(BaseModel): """ answer: Optional[str] - chunk: Chunk + chunk: TextChunk highlights: Sequence[str] @@ -128,7 +128,7 @@ class MultipleChunkQa(Task[MultipleChunkQaInput, MultipleChunkQaOutput]): ... LimitedConcurrencyClient, ... ) >>> from intelligence_layer.core import Language, InMemoryTracer - >>> from intelligence_layer.core.chunk import Chunk + >>> from intelligence_layer.core.chunk import TextChunk >>> from intelligence_layer.use_cases import ( ... MultipleChunkQa, ... MultipleChunkQaInput, ... ) >>> task = MultipleChunkQa() >>> input = MultipleChunkQaInput( - ... chunks=[Chunk("Tina does not like pizza."), Chunk("Mike is a big fan of pizza.")], + ... chunks=[TextChunk("Tina does not like pizza."), TextChunk("Mike is a big fan of pizza.")], ... question="Who likes pizza?", ... language=Language("en"), ...
) diff --git a/src/intelligence_layer/use_cases/qa/retriever_based_qa.py b/src/intelligence_layer/use_cases/qa/retriever_based_qa.py index 5554aabef..95a4d9f0a 100644 --- a/src/intelligence_layer/use_cases/qa/retriever_based_qa.py +++ b/src/intelligence_layer/use_cases/qa/retriever_based_qa.py @@ -3,7 +3,7 @@ from pydantic import BaseModel from intelligence_layer.connectors.retrievers.base_retriever import ID, BaseRetriever -from intelligence_layer.core import Chunk, Language, Task, TaskSpan +from intelligence_layer.core import Language, Task, TaskSpan, TextChunk from intelligence_layer.use_cases.qa.multiple_chunk_qa import Subanswer from intelligence_layer.use_cases.qa.single_chunk_qa import ( SingleChunkQaInput, @@ -105,7 +105,7 @@ def do_run( sorted_qa_inputs = [ SingleChunkQaInput( - chunk=Chunk(output.document_chunk.text), + chunk=TextChunk(output.document_chunk.text), question=input.question, language=input.language, ) @@ -117,7 +117,7 @@ def do_run( enriched_answers = [ EnrichedSubanswer( answer=answer.answer, - chunk=Chunk(input.document_chunk.text), + chunk=TextChunk(input.document_chunk.text), highlights=answer.highlights, id=input.id, ) diff --git a/src/intelligence_layer/use_cases/qa/single_chunk_qa.py b/src/intelligence_layer/use_cases/qa/single_chunk_qa.py index 7feae3996..37c6cb6eb 100644 --- a/src/intelligence_layer/use_cases/qa/single_chunk_qa.py +++ b/src/intelligence_layer/use_cases/qa/single_chunk_qa.py @@ -4,7 +4,7 @@ from pydantic import BaseModel from intelligence_layer.core import Task, TaskSpan -from intelligence_layer.core.chunk import Chunk +from intelligence_layer.core.chunk import TextChunk from intelligence_layer.core.detect_language import Language, language_config from intelligence_layer.core.model import ( CompleteInput, @@ -57,7 +57,7 @@ class SingleChunkQaInput(BaseModel): language: The desired language of the answer. ISO 639 str with language e.g. en, fr, etc. """ - chunk: Chunk + chunk: TextChunk question: str language: Language = Language("en") @@ -91,12 +91,12 @@ class SingleChunkQa(Task[SingleChunkQaInput, SingleChunkQaOutput]): Example: >>> import os >>> from intelligence_layer.core import Language, InMemoryTracer - >>> from intelligence_layer.core import Chunk + >>> from intelligence_layer.core import TextChunk >>> from intelligence_layer.use_cases import SingleChunkQa, SingleChunkQaInput >>> >>> task = SingleChunkQa() >>> input = SingleChunkQaInput( - ... chunk=Chunk("Tina does not like pizza. However, Mike does."), + ... chunk=TextChunk("Tina does not like pizza. However, Mike does."), ... question="Who likes pizza?", ... language=Language("en"), ...
) diff --git a/src/intelligence_layer/use_cases/summarize/steerable_long_context_summarize.py b/src/intelligence_layer/use_cases/summarize/steerable_long_context_summarize.py index 223a8a577..6f821e9a2 100644 --- a/src/intelligence_layer/use_cases/summarize/steerable_long_context_summarize.py +++ b/src/intelligence_layer/use_cases/summarize/steerable_long_context_summarize.py @@ -1,7 +1,7 @@ from typing import Mapping -from intelligence_layer.core import ChunkInput, ChunkTask, Task, TaskSpan -from intelligence_layer.core.chunk import ChunkOutput, ChunkOverlapTask +from intelligence_layer.core import Chunk, ChunkInput, Task, TaskSpan +from intelligence_layer.core.chunk import ChunkOutput, ChunkOverlap from intelligence_layer.core.detect_language import Language from intelligence_layer.core.model import ControlModel, LuminousControlModel from intelligence_layer.use_cases.summarize.steerable_single_chunk_summarize import ( @@ -50,9 +50,9 @@ def __init__( ) self._chunk_task: Task[ChunkInput, ChunkOutput] if overlap_length_tokens == 0: - self._chunk_task = ChunkTask(model, max_tokens_per_chunk) + self._chunk_task = Chunk(model, max_tokens_per_chunk) else: - self._chunk_task = ChunkOverlapTask( + self._chunk_task = ChunkOverlap( model, max_tokens_per_chunk, overlap_length_tokens, diff --git a/src/intelligence_layer/use_cases/summarize/summarize.py b/src/intelligence_layer/use_cases/summarize/summarize.py index 6edf3544c..dbab8137d 100644 --- a/src/intelligence_layer/use_cases/summarize/summarize.py +++ b/src/intelligence_layer/use_cases/summarize/summarize.py @@ -2,7 +2,7 @@ from pydantic import BaseModel -from intelligence_layer.core import Chunk, Language +from intelligence_layer.core import Language, TextChunk from intelligence_layer.evaluation import ( BleuGrader, Example, @@ -38,7 +38,7 @@ class PartialSummary(BaseModel): """ summary: str - chunk: Chunk + chunk: TextChunk generated_tokens: int @@ -60,7 +60,7 @@ class SingleChunkSummarizeInput(BaseModel): language: The desired language of the summary. ISO 639 str with language e.g. en, fr, etc.
""" - chunk: Chunk + chunk: TextChunk language: Language = Language("en") diff --git a/tests/core/test_chunk.py b/tests/core/test_chunk.py index 0488f9721..e17c7af18 100644 --- a/tests/core/test_chunk.py +++ b/tests/core/test_chunk.py @@ -2,7 +2,7 @@ from intelligence_layer.core import ( ChunkInput, - ChunkOverlapTask, + ChunkOverlap, InMemoryTracer, LuminousControlModel, ) @@ -24,7 +24,7 @@ def test_overlapped_chunking( MAX_TOKENS = 16 tracer = InMemoryTracer() - task = ChunkOverlapTask( + task = ChunkOverlap( model=luminous_control_model, max_tokens_per_chunk=MAX_TOKENS, overlap_length_tokens=OVERLAP, diff --git a/tests/core/test_echo.py b/tests/core/test_echo.py index dfb802b3b..c6c24ff76 100644 --- a/tests/core/test_echo.py +++ b/tests/core/test_echo.py @@ -9,7 +9,7 @@ AlephAlphaClientProtocol, ) from intelligence_layer.core import MAX_CONCURRENCY, NoOpTracer, Task, TaskSpan, Token -from intelligence_layer.core.echo import EchoInput, EchoTask, TokenWithLogProb +from intelligence_layer.core.echo import Echo, EchoInput, TokenWithLogProb from intelligence_layer.core.model import ( CompleteInput, CompleteOutput, @@ -19,8 +19,8 @@ @fixture -def echo_task(luminous_control_model: LuminousControlModel) -> EchoTask: - return EchoTask(luminous_control_model) +def echo_task(luminous_control_model: LuminousControlModel) -> Echo: + return Echo(luminous_control_model) @fixture @@ -89,7 +89,7 @@ def tokenize_completion( ] -def test_can_run_echo_task(echo_task: EchoTask, echo_input: EchoInput) -> None: +def test_can_run_echo_task(echo_task: Echo, echo_input: EchoInput) -> None: result = echo_task.run(echo_input, tracer=NoOpTracer()) tokens = tokenize_completion(echo_input.expected_completion, echo_task._model) @@ -100,7 +100,7 @@ def test_can_run_echo_task(echo_task: EchoTask, echo_input: EchoInput) -> None: def test_echo_works_with_whitespaces_in_expected_completion( - echo_task: EchoTask, + echo_task: Echo, ) -> None: expected_completion = " good." input = EchoInput( @@ -117,7 +117,7 @@ def test_echo_works_with_whitespaces_in_expected_completion( assert token == result_token.token -def test_overlapping_tokens_generate_correct_tokens(echo_task: EchoTask) -> None: +def test_overlapping_tokens_generate_correct_tokens(echo_task: Echo) -> None: """This test checks if the echo task correctly tokenizes the expected completion separately The two tokens when tokenized together will result in a combination of the end of the first token and the start of the second token. This is not the expected behaviour. 
@@ -145,8 +145,6 @@ def test_overlapping_tokens_generate_correct_tokens(echo_task: EchoTask) -> None def test_run_concurrently_produces_proper_completion_prompts( client: AlephAlphaClientProtocol, echo_input: EchoInput ) -> None: - echo_task = EchoTask( - FakeCompleteTaskModel("luminous-base-control-20240215", client) - ) + echo_task = Echo(FakeCompleteTaskModel("luminous-base-control-20240215", client)) # if this test fails in CI you may need to increase the 50 to 1000 to reproduce this locally echo_task.run_concurrently([echo_input] * MAX_CONCURRENCY * 50, NoOpTracer()) diff --git a/tests/core/test_text_highlight.py b/tests/core/test_text_highlight.py index 7cff615ef..99933c491 100644 --- a/tests/core/test_text_highlight.py +++ b/tests/core/test_text_highlight.py @@ -30,7 +30,9 @@ def aleph_alpha_vanilla_model( @fixture -def text_highlight(aleph_alpha_vanilla_model: AlephAlphaVanillaModel) -> TextHighlight: +def text_highlight( + aleph_alpha_vanilla_model: AlephAlphaVanillaModel, +) -> TextHighlight: return TextHighlight(aleph_alpha_vanilla_model) @@ -82,7 +84,9 @@ def test_text_highlight_with_range_without_highlight( assert not any(h.score > 0 for h in output.highlights) -def test_text_highlight_with_only_one_sentence(text_highlight: TextHighlight) -> None: +def test_text_highlight_with_only_one_sentence( + text_highlight: TextHighlight, +) -> None: prompt_template_str = """What is the Latin name of the brown bear? The answer is Ursus Arctos.{% promptrange r1 %} Explanation should not highlight anything.{% endpromptrange %} Answer:""" template = PromptTemplate(prompt_template_str) diff --git a/tests/use_cases/classify/test_classify.py b/tests/use_cases/classify/test_classify.py index a30875757..f00d10b9a 100644 --- a/tests/use_cases/classify/test_classify.py +++ b/tests/use_cases/classify/test_classify.py @@ -3,7 +3,7 @@ from pytest import fixture from intelligence_layer.connectors import AlephAlphaClientProtocol -from intelligence_layer.core import Chunk, Task +from intelligence_layer.core import Task, TextChunk from intelligence_layer.evaluation import ( Aggregator, DatasetRepository, @@ -77,7 +77,7 @@ def embedding_based_classify_example() -> List[Example[ClassifyInput, Sequence[s return [ Example( input=ClassifyInput( - chunk=Chunk("My university biology class really sucks."), + chunk=TextChunk("My university biology class really sucks."), labels=frozenset(["positive", "negative", "finance", "school"]), ), expected_output=["positive", "school"], @@ -92,21 +92,21 @@ def embedding_based_classify_examples( return embedding_based_classify_example + [ Example( input=ClassifyInput( - chunk=Chunk("My university banking class really sucks."), + chunk=TextChunk("My university banking class really sucks."), labels=frozenset(["positive", "negative", "finance", "school"]), ), expected_output=["negative", "finance", "school"], ), Example( input=ClassifyInput( - chunk=Chunk("I did great on the recent exam."), + chunk=TextChunk("I did great on the recent exam."), labels=frozenset(["positive", "negative", "finance", "school"]), ), expected_output=["positive", "school"], ), Example( input=ClassifyInput( - chunk=Chunk("Dogs are animals"), + chunk=TextChunk("Dogs are animals"), labels=frozenset(["positive", "negative", "finance", "school"]), ), expected_output=[], diff --git a/tests/use_cases/classify/test_embedding_based_classify.py b/tests/use_cases/classify/test_embedding_based_classify.py index 4173f4b01..a7f5ed51c 100644 --- a/tests/use_cases/classify/test_embedding_based_classify.py +++ 
b/tests/use_cases/classify/test_embedding_based_classify.py @@ -11,7 +11,7 @@ QdrantInMemoryRetriever, ) from intelligence_layer.core import NoOpTracer -from intelligence_layer.core.chunk import Chunk +from intelligence_layer.core.chunk import TextChunk from intelligence_layer.use_cases.classify.classify import ( ClassifyInput, MultiLabelClassifyOutput, @@ -101,7 +101,7 @@ def test_embedding_based_classify_returns_score_for_all_labels( embedding_based_classify: EmbeddingBasedClassify, ) -> None: classify_input = ClassifyInput( - chunk=Chunk("This is good"), + chunk=TextChunk("This is good"), labels=frozenset({"positive", "negative"}), ) classify_output = embedding_based_classify.run(classify_input, NoOpTracer()) @@ -116,7 +116,7 @@ def test_embedding_based_classify_raises_for_unknown_label( ) -> None: unknown_label = "neutral" classify_input = ClassifyInput( - chunk=Chunk("This is good"), + chunk=TextChunk("This is good"), labels=frozenset({"positive", "negative", unknown_label}), ) with raises(ValueError) as _: @@ -127,7 +127,7 @@ def test_embedding_based_classify_works_for_empty_labels_in_request( embedding_based_classify: EmbeddingBasedClassify, ) -> None: classify_input = ClassifyInput( - chunk=Chunk("This is good"), + chunk=TextChunk("This is good"), labels=frozenset(), ) result = embedding_based_classify.run(classify_input, NoOpTracer()) @@ -149,7 +149,7 @@ def test_embedding_based_classify_works_without_examples( ] embedding_based_classify = EmbeddingBasedClassify(client, labels_with_examples) classify_input = ClassifyInput( - chunk=Chunk("This is good"), + chunk=TextChunk("This is good"), labels=frozenset(), ) result = embedding_based_classify.run(classify_input, NoOpTracer()) diff --git a/tests/use_cases/classify/test_keyword_extract.py b/tests/use_cases/classify/test_keyword_extract.py index e323ce736..a11e70010 100644 --- a/tests/use_cases/classify/test_keyword_extract.py +++ b/tests/use_cases/classify/test_keyword_extract.py @@ -1,7 +1,7 @@ import pytest from intelligence_layer.core import NoOpTracer -from intelligence_layer.core.chunk import Chunk +from intelligence_layer.core.chunk import TextChunk from intelligence_layer.core.detect_language import Language, LanguageNotSupportedError from intelligence_layer.use_cases.classify.keyword_extract import ( KeywordExtract, @@ -16,7 +16,7 @@ def keyword_extract() -> KeywordExtract: def test_keyword_extract_works(keyword_extract: KeywordExtract) -> None: input = KeywordExtractInput( - chunk=Chunk("I really like my computer"), language=Language("en") + chunk=TextChunk("I really like my computer"), language=Language("en") ) result = keyword_extract.run(input, NoOpTracer()) @@ -27,7 +27,7 @@ def test_keyword_extract_raises_for_unsupported_language( keyword_extract: KeywordExtract, ) -> None: input = KeywordExtractInput( - chunk=Chunk("text about computers"), language=Language("pt") + chunk=TextChunk("text about computers"), language=Language("pt") ) with pytest.raises(LanguageNotSupportedError) as _: keyword_extract.run(input, NoOpTracer()) diff --git a/tests/use_cases/classify/test_prompt_based_classify.py b/tests/use_cases/classify/test_prompt_based_classify.py index 3e6693db2..87d2d2d7e 100644 --- a/tests/use_cases/classify/test_prompt_based_classify.py +++ b/tests/use_cases/classify/test_prompt_based_classify.py @@ -2,7 +2,7 @@ from pytest import fixture -from intelligence_layer.core import Chunk, InMemoryTracer, NoOpTracer +from intelligence_layer.core import InMemoryTracer, NoOpTracer, TextChunk from intelligence_layer.core.model 
import LuminousControlModel from intelligence_layer.evaluation import ( Aggregator, @@ -101,7 +101,7 @@ def test_prompt_based_classify_returns_score_for_all_labels( prompt_based_classify: PromptBasedClassify, ) -> None: classify_input = ClassifyInput( - chunk=Chunk("This is good"), + chunk=TextChunk("This is good"), labels=frozenset({"positive", "negative"}), ) @@ -116,7 +116,7 @@ def test_prompt_based_classify_accomodates_labels_starting_with_spaces( prompt_based_classify: PromptBasedClassify, ) -> None: classify_input = ClassifyInput( - chunk=Chunk("This is good"), labels=frozenset({" positive", "negative"}) + chunk=TextChunk("This is good"), labels=frozenset({" positive", "negative"}) ) tracer = InMemoryTracer() @@ -130,7 +130,7 @@ def test_prompt_based_classify_accomodates_labels_starting_with_different_spaces prompt_based_classify: PromptBasedClassify, ) -> None: classify_input = ClassifyInput( - chunk=Chunk("This is good"), labels=frozenset({" positive", " positive"}) + chunk=TextChunk("This is good"), labels=frozenset({" positive", " positive"}) ) classify_output = prompt_based_classify.run(classify_input, NoOpTracer()) @@ -144,7 +144,7 @@ def test_prompt_based_classify_sentiment_classification( prompt_based_classify: PromptBasedClassify, ) -> None: classify_input = ClassifyInput( - chunk=Chunk("This is good"), labels=frozenset({"positive", "negative"}) + chunk=TextChunk("This is good"), labels=frozenset({"positive", "negative"}) ) classify_output = prompt_based_classify.run(classify_input, NoOpTracer()) @@ -157,7 +157,7 @@ def test_prompt_based_classify_emotion_classification( prompt_based_classify: PromptBasedClassify, ) -> None: classify_input = ClassifyInput( - chunk=Chunk("I love my job"), + chunk=TextChunk("I love my job"), labels=frozenset({"happy", "sad", "frustrated", "angry"}), ) @@ -171,7 +171,7 @@ def test_prompt_based_classify_handles_labels_starting_with_same_token( prompt_based_classify: PromptBasedClassify, ) -> None: classify_input = ClassifyInput( - chunk=Chunk("This is good"), + chunk=TextChunk("This is good"), labels=frozenset({"positive", "positive positive"}), ) @@ -194,7 +194,7 @@ def test_can_evaluate_classify( ) -> None: example = Example( input=ClassifyInput( - chunk=Chunk("This is good"), + chunk=TextChunk("This is good"), labels=frozenset({"positive", "negative"}), ), expected_output=["positive"], @@ -231,14 +231,14 @@ def test_can_aggregate_evaluations( positive_lst: Sequence[str] = ["positive"] correct_example = Example( input=ClassifyInput( - chunk=Chunk("This is good"), + chunk=TextChunk("This is good"), labels=frozenset({"positive", "negative"}), ), expected_output=positive_lst, ) incorrect_example = Example( input=ClassifyInput( - chunk=Chunk("This is extremely bad"), + chunk=TextChunk("This is extremely bad"), labels=frozenset({"positive", "negative"}), ), expected_output=positive_lst, diff --git a/tests/use_cases/qa/test_multiple_chunk_qa.py b/tests/use_cases/qa/test_multiple_chunk_qa.py index 5b1af9a45..195f041d9 100644 --- a/tests/use_cases/qa/test_multiple_chunk_qa.py +++ b/tests/use_cases/qa/test_multiple_chunk_qa.py @@ -3,7 +3,7 @@ from pytest import fixture from intelligence_layer.core import NoOpTracer -from intelligence_layer.core.chunk import Chunk +from intelligence_layer.core.chunk import TextChunk from intelligence_layer.core.detect_language import Language from intelligence_layer.use_cases.qa.multiple_chunk_qa import ( MultipleChunkQa, @@ -16,13 +16,13 @@ def multiple_chunk_qa() -> MultipleChunkQa: return MultipleChunkQa() 
-CHUNK_CONTAINING_ANSWER = Chunk( +CHUNK_CONTAINING_ANSWER = TextChunk( "Paul Nicolas lost his mother at the age of 3, and then his father in 1914.[3] He was raised by his mother-in-law together with his brother Henri. " "He began his football career with Saint-Mandé Club in 1916. Initially, he played as a defender, but he quickly realized that his destiny laid at the " "forefront since he scored many goals.[3] In addition to his goal-scoring instinct, Nicolas also stood out for his strong character on the pitch, " "and these two qualities combined eventually drew the attention of Mr. Fort, the then president of the Gallia Club, who signed him as a centre-forward in 1916." ) -RELATED_CHUNK_WITHOUT_ANSWER = Chunk( +RELATED_CHUNK_WITHOUT_ANSWER = TextChunk( "In addition to his goal-scoring instinct, Nicolas also stood out for his strong character on the pitch, and these two qualities combined eventually drew the " "attention of Mr. Fort, the then president of the Gallia Club, who signed him as a centre-forward in 1916. " ) @@ -34,7 +34,10 @@ def multiple_chunk_qa() -> MultipleChunkQa: def test_multiple_chunk_qa_with_mulitple_chunks( multiple_chunk_qa: MultipleChunkQa, ) -> None: - chunks: Sequence[Chunk] = [CHUNK_CONTAINING_ANSWER, RELATED_CHUNK_WITHOUT_ANSWER] + chunks: Sequence[TextChunk] = [ + CHUNK_CONTAINING_ANSWER, + RELATED_CHUNK_WITHOUT_ANSWER, + ] input = MultipleChunkQaInput(chunks=chunks, question=RELATED_QUESTION) output = multiple_chunk_qa.run(input, NoOpTracer()) @@ -50,7 +53,7 @@ def test_multiple_chunk_qa_with_mulitple_chunks( def test_multiple_chunk_qa_without_answer(multiple_chunk_qa: MultipleChunkQa) -> None: - chunks: Sequence[Chunk] = [CHUNK_CONTAINING_ANSWER] + chunks: Sequence[TextChunk] = [CHUNK_CONTAINING_ANSWER] input = MultipleChunkQaInput(chunks=chunks, question=UNRELATED_QUESTION) output = multiple_chunk_qa.run(input, NoOpTracer()) diff --git a/tests/use_cases/qa/test_single_chunk_qa.py b/tests/use_cases/qa/test_single_chunk_qa.py index 752bdc54c..c487120a2 100644 --- a/tests/use_cases/qa/test_single_chunk_qa.py +++ b/tests/use_cases/qa/test_single_chunk_qa.py @@ -1,6 +1,11 @@ import pytest -from intelligence_layer.core import Chunk, Language, LuminousControlModel, NoOpTracer +from intelligence_layer.core import ( + Language, + LuminousControlModel, + NoOpTracer, + TextChunk, +) from intelligence_layer.core.detect_language import LanguageNotSupportedError from intelligence_layer.use_cases.qa.single_chunk_qa import ( QaSetup, @@ -11,7 +16,7 @@ def test_qa_with_answer(single_chunk_qa: SingleChunkQa) -> None: input = SingleChunkQaInput( - chunk=Chunk( + chunk=TextChunk( "Paul Nicolas lost his mother at the age of 3, and then his father in 1914.[3] He was raised by his mother-in-law together with his brother Henri. He began his football career with Saint-Mandé Club in 1916. Initially, he played as a defender, but he quickly realized that his destiny laid at the forefront since he scored many goals.[3] In addition to his goal-scoring instinct, Nicolas also stood out for his strong character on the pitch, and these two qualities combined eventually drew the attention of Mr. Fort, the then president of the Gallia Club, who signed him as a centre-forward in 1916." 
), question="What is the name of Paul Nicolas' brother?", @@ -27,7 +32,7 @@ def test_qa_with_answer(single_chunk_qa: SingleChunkQa) -> None: def test_qa_with_no_answer(single_chunk_qa: SingleChunkQa) -> None: input = SingleChunkQaInput( - chunk=Chunk( + chunk=TextChunk( "Paul Nicolas lost his mother at the age of 3, and then his father in 1914.[3] He was raised by his mother-in-law together with his brother Henri. He began his football career with Saint-Mandé Club in 1916. Initially, he played as a defender, but he quickly realized that his destiny laid at the forefront since he scored many goals.[3] In addition to his goal-scoring instinct, Nicolas also stood out for his strong character on the pitch, and these two qualities combined eventually drew the attention of Mr. Fort, the then president of the Gallia Club, who signed him as a centre-forward in 1916." ), question="What is the capital of Germany?", @@ -39,7 +44,7 @@ def test_qa_with_no_answer(single_chunk_qa: SingleChunkQa) -> None: def test_language_not_supported_exception(single_chunk_qa: SingleChunkQa) -> None: input = SingleChunkQaInput( - chunk=Chunk( + chunk=TextChunk( "Paul Nicolas stracił matkę w wieku 3 lat, a następnie ojca w 1914 r.[3] Wychowywała go teściowa wraz z bratem Henrim. Karierę piłkarską rozpoczął w klubie Saint-Mandé w 1916 roku. Początkowo grał jako obrońca, ale szybko zdał sobie sprawę, że jego przeznaczeniem jest gra w pierwszym składzie, ponieważ strzelał wiele bramek[3]. Oprócz instynktu bramkarskiego, Nicolas wyróżniał się również silnym charakterem na boisku, a te dwie cechy w połączeniu ostatecznie zwróciły uwagę pana Forta, ówczesnego prezesa klubu Gallia, który podpisał z nim kontrakt jako środkowym napastnikiem w 1916 roku." ), question="Jaka jest stolica Niemiec?", @@ -67,7 +72,7 @@ def test_qa_with_logit_bias_for_no_answer( ) input = SingleChunkQaInput( - chunk=Chunk( + chunk=TextChunk( "Paul Nicolas lost his mother at the age of 3, and then his father in 1914.[3] He was raised by his mother-in-law together with his brother Henri. He began his football career with Saint-Mandé Club in 1916. Initially, he played as a defender, but he quickly realized that his destiny laid at the forefront since he scored many goals.[3] In addition to his goal-scoring instinct, Nicolas also stood out for his strong character on the pitch, and these two qualities combined eventually drew the attention of Mr. Fort, the then president of the Gallia Club, who signed him as a centre-forward in 1916." ), question="When did he lose his mother?", diff --git a/tests/use_cases/summarize/conftest.py b/tests/use_cases/summarize/conftest.py index 0afb282b9..e69ed5fcb 100644 --- a/tests/use_cases/summarize/conftest.py +++ b/tests/use_cases/summarize/conftest.py @@ -1,6 +1,6 @@ from pytest import fixture -from intelligence_layer.core.chunk import Chunk +from intelligence_layer.core.chunk import TextChunk from intelligence_layer.core.model import LuminousControlModel from intelligence_layer.use_cases.summarize.steerable_long_context_summarize import ( SteerableLongContextSummarize, @@ -18,8 +18,8 @@ def steerable_single_chunk_summarize( @fixture -def chunk() -> Chunk: - return Chunk( +def chunk() -> TextChunk: + return TextChunk( "The brown bear (Ursus arctos) is a large bear species found across Eurasia and North America.[1][3] In North America, the populations of brown bears are called grizzly bears, while the subspecies that inhabits the Kodiak Islands of Alaska is known as the Kodiak bear. 
It is one of the largest living terrestrial members of the order Carnivora, rivaled in size only by its closest relative, the polar bear (Ursus maritimus), which is much less variable in size and slightly bigger on average.[4][5][6][7][8] The brown bear's range includes parts of Russia, Central Asia, the Himalayas, China, Canada, the United States, Hokkaido, Scandinavia, Finland, the Balkans, the Picos de Europa and the Carpathian region (especially Romania), Iran, Anatolia, and the Caucasus.[1][9] The brown bear is recognized as a national and state animal in several European countries.[10]" ) diff --git a/tests/use_cases/summarize/test_summarize.py b/tests/use_cases/summarize/test_summarize.py index ee7b9e9f5..938a2417c 100644 --- a/tests/use_cases/summarize/test_summarize.py +++ b/tests/use_cases/summarize/test_summarize.py @@ -1,6 +1,6 @@ from pytest import fixture -from intelligence_layer.core import Chunk, Language, NoOpTracer +from intelligence_layer.core import Language, NoOpTracer, TextChunk from intelligence_layer.evaluation import ( Aggregator, DatasetRepository, @@ -165,7 +165,7 @@ def test_single_chunk_summarize_evaluator( AggregatedSummarizeEvaluation, ], single_chunk_summarize_runner: Runner[str, str], - chunk: Chunk, + chunk: TextChunk, no_op_tracer: NoOpTracer, in_memory_dataset_repository: InMemoryDatasetRepository, ) -> None: diff --git a/tests/use_cases/test_intelligence_starter_app.py b/tests/use_cases/test_intelligence_starter_app.py index 9cee137b7..6594b49a6 100644 --- a/tests/use_cases/test_intelligence_starter_app.py +++ b/tests/use_cases/test_intelligence_starter_app.py @@ -1,7 +1,7 @@ from fastapi import FastAPI, testclient from pytest import fixture -from intelligence_layer.core import Chunk, IntelligenceApp +from intelligence_layer.core import IntelligenceApp, TextChunk from intelligence_layer.use_cases.classify.classify import ClassifyInput from intelligence_layer.use_cases.intelligence_starter_app import IntelligenceStarterApp from intelligence_layer.use_cases.qa.long_context_qa import LongContextQaInput @@ -17,7 +17,7 @@ def test_intelligence_starter_app_classify_works(starter_app: IntelligenceApp) - client = testclient.TestClient(starter_app._fast_api_app) path = "/classify" - input = ClassifyInput(chunk=Chunk("chunk"), labels=frozenset({"cool"})) + input = ClassifyInput(chunk=TextChunk("chunk"), labels=frozenset({"cool"})) response = client.post(path, json=input.model_dump(mode="json")) response.raise_for_status()
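Taken together, call sites migrate by using `TextChunk` wherever a plain string segment is meant and reserving `Chunk` for the chunking task. A minimal end-to-end sketch against the renamed types, mirroring the notebook and doctest snippets above (assumes a configured Aleph Alpha token):

    from intelligence_layer.core import InMemoryTracer, TextChunk
    from intelligence_layer.use_cases import ClassifyInput, PromptBasedClassify

    task = PromptBasedClassify()  # defaults to luminous-base-control-20240215
    input = ClassifyInput(
        chunk=TextChunk("This is a happy text."),
        labels=frozenset({"positive", "negative"}),
    )
    output = task.run(input, InMemoryTracer())
    # output carries one score per label; see SingleLabelClassifyOutput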