Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Explicit dependency injection #560

Merged
merged 11 commits into from
Feb 29, 2024
2 changes: 1 addition & 1 deletion docs/intelligence_layer.core.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ Module contents

.. automodule:: intelligence_layer.core

.. autoclass:: Chunk
.. autoclass:: TextChunk
4 changes: 2 additions & 2 deletions src/examples/classification.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@
"from dotenv import load_dotenv\n",
"\n",
"from intelligence_layer.connectors import LimitedConcurrencyClient\n",
"from intelligence_layer.core import Chunk, InMemoryTracer\n",
"from intelligence_layer.core import TextChunk, InMemoryTracer\n",
"from intelligence_layer.use_cases import ClassifyInput, PromptBasedClassify\n",
"\n",
"load_dotenv()\n",
"\n",
"text_to_classify = Chunk(\n",
"text_to_classify = TextChunk(\n",
" \"In the distant future, a space exploration party embarked on a thrilling journey to the uncharted regions of the galaxy. \\n\\\n",
"With excitement in their hearts and the cosmos as their canvas, they ventured into the unknown, discovering breathtaking celestial wonders. \\n\\\n",
"As they gazed upon distant stars and nebulas, they forged unforgettable memories that would forever bind them as pioneers of the cosmos.\"\n",
Expand Down
8 changes: 4 additions & 4 deletions src/examples/evaluation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,13 @@
"metadata": {},
"outputs": [],
"source": [
"from intelligence_layer.core import Chunk, NoOpTracer\n",
"from intelligence_layer.core import TextChunk, NoOpTracer\n",
"from intelligence_layer.use_cases import ClassifyInput\n",
"from intelligence_layer.evaluation import Example\n",
"\n",
"\n",
"classify_input = ClassifyInput(\n",
" chunk=Chunk(\"This is good\"),\n",
" chunk=TextChunk(\"This is good\"),\n",
" labels=frozenset({\"positive\", \"negative\"}),\n",
")\n",
"\n",
Expand Down Expand Up @@ -191,7 +191,7 @@
"dataset_id = dataset_repository.create_dataset(\n",
" examples=[\n",
" Example(\n",
" input=ClassifyInput(chunk=Chunk(d[\"text\"]), labels=all_labels),\n",
" input=ClassifyInput(chunk=TextChunk(d[\"text\"]), labels=all_labels),\n",
" expected_output=d[\"label_name\"],\n",
" )\n",
" for d in data\n",
Expand Down Expand Up @@ -391,7 +391,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.11.6"
}
},
"nbformat": 4,
Expand Down
6 changes: 3 additions & 3 deletions src/intelligence_layer/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
from .chunk import Chunk as Chunk
from .chunk import ChunkInput as ChunkInput
from .chunk import ChunkOutput as ChunkOutput
from .chunk import ChunkOverlapTask as ChunkOverlapTask
from .chunk import ChunkTask as ChunkTask
from .chunk import ChunkOverlap as ChunkOverlap
from .chunk import TextChunk as TextChunk
from .detect_language import DetectLanguage as DetectLanguage
from .detect_language import DetectLanguageInput as DetectLanguageInput
from .detect_language import DetectLanguageOutput as DetectLanguageOutput
from .detect_language import Language as Language
from .echo import Echo as Echo
from .echo import EchoInput as EchoInput
from .echo import EchoOutput as EchoOutput
from .echo import EchoTask as EchoTask
from .echo import TokenWithLogProb as TokenWithLogProb
from .instruct import Instruct as Instruct
from .instruct import InstructInput as InstructInput
Expand Down
27 changes: 16 additions & 11 deletions src/intelligence_layer/core/chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
from pydantic import BaseModel
from semantic_text_splitter import HuggingFaceTextSplitter

from intelligence_layer.core.model import ControlModel
from intelligence_layer.core.model import ControlModel, LuminousControlModel
from intelligence_layer.core.task import Task
from intelligence_layer.core.tracer.tracer import TaskSpan

Chunk = NewType("Chunk", str)
TextChunk = NewType("TextChunk", str)
"""Segment of a larger text.

This type implies that the string is smaller than the context size of the model where it is used.
Expand Down Expand Up @@ -35,10 +35,10 @@ class ChunkOutput(BaseModel):
chunks: A list of smaller sections of the input text.
"""

chunks: Sequence[Chunk]
chunks: Sequence[TextChunk]


class ChunkTask(Task[ChunkInput, ChunkOutput]):
class Chunk(Task[ChunkInput, ChunkOutput]):
"""Splits a longer text into smaller text chunks.

Provide a text of any length and chunk it into smaller pieces using a
Expand All @@ -50,20 +50,23 @@ class ChunkTask(Task[ChunkInput, ChunkOutput]):
max_tokens_per_chunk: The maximum number of tokens to fit into one chunk.
"""

def __init__(self, model: ControlModel, max_tokens_per_chunk: int):
def __init__(
self, model: ControlModel | None = None, max_tokens_per_chunk: int = 512
):
super().__init__()
model = model or LuminousControlModel()
self._splitter = HuggingFaceTextSplitter(model.get_tokenizer())
self._max_tokens_per_chunk = max_tokens_per_chunk

def do_run(self, input: ChunkInput, task_span: TaskSpan) -> ChunkOutput:
chunks = [
Chunk(t)
TextChunk(t)
for t in self._splitter.chunks(input.text, self._max_tokens_per_chunk)
]
return ChunkOutput(chunks=chunks)


class ChunkOverlapTask(Task[ChunkInput, ChunkOutput]):
class ChunkOverlap(Task[ChunkInput, ChunkOutput]):
"""Splits a longer text into smaller text chunks, where every chunk overlaps
with the previous chunk by `overlap_length_tokens` number of tokens.

Expand All @@ -79,9 +82,9 @@ class ChunkOverlapTask(Task[ChunkInput, ChunkOutput]):

def __init__(
self,
model: ControlModel,
max_tokens_per_chunk: int,
overlap_length_tokens: int,
model: ControlModel | None = None,
max_tokens_per_chunk: int = 512,
overlap_length_tokens: int = 0,
):
super().__init__()
if overlap_length_tokens >= max_tokens_per_chunk:
Expand All @@ -90,8 +93,10 @@ def __init__(
overlap_length_tokens, max_tokens_per_chunk
)
)
self.chunk_task = ChunkTask(model, overlap_length_tokens // 2)

model = model or LuminousControlModel()
self.tokenizer = model.get_tokenizer()
self.chunk_task = Chunk(model, overlap_length_tokens // 2)
self.max_tokens_per_chunk = max_tokens_per_chunk
self.overlap_length_tokens = overlap_length_tokens

Expand Down
20 changes: 12 additions & 8 deletions src/intelligence_layer/core/echo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
from pydantic import BaseModel
from tokenizers import Encoding # type: ignore

from intelligence_layer.core.model import CompleteInput, ControlModel
from intelligence_layer.core.model import (
CompleteInput,
ControlModel,
LuminousControlModel,
)
from intelligence_layer.core.prompt_template import PromptTemplate
from intelligence_layer.core.task import Task, Token
from intelligence_layer.core.tracer.tracer import TaskSpan
Expand All @@ -18,7 +22,7 @@ class TokenWithLogProb(BaseModel):


class EchoInput(BaseModel):
"""The input for an `EchoTask`.
"""The input for an `Echo` task.

Attributes:
prompt: The input text that serves as the starting point for the LLM.
Expand All @@ -31,7 +35,7 @@ class EchoInput(BaseModel):


class EchoOutput(BaseModel):
"""The output of an `EchoTask`.
"""The output of an `Echo` task.

Attributes:
tokens_with_log_probs: Every token of the `expected_completion` of the
Expand All @@ -42,7 +46,7 @@ class EchoOutput(BaseModel):
tokens_with_log_probs: Sequence[TokenWithLogProb]


class EchoTask(Task[EchoInput, EchoOutput]):
class Echo(Task[EchoInput, EchoOutput]):
"""Task that returns probabilities of a completion given a prompt.

Analyzes the likelihood of generating tokens in the expected completion based on
Expand All @@ -53,10 +57,10 @@ class EchoTask(Task[EchoInput, EchoOutput]):

Example:
>>> from aleph_alpha_client import Prompt
>>> from intelligence_layer.core import EchoTask,EchoInput, InMemoryTracer, LuminousControlModel
>>> from intelligence_layer.core import Echo, EchoInput, InMemoryTracer, LuminousControlModel

>>> model = LuminousControlModel(name="luminous-base-control")
>>> task = EchoTask(model)
>>> task = Echo(model)
>>> input = EchoInput(
... prompt=Prompt.from_text("This is a "),
... expected_completion="happy text",
Expand All @@ -67,9 +71,9 @@ class EchoTask(Task[EchoInput, EchoOutput]):

PROMPT_TEMPLATE_STR: str = "{{prompt}}{{expected_completion}}"

def __init__(self, model: ControlModel) -> None:
def __init__(self, model: ControlModel | None = None) -> None:
super().__init__()
self._model = model
self._model = model or LuminousControlModel()

def do_run(self, input: EchoInput, task_span: TaskSpan) -> EchoOutput:
# We tokenize the prompt separately so we don't have an overlap in the tokens.
Expand Down
12 changes: 9 additions & 3 deletions src/intelligence_layer/core/instruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@

from pydantic import BaseModel

from intelligence_layer.core.model import CompleteInput, CompleteOutput, ControlModel
from intelligence_layer.core.model import (
CompleteInput,
CompleteOutput,
ControlModel,
LuminousControlModel,
)
from intelligence_layer.core.task import Task
from intelligence_layer.core.tracer.tracer import TaskSpan

Expand All @@ -15,9 +20,10 @@ class InstructInput(BaseModel):


class Instruct(Task[InstructInput, CompleteOutput]):
def __init__(self, model: ControlModel) -> None:

def __init__(self, model: ControlModel | None = None) -> None:
super().__init__()
self._model = model
self._model = model or LuminousControlModel()

def do_run(self, input: InstructInput, task_span: TaskSpan) -> CompleteOutput:
prompt = self._model.to_instruct_prompt(
Expand Down
2 changes: 1 addition & 1 deletion src/intelligence_layer/core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def __init__(
"luminous-base-control-20240215",
"luminous-extended-control-20240215",
"luminous-supreme-control-20240215",
],
] = "luminous-base-control",
client: Optional[AlephAlphaClientProtocol] = None,
) -> None:
super().__init__(name, client)
Expand Down
11 changes: 8 additions & 3 deletions src/intelligence_layer/core/text_highlight.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@
from aleph_alpha_client.explanation import TextScoreWithRaw
from pydantic import BaseModel

from intelligence_layer.core.model import ControlModel, ExplainInput, ExplainOutput
from intelligence_layer.core.model import (
ControlModel,
ExplainInput,
ExplainOutput,
LuminousControlModel,
)
from intelligence_layer.core.prompt_template import (
Cursor,
PromptRange,
Expand Down Expand Up @@ -97,11 +102,11 @@ class TextHighlight(Task[TextHighlightInput, TextHighlightOutput]):

def __init__(
self,
model: ControlModel,
model: ControlModel | None = None,
granularity: PromptGranularity = PromptGranularity.Sentence,
) -> None:
super().__init__()
self._model = model
self._model = model or LuminousControlModel()
self._granularity = granularity

def do_run(
Expand Down
4 changes: 2 additions & 2 deletions src/intelligence_layer/core/tracer/composite_tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ class CompositeTracer(Tracer, Generic[TracerVar]):
tracers: Tracers to which all subsequent log and span calls are forwarded.

Example:
>>> from intelligence_layer.core import InMemoryTracer, FileTracer, CompositeTracer, Chunk
>>> from intelligence_layer.core import InMemoryTracer, FileTracer, CompositeTracer, TextChunk
>>> from intelligence_layer.use_cases import PromptBasedClassify, ClassifyInput

>>> tracer_1 = InMemoryTracer()
>>> tracer_2 = InMemoryTracer()
>>> tracer = CompositeTracer([tracer_1, tracer_2])
>>> task = PromptBasedClassify()
>>> response = task.run(ClassifyInput(chunk=Chunk("Cool"), labels=frozenset({"label", "other label"})), tracer)
>>> response = task.run(ClassifyInput(chunk=TextChunk("Cool"), labels=frozenset({"label", "other label"})), tracer)
"""

def __init__(self, tracers: Sequence[TracerVar]) -> None:
Expand Down
4 changes: 2 additions & 2 deletions src/intelligence_layer/use_cases/classify/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from pydantic import BaseModel

from intelligence_layer.core import Chunk
from intelligence_layer.core import TextChunk
from intelligence_layer.evaluation import Example, MeanAccumulator
from intelligence_layer.evaluation.base_logic import (
AggregationLogic,
Expand All @@ -21,7 +21,7 @@ class ClassifyInput(BaseModel):
labels: Possible labels the model will choose a label from
"""

chunk: Chunk
chunk: TextChunk
labels: frozenset[str]


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
QdrantInMemoryRetriever,
RetrieverType,
)
from intelligence_layer.core import Chunk, Task, TaskSpan
from intelligence_layer.core import Task, TaskSpan, TextChunk
from intelligence_layer.use_cases.classify.classify import (
ClassifyInput,
MultiLabelClassifyOutput,
Expand Down Expand Up @@ -129,7 +129,7 @@ class EmbeddingBasedClassify(Task[ClassifyInput, MultiLabelClassifyOutput]):
>>> from intelligence_layer.connectors.limited_concurrency_client import (
... LimitedConcurrencyClient,
... )
>>> from intelligence_layer.core import Chunk, InMemoryTracer
>>> from intelligence_layer.core import TextChunk, InMemoryTracer
>>> from intelligence_layer.use_cases.classify.classify import ClassifyInput
>>> from intelligence_layer.use_cases.classify.embedding_based_classify import (
... EmbeddingBasedClassify,
Expand All @@ -153,7 +153,7 @@ class EmbeddingBasedClassify(Task[ClassifyInput, MultiLabelClassifyOutput]):
... ]
>>> client = LimitedConcurrencyClient.from_env()
>>> task = EmbeddingBasedClassify(client, labels_with_examples)
>>> input = ClassifyInput(chunk=Chunk("This is a happy text."), labels=frozenset({"positive", "negative"}))
>>> input = ClassifyInput(chunk=TextChunk("This is a happy text."), labels=frozenset({"positive", "negative"}))
>>> tracer = InMemoryTracer()
>>> output = task.run(input, tracer)
"""
Expand Down Expand Up @@ -212,7 +212,7 @@ def _validate_input_labels(self, input: ClassifyInput) -> None:
raise ValueError(f"Got unexpected labels: {', '.join(unknown_labels)}.")

def _label_search(
self, chunk: Chunk, label: str, task_span: TaskSpan
self, chunk: TextChunk, label: str, task_span: TaskSpan
) -> SearchOutput[int]:
search_input = QdrantSearchInput(
query=chunk,
Expand Down
4 changes: 2 additions & 2 deletions src/intelligence_layer/use_cases/classify/keyword_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pydantic import BaseModel

from intelligence_layer.core import Task, TaskSpan
from intelligence_layer.core.chunk import Chunk
from intelligence_layer.core.chunk import TextChunk
from intelligence_layer.core.detect_language import Language, language_config
from intelligence_layer.core.model import (
CompleteInput,
Expand Down Expand Up @@ -31,7 +31,7 @@


class KeywordExtractInput(BaseModel):
chunk: Chunk
chunk: TextChunk
language: Language


Expand Down
Loading