Support abstract LargeLanguageModel-class to integrate with other APIs
adjust changelog

linting

hopefully improve flaky QA test

hopefully fix test

address PR comments

revert change in SingleChunkQa
NickyHavoc committed Aug 20, 2024
1 parent f2d7f45 commit 9e51f8c
Showing 7 changed files with 329 additions and 93 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
@@ -7,7 +7,10 @@
 ...
 
 ### Features
-- Remove cap for `max_concurrency` in `LimitedConcurrencyClient`.
+- Remove cap for `max_concurrency` in `LimitedConcurrencyClient`.
+- Abstract `LanguageModel` class to integrate with LLMs from any API
+- Abstract `ChatModel` class to integrate with chat models from any API
+- Every `LanguageModel` supports echo to retrieve log probs for an expected completion given a prompt
 
 ### Fixes
 - increase number of returned `log_probs` in `EloQaEvaluationLogic` to avoid missing a valid answer
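The three new changelog entries are the core of this commit: `LanguageModel` defines a provider-agnostic completion interface, `ChatModel` layers a chat-style interface on top of it, and every `LanguageModel` exposes an echo mechanism for retrieving log probs of an expected completion. The diff shown here does not include the abstract method signatures, so the sketch below only mirrors the pattern with assumed names (`generate`, `generate_chat`, a simplified `Message`) to illustrate how a model from another API could be plugged in behind such an abstraction; it is not the library's actual interface.

    # Illustrative sketch only: the method names below are assumptions, not the
    # actual abstract interface defined in intelligence_layer.core.model.
    from abc import ABC, abstractmethod
    from collections.abc import Sequence
    from dataclasses import dataclass


    @dataclass
    class Message:
        role: str  # e.g. "user", "assistant"
        content: str


    class LanguageModelSketch(ABC):
        """Stand-in for the abstract LanguageModel: one subclass per provider API."""

        @abstractmethod
        def generate(self, prompt: str) -> str:
            """Return a completion for `prompt` from the wrapped API."""


    class ChatModelSketch(LanguageModelSketch):
        """Stand-in for the abstract ChatModel: adds a chat-style entry point."""

        @abstractmethod
        def generate_chat(self, messages: Sequence[Message]) -> str:
            """Return the assistant reply for a sequence of chat messages."""


    class OtherApiChatModel(ChatModelSketch):
        """Example of integrating a non-Aleph-Alpha API behind the same interface."""

        def generate(self, prompt: str) -> str:
            return self.generate_chat([Message(role="user", content=prompt)])

        def generate_chat(self, messages: Sequence[Message]) -> str:
            # A real implementation would call the other provider's client here;
            # a canned reply keeps the sketch self-contained and runnable.
            return f"(reply to {len(messages)} message(s))"


    if __name__ == "__main__":
        print(OtherApiChatModel().generate("Hello"))
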
4 changes: 4 additions & 0 deletions src/intelligence_layer/core/__init__.py
@@ -16,14 +16,18 @@
 from .instruct import Instruct as Instruct
 from .instruct import InstructInput as InstructInput
 from .model import AlephAlphaModel as AlephAlphaModel
+from .model import ChatModel as ChatModel
 from .model import CompleteInput as CompleteInput
 from .model import CompleteOutput as CompleteOutput
 from .model import ControlModel as ControlModel
 from .model import ExplainInput as ExplainInput
 from .model import ExplainOutput as ExplainOutput
+from .model import LanguageModel as LanguageModel
 from .model import Llama2InstructModel as Llama2InstructModel
+from .model import Llama3ChatModel as Llama3ChatModel
 from .model import Llama3InstructModel as Llama3InstructModel
 from .model import LuminousControlModel as LuminousControlModel
+from .model import Message as Message
 from .prompt_template import Cursor as Cursor
 from .prompt_template import PromptItemCursor as PromptItemCursor
 from .prompt_template import PromptRange as PromptRange
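Since `__init__.py` now re-exports the new names, downstream code can import them directly from `intelligence_layer.core` rather than reaching into `intelligence_layer.core.model` (assuming the package is installed):

    # Enabled by the re-exports added above.
    from intelligence_layer.core import (
        ChatModel,
        LanguageModel,
        Llama3ChatModel,
        Message,
    )
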
73 changes: 20 additions & 53 deletions src/intelligence_layer/core/echo.py
@@ -1,12 +1,10 @@
 from collections.abc import Sequence
 from typing import NewType
 
-from aleph_alpha_client import Prompt, Tokens
+from aleph_alpha_client import Prompt, Text
 from pydantic import BaseModel
-from tokenizers import Encoding  # type: ignore
 
-from intelligence_layer.core.model import AlephAlphaModel, CompleteInput
-from intelligence_layer.core.prompt_template import PromptTemplate
+from intelligence_layer.core.model import AlephAlphaModel
 from intelligence_layer.core.task import Task, Token
 from intelligence_layer.core.tracer.tracer import TaskSpan
 
@@ -73,56 +71,25 @@ def __init__(self, model: AlephAlphaModel) -> None:
         self._model = model
 
     def do_run(self, input: EchoInput, task_span: TaskSpan) -> EchoOutput:
-        # We tokenize the prompt separately so we don't have an overlap in the tokens.
-        # If we don't do this, the end of the prompt and expected completion can be merged into unexpected tokens.
-        expected_completion_tokens = self._tokenize(input.expected_completion)
-        prompt_template = PromptTemplate(self.PROMPT_TEMPLATE_STR)
-        prompt = prompt_template.to_rich_prompt(
-            prompt=prompt_template.embed_prompt(input.prompt),
-            expected_completion=prompt_template.placeholder(
-                Tokens.from_token_ids(
-                    [token.token_id for token in expected_completion_tokens]
-                )
-            ),
-        )
-        output = self._model.complete(
-            CompleteInput(
-                prompt=prompt,
-                maximum_tokens=0,
-                log_probs=0,
-                tokens=True,
-                echo=True,
-            ),
-            task_span,
-        )
-        assert output.completions[0].log_probs
-        log_prob_dicts = output.completions[0].log_probs[
-            -len(expected_completion_tokens) :
-        ]
-        tokens_with_prob = []
-        for token, log_prob in zip(
-            expected_completion_tokens, log_prob_dicts, strict=True
-        ):
-            assert token.token in log_prob
-            tokens_with_prob.append(
-                TokenWithLogProb(
-                    token=token,
-                    prob=LogProb(log_prob.get(token.token, 0.0) or 0.0),
-                )
+        if len(input.prompt.items) != 1:
+            raise NotImplementedError(
+                "`Echo` currently only supports prompts with one item."
             )
-        return EchoOutput(tokens_with_log_probs=tokens_with_prob)
 
-    def _tokenize(self, text: str) -> Sequence[Token]:
-        # Turns the expected output into list of token ids. Important so that we know how many tokens
-        # the label is and can retrieve the last N log probs for the label
-        tokenizer = self._model.get_tokenizer()
-        if tokenizer.pre_tokenizer:
-            tokenizer.pre_tokenizer.add_prefix_space = False
-        encoding: Encoding = tokenizer.encode(text)
-        return [
-            Token(
-                token=tokenizer.decode([token_id], skip_special_tokens=False),
-                token_id=token_id,
+        if not isinstance(input.prompt.items[0], Text):
+            raise NotImplementedError(
+                "`Echo` currently only supports prompts that are of type `Text`."
             )
-            for token_id in encoding.ids
+
+        echo_output = self._model.echo(
+            input.prompt.items[0].text, input.expected_completion, task_span
+        )
+
+        tokens_with_prob = [
+            TokenWithLogProb(
+                token=token,
+                prob=LogProb(log_prob or 0.0),
+            )
+            for token, log_prob in echo_output
         ]
+        return EchoOutput(tokens_with_log_probs=tokens_with_prob)
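
For orientation, the logic that this commit moves out of `Echo.do_run` and behind the model's `echo` method amounts to: tokenize the expected completion on its own, feed the prompt plus the pre-tokenized expected completion to the model with `maximum_tokens=0` and `echo=True`, then keep the log probs of the trailing tokens. Below is a minimal, self-contained restatement of that flow; `tokenize` and `score_tokens` are stand-in callables for illustration, not the library's API.

    from collections.abc import Callable, Sequence

    LogProb = float


    def echo_log_probs(
        prompt: str,
        expected_completion: str,
        tokenize: Callable[[str], Sequence[str]],
        score_tokens: Callable[[Sequence[str]], Sequence[LogProb]],
    ) -> list[tuple[str, LogProb]]:
        """Return (token, log prob) pairs for the expected completion given the prompt.

        `score_tokens` stands in for a completion request with echo=True and
        maximum_tokens=0, i.e. one log prob per input token. The expected
        completion is tokenized separately and appended as tokens rather than
        as raw text, mirroring how the removed code avoided merged tokens at
        the prompt/completion boundary.
        """
        expected_tokens = list(tokenize(expected_completion))
        all_tokens = list(tokenize(prompt)) + expected_tokens
        log_probs = score_tokens(all_tokens)
        # The last len(expected_tokens) log probs belong to the expected completion.
        return list(zip(expected_tokens, log_probs[-len(expected_tokens):]))


    if __name__ == "__main__":
        # Toy stand-ins: whitespace tokenizer, constant per-token log prob.
        print(
            echo_log_probs(
                "The capital of France is",
                "Paris",
                tokenize=str.split,
                score_tokens=lambda tokens: [-1.0] * len(tokens),
            )
        )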