Support abstract LargeLanguageModel-class to integrate with other APIs
adjust changelog

linting

hopefully improve flaky QA test

hopefully fix test

address PR comments

revert change in SingleChunkQa
NickyHavoc committed Aug 20, 2024
1 parent f2d7f45 commit 9e51f8c
Showing 7 changed files with 329 additions and 93 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
@@ -7,7 +7,10 @@
 ...
 
 ### Features
-- Remove cap for `max_concurrency` in `LimitedConcurrencyClient`.
+- Remove cap for `max_concurrency` in `LimitedConcurrencyClient`.
+- Abstract `LanguageModel` class to integrate with LLMs from any API
+- Abstract `ChatModel` class to integrate with chat models from any API
+- Every `LanguageModel` supports echo to retrieve log probs for an expected completion given a prompt
 
 ### Fixes
 - increase number of returned `log_probs` in `EloQaEvaluationLogic` to avoid missing a valid answer
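The three new changelog entries are the core of this commit: `LanguageModel` defines a provider-agnostic completion interface, `ChatModel` layers a chat-style interface on top of it, and every `LanguageModel` exposes an echo mechanism for retrieving log probs of an expected completion. The diff shown here does not include the abstract method signatures, so the sketch below only mirrors the pattern with assumed names (`generate`, `generate_chat`, a simplified `Message`) to illustrate how a model from another API could be plugged in behind such an abstraction; it is not the library's actual interface.

    # Illustrative sketch only: the method names below are assumptions, not the
    # actual abstract interface defined in intelligence_layer.core.model.
    from abc import ABC, abstractmethod
    from collections.abc import Sequence
    from dataclasses import dataclass


    @dataclass
    class Message:
        role: str  # e.g. "user", "assistant"
        content: str


    class LanguageModelSketch(ABC):
        """Stand-in for the abstract LanguageModel: one subclass per provider API."""

        @abstractmethod
        def generate(self, prompt: str) -> str:
            """Return a completion for `prompt` from the wrapped API."""


    class ChatModelSketch(LanguageModelSketch):
        """Stand-in for the abstract ChatModel: adds a chat-style entry point."""

        @abstractmethod
        def generate_chat(self, messages: Sequence[Message]) -> str:
            """Return the assistant reply for a sequence of chat messages."""


    class OtherApiChatModel(ChatModelSketch):
        """Example of integrating a non-Aleph-Alpha API behind the same interface."""

        def generate(self, prompt: str) -> str:
            return self.generate_chat([Message(role="user", content=prompt)])

        def generate_chat(self, messages: Sequence[Message]) -> str:
            # A real implementation would call the other provider's client here;
            # a canned reply keeps the sketch self-contained and runnable.
            return f"(reply to {len(messages)} message(s))"


    if __name__ == "__main__":
        print(OtherApiChatModel().generate("Hello"))
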
4 changes: 4 additions & 0 deletions src/intelligence_layer/core/__init__.py
@@ -16,14 +16,18 @@
 from .instruct import Instruct as Instruct
 from .instruct import InstructInput as InstructInput
 from .model import AlephAlphaModel as AlephAlphaModel
+from .model import ChatModel as ChatModel
 from .model import CompleteInput as CompleteInput
 from .model import CompleteOutput as CompleteOutput
 from .model import ControlModel as ControlModel
 from .model import ExplainInput as ExplainInput
 from .model import ExplainOutput as ExplainOutput
+from .model import LanguageModel as LanguageModel
 from .model import Llama2InstructModel as Llama2InstructModel
+from .model import Llama3ChatModel as Llama3ChatModel
 from .model import Llama3InstructModel as Llama3InstructModel
 from .model import LuminousControlModel as LuminousControlModel
+from .model import Message as Message
 from .prompt_template import Cursor as Cursor
 from .prompt_template import PromptItemCursor as PromptItemCursor
 from .prompt_template import PromptRange as PromptRange
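Since `__init__.py` now re-exports the new names, downstream code can import them directly from `intelligence_layer.core` rather than reaching into `intelligence_layer.core.model` (assuming the package is installed):

    # Enabled by the re-exports added above.
    from intelligence_layer.core import (
        ChatModel,
        LanguageModel,
        Llama3ChatModel,
        Message,
    )
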
73 changes: 20 additions & 53 deletions src/intelligence_layer/core/echo.py
@@ -1,12 +1,10 @@
 from collections.abc import Sequence
 from typing import NewType
 
-from aleph_alpha_client import Prompt, Tokens
+from aleph_alpha_client import Prompt, Text
 from pydantic import BaseModel
-from tokenizers import Encoding  # type: ignore
 
-from intelligence_layer.core.model import AlephAlphaModel, CompleteInput
-from intelligence_layer.core.prompt_template import PromptTemplate
+from intelligence_layer.core.model import AlephAlphaModel
 from intelligence_layer.core.task import Task, Token
 from intelligence_layer.core.tracer.tracer import TaskSpan
 
@@ -73,56 +71,25 @@ def __init__(self, model: AlephAlphaModel) -> None:
         self._model = model
 
     def do_run(self, input: EchoInput, task_span: TaskSpan) -> EchoOutput:
-        # We tokenize the prompt separately so we don't have an overlap in the tokens.
-        # If we don't do this, the end of the prompt and expected completion can be merged into unexpected tokens.
-        expected_completion_tokens = self._tokenize(input.expected_completion)
-        prompt_template = PromptTemplate(self.PROMPT_TEMPLATE_STR)
-        prompt = prompt_template.to_rich_prompt(
-            prompt=prompt_template.embed_prompt(input.prompt),
-            expected_completion=prompt_template.placeholder(
-                Tokens.from_token_ids(
-                    [token.token_id for token in expected_completion_tokens]
-                )
-            ),
-        )
-        output = self._model.complete(
-            CompleteInput(
-                prompt=prompt,
-                maximum_tokens=0,
-                log_probs=0,
-                tokens=True,
-                echo=True,
-            ),
-            task_span,
-        )
-        assert output.completions[0].log_probs
-        log_prob_dicts = output.completions[0].log_probs[
-            -len(expected_completion_tokens) :
-        ]
-        tokens_with_prob = []
-        for token, log_prob in zip(
-            expected_completion_tokens, log_prob_dicts, strict=True
-        ):
-            assert token.token in log_prob
-            tokens_with_prob.append(
-                TokenWithLogProb(
-                    token=token,
-                    prob=LogProb(log_prob.get(token.token, 0.0) or 0.0),
-                )
+        if len(input.prompt.items) != 1:
+            raise NotImplementedError(
+                "`Echo` currently only supports prompts with one item."
             )
-        return EchoOutput(tokens_with_log_probs=tokens_with_prob)
 
-    def _tokenize(self, text: str) -> Sequence[Token]:
-        # Turns the expected output into list of token ids. Important so that we know how many tokens
-        # the label is and can retrieve the last N log probs for the label
-        tokenizer = self._model.get_tokenizer()
-        if tokenizer.pre_tokenizer:
-            tokenizer.pre_tokenizer.add_prefix_space = False
-        encoding: Encoding = tokenizer.encode(text)
-        return [
-            Token(
-                token=tokenizer.decode([token_id], skip_special_tokens=False),
-                token_id=token_id,
+        if not isinstance(input.prompt.items[0], Text):
+            raise NotImplementedError(
+                "`Echo` currently only supports prompts that are of type `Text`."
             )
-            for token_id in encoding.ids
+
+        echo_output = self._model.echo(
+            input.prompt.items[0].text, input.expected_completion, task_span
+        )
+
+        tokens_with_prob = [
+            TokenWithLogProb(
+                token=token,
+                prob=LogProb(log_prob or 0.0),
+            )
+            for token, log_prob in echo_output
         ]
+        return EchoOutput(tokens_with_log_probs=tokens_with_prob)
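
For orientation, the logic that this commit moves out of `Echo.do_run` and behind the model's `echo` method amounts to: tokenize the expected completion on its own, feed the prompt plus the pre-tokenized expected completion to the model with `maximum_tokens=0` and `echo=True`, then keep the log probs of the trailing tokens. Below is a minimal, self-contained restatement of that flow; `tokenize` and `score_tokens` are stand-in callables for illustration, not the library's API.

    from collections.abc import Callable, Sequence

    LogProb = float


    def echo_log_probs(
        prompt: str,
        expected_completion: str,
        tokenize: Callable[[str], Sequence[str]],
        score_tokens: Callable[[Sequence[str]], Sequence[LogProb]],
    ) -> list[tuple[str, LogProb]]:
        """Return (token, log prob) pairs for the expected completion given the prompt.

        `score_tokens` stands in for a completion request with echo=True and
        maximum_tokens=0, i.e. one log prob per input token. The expected
        completion is tokenized separately and appended as tokens rather than
        as raw text, mirroring how the removed code avoided merged tokens at
        the prompt/completion boundary.
        """
        expected_tokens = list(tokenize(expected_completion))
        all_tokens = list(tokenize(prompt)) + expected_tokens
        log_probs = score_tokens(all_tokens)
        # The last len(expected_tokens) log probs belong to the expected completion.
        return list(zip(expected_tokens, log_probs[-len(expected_tokens):]))


    if __name__ == "__main__":
        # Toy stand-ins: whitespace tokenizer, constant per-token log prob.
        print(
            echo_log_probs(
                "The capital of France is",
                "Paris",
                tokenize=str.split,
                score_tokens=lambda tokens: [-1.0] * len(tokens),
            )
        )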