logprobs support for OpenAI completion models, refs #284
simonw committed Sep 19, 2023
1 parent 2b50427 commit 4fea461
Showing 6 changed files with 269 additions and 12 deletions.
18 changes: 10 additions & 8 deletions docs/aliases.md
@@ -19,14 +19,16 @@ result = CliRunner().invoke(cli, ["aliases", "list"])
cog.out("```\n{}```".format(result.output))
]]] -->
```
-3.5 : gpt-3.5-turbo
-chatgpt : gpt-3.5-turbo
-chatgpt-16k : gpt-3.5-turbo-16k
-3.5-16k : gpt-3.5-turbo-16k
-4 : gpt-4
-gpt4 : gpt-4
-4-32k : gpt-4-32k
-ada : ada-002 (embedding)
+3.5 : gpt-3.5-turbo
+chatgpt : gpt-3.5-turbo
+chatgpt-16k : gpt-3.5-turbo-16k
+3.5-16k : gpt-3.5-turbo-16k
+4 : gpt-4
+gpt4 : gpt-4
+4-32k : gpt-4-32k
+3.5-instruct : gpt-3.5-turbo-instruct
+chatgpt-instruct : gpt-3.5-turbo-instruct
+ada : ada-002 (embedding)
```
<!-- [[[end]]] -->

28 changes: 28 additions & 0 deletions docs/usage.md
@@ -256,6 +256,34 @@ OpenAI Chat: gpt-4-32k (aliases: 4-32k)
presence_penalty: float
stop: str
logit_bias: dict, str
OpenAI Completion: gpt-3.5-turbo-instruct (aliases: 3.5-instruct, chatgpt-instruct)
temperature: float
What sampling temperature to use, between 0 and 2. Higher values like
0.8 will make the output more random, while lower values like 0.2 will
make it more focused and deterministic.
max_tokens: int
Maximum number of tokens to generate.
top_p: float
An alternative to sampling with temperature, called nucleus sampling,
where the model considers the results of the tokens with top_p
probability mass. So 0.1 means only the tokens comprising the top 10%
probability mass are considered. Recommended to use top_p or
temperature but not both.
frequency_penalty: float
Number between -2.0 and 2.0. Positive values penalize new tokens based
on their existing frequency in the text so far, decreasing the model's
likelihood to repeat the same line verbatim.
presence_penalty: float
Number between -2.0 and 2.0. Positive values penalize new tokens based
on whether they appear in the text so far, increasing the model's
likelihood to talk about new topics.
stop: str
A string where the API will stop generating further tokens.
logit_bias: dict, str
Modify the likelihood of specified tokens appearing in the completion.
Pass a JSON string like '{"1712":-100, "892":-100, "1489":-100}'
logprobs: int
Include the log probabilities of most likely N per token
```
<!-- [[[end]]] -->
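
The options documented above can also be exercised from Python rather than the CLI. A minimal sketch, assuming an `OPENAI_API_KEY` is already configured and that prompt options such as `max_tokens` and the new `logprobs` are passed as keyword arguments to `prompt()` (an assumption here, mirroring the `-o` flags used in the tests further down):

```
import llm

# Hedged sketch: option names mirror the docs above; whether prompt() accepts
# them as keyword arguments in exactly this form is an assumption.
model = llm.get_model("gpt-3.5-turbo-instruct")
response = model.prompt("Say hi", max_tokens=16, logprobs=2)
print(response.text())
```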
1 change: 1 addition & 0 deletions llm/cli.py
@@ -280,6 +280,7 @@ def read_prompt():
else:
print(response.text())
except Exception as ex:
+        raise
raise click.ClickException(str(ex))

# Log to the database
25 changes: 25 additions & 0 deletions llm/default_plugins/openai_models.py
@@ -315,6 +315,13 @@ def build_kwargs(self, prompt):


class Completion(Chat):
class Options(Chat.Options):
logprobs: Optional[int] = Field(
description="Include the log probabilities of most likely N per token",
default=None,
le=5,
)

def __init__(self, *args, default_max_tokens=None, **kwargs):
super().__init__(*args, **kwargs)
self.default_max_tokens = default_max_tokens
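
The new `logprobs` option is declared with a pydantic upper bound (`le=5`), so values above 5 are rejected when the options are validated. A standalone sketch of that behaviour using a hypothetical stand-in class (not llm's actual `Completion.Options`):

```
from typing import Optional

from pydantic import BaseModel, Field, ValidationError


class CompletionOptions(BaseModel):
    # Hypothetical stand-in; the Field definition mirrors the diff above.
    logprobs: Optional[int] = Field(
        description="Include the log probabilities of most likely N per token",
        default=None,
        le=5,
    )


print(CompletionOptions(logprobs=2).logprobs)  # 2

try:
    CompletionOptions(logprobs=10)
except ValidationError as ex:
    print("rejected:", ex.errors()[0]["msg"])  # the le=5 bound is enforced
```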
@@ -365,8 +372,24 @@ def combine_chunks(chunks: List[dict]) -> dict:
role = None
finish_reason = None

# If any of them have log probability, we're going to persist
# those later on
logprobs = []

for item in chunks:
for choice in item["choices"]:
if (
"logprobs" in choice
and "text" in choice
and isinstance(choice["logprobs"], dict)
and "top_logprobs" in choice["logprobs"]
):
logprobs.append(
{
"text": choice["text"],
"top_logprobs": choice["logprobs"]["top_logprobs"],
}
)
if "text" in choice and "delta" not in choice:
content += choice["text"]
continue
@@ -383,6 +406,8 @@ def combine_chunks(chunks: List[dict]) -> dict:
"role": role,
"finish_reason": finish_reason,
}
if logprobs:
combined["logprobs"] = logprobs
for key in ("id", "object", "model", "created", "index"):
if key in chunks[0]:
combined[key] = chunks[0][key]
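
A rough, self-contained sketch of what the logprobs branch added to `combine_chunks()` does: streamed completion choices that carry `top_logprobs` are collected into a list of `{"text", "top_logprobs"}` pairs alongside the concatenated text (the chunk shapes below are simplified from the test fixtures):

```
# Simplified re-statement of the aggregation above, for illustration only.
chunks = [
    {"choices": [{"text": "Hi", "logprobs": {"top_logprobs": [{"Hi": -1.1, "Hello": -0.7}]}}]},
    {"choices": [{"text": ".", "logprobs": {"top_logprobs": [{".": -0.9, "!": -1.1}]}}]},
]

content = ""
logprobs = []
for item in chunks:
    for choice in item["choices"]:
        if (
            "logprobs" in choice
            and "text" in choice
            and isinstance(choice["logprobs"], dict)
            and "top_logprobs" in choice["logprobs"]
        ):
            logprobs.append(
                {"text": choice["text"], "top_logprobs": choice["logprobs"]["top_logprobs"]}
            )
        if "text" in choice and "delta" not in choice:
            content += choice["text"]

combined = {"content": content}
if logprobs:
    combined["logprobs"] = logprobs

print(combined["content"])  # Hi.
print(combined["logprobs"][0])  # {'text': 'Hi', 'top_logprobs': [{'Hi': -1.1, 'Hello': -0.7}]}
```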
117 changes: 114 additions & 3 deletions tests/conftest.py
@@ -154,11 +154,10 @@ def mocked_openai_chat(requests_mock):
@pytest.fixture
def mocked_openai_chat_stream(requests_mock):
def stream_events(*args):
-        print(args)
for delta, finish_reason in (
({"role": "assistant", "content": ""}, None),
({"content": "Hi"}, None),
-            ({"content": "!"}, None),
+            ({"content": "."}, None),
({}, "stop"),
):
yield "data: {}\n\n".format(
@@ -174,8 +173,9 @@ def stream_events(*args):
}
)
).encode("utf-8")
+        yield "data: [DONE]\n\n".encode("utf-8")

-    requests_mock.post(
+    return requests_mock.post(
"https://api.openai.com/v1/chat/completions",
content=b"".join(stream_events()),
headers={"Content-Type": "text/event-stream"},
@@ -205,6 +205,117 @@ def mocked_openai_completion(requests_mock):
)


@pytest.fixture
def mocked_openai_completion_logprobs_stream(requests_mock):
choices_chunks = [
[
{
"text": "\n\n",
"index": 0,
"logprobs": {
"tokens": ["\n\n"],
"token_logprobs": [-0.6],
"top_logprobs": [{"\n\n": -0.6, "\n": -1.9}],
"text_offset": [16],
},
"finish_reason": None,
}
],
[
{
"text": "Hi",
"index": 0,
"logprobs": {
"tokens": ["Hi"],
"token_logprobs": [-1.1],
"top_logprobs": [{"Hi": -1.1, "Hello": -0.7}],
"text_offset": [18],
},
"finish_reason": None,
}
],
[
{
"text": ".",
"index": 0,
"logprobs": {
"tokens": ["."],
"token_logprobs": [-1.1],
"top_logprobs": [{".": -1.1, "!": -0.9}],
"text_offset": [20],
},
"finish_reason": None,
}
],
[
{
"text": "",
"index": 0,
"logprobs": {
"tokens": [],
"token_logprobs": [],
"top_logprobs": [],
"text_offset": [],
},
"finish_reason": "stop",
}
],
]

def stream_events():
for choices in choices_chunks:
yield "data: {}\n\n".format(
json.dumps(
{
"id": "cmpl-80MdSaou7NnPuff5ZyRMysWBmgSPS",
"object": "text_completion",
"created": 1695097702,
"choices": choices,
"model": "gpt-3.5-turbo-instruct",
}
)
).encode("utf-8")
yield "data: [DONE]\n\n".encode("utf-8")

return requests_mock.post(
"https://api.openai.com/v1/completions",
content=b"".join(stream_events()),
headers={"Content-Type": "text/event-stream"},
)


@pytest.fixture
def mocked_openai_completion_logprobs(requests_mock):
return requests_mock.post(
"https://api.openai.com/v1/completions",
json={
"id": "cmpl-80MeBfKJutM0uMNJkRrebJLeP3bxL",
"object": "text_completion",
"created": 1695097747,
"model": "gpt-3.5-turbo-instruct",
"choices": [
{
"text": "\n\nHi.",
"index": 0,
"logprobs": {
"tokens": ["\n\n", "Hi", "1"],
"token_logprobs": [-0.6, -1.1, -0.9],
"top_logprobs": [
{"\n\n": -0.6, "\n": -1.9},
{"Hi": -1.1, "Hello": -0.7},
{".": -0.9, "!": -1.1},
],
"text_offset": [16, 18, 20],
},
"finish_reason": "stop",
}
],
"usage": {"prompt_tokens": 5, "completion_tokens": 3, "total_tokens": 8},
},
headers={"Content-Type": "application/json"},
)
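
For reference, the `top_logprobs` values in these fixtures are natural-log probabilities; a quick sketch of converting one mocked entry back into percentages (the numbers are just the fixture's made-up values, not real model output):

```
import math

# One top_logprobs entry from the mocked payload above.
top_logprobs = {"Hi": -1.1, "Hello": -0.7}

for token, logprob in top_logprobs.items():
    # exp(logprob) recovers the probability the (mocked) model assigned to the token.
    print(f"{token!r}: {math.exp(logprob):.1%}")
# 'Hi': 33.3%
# 'Hello': 49.7%
```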


@pytest.fixture
def mocked_localai(requests_mock):
return requests_mock.post(
92 changes: 91 additions & 1 deletion tests/test_llm.py
@@ -298,7 +298,7 @@ def test_openai_chat_stream(mocked_openai_chat_stream, user_path):
runner = CliRunner()
result = runner.invoke(cli, ["-m", "gpt-3.5-turbo", "--key", "x", "Say hi"])
assert result.exit_code == 0
-    assert result.output == "Hi!\n"
+    assert result.output == "Hi.\n"


def test_openai_completion(mocked_openai_completion, user_path):
@@ -344,6 +344,96 @@ def test_openai_completion(mocked_openai_completion, user_path):
assert expected.items() <= row.items()


def test_openai_completion_logprobs_stream(
mocked_openai_completion_logprobs_stream, user_path
):
log_path = user_path / "logs.db"
log_db = sqlite_utils.Database(str(log_path))
log_db["responses"].delete_where()
runner = CliRunner()
args = [
"-m",
"gpt-3.5-turbo-instruct",
"Say hi",
"-o",
"logprobs",
"2",
"--key",
"x",
]
result = runner.invoke(cli, args, catch_exceptions=False)
assert result.exit_code == 0
assert result.output == "\n\nHi.\n"
rows = list(log_db["responses"].rows)
assert len(rows) == 1
row = rows[0]
assert json.loads(row["response_json"]) == {
"content": "\n\nHi.",
"role": None,
"finish_reason": None,
"logprobs": [
{"text": "\n\n", "top_logprobs": [{"\n\n": -0.6, "\n": -1.9}]},
{"text": "Hi", "top_logprobs": [{"Hi": -1.1, "Hello": -0.7}]},
{"text": ".", "top_logprobs": [{".": -1.1, "!": -0.9}]},
{"text": "", "top_logprobs": []},
],
"id": "cmpl-80MdSaou7NnPuff5ZyRMysWBmgSPS",
"object": "text_completion",
"model": "gpt-3.5-turbo-instruct",
"created": 1695097702,
}


def test_openai_completion_logprobs_nostream(
mocked_openai_completion_logprobs, user_path
):
log_path = user_path / "logs.db"
log_db = sqlite_utils.Database(str(log_path))
log_db["responses"].delete_where()
runner = CliRunner()
args = [
"-m",
"gpt-3.5-turbo-instruct",
"Say hi",
"-o",
"logprobs",
"2",
"--key",
"x",
"--no-stream",
]
result = runner.invoke(cli, args, catch_exceptions=False)
assert result.exit_code == 0
assert result.output == "\n\nHi.\n"
rows = list(log_db["responses"].rows)
assert len(rows) == 1
row = rows[0]
assert json.loads(row["response_json"]) == {
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": {
"text_offset": [16, 18, 20],
"token_logprobs": [-0.6, -1.1, -0.9],
"tokens": ["\n\n", "Hi", "1"],
"top_logprobs": [
{"\n": -1.9, "\n\n": -0.6},
{"Hello": -0.7, "Hi": -1.1},
{"!": -1.1, ".": -0.9},
],
},
"text": "\n\nHi.",
}
],
"created": 1695097747,
"id": "cmpl-80MeBfKJutM0uMNJkRrebJLeP3bxL",
"model": "gpt-3.5-turbo-instruct",
"object": "text_completion",
"usage": {"completion_tokens": 3, "prompt_tokens": 5, "total_tokens": 8},
}


EXTRA_MODELS_YAML = """
- model_id: orca
model_name: orca-mini-3b
