logprobs support for OpenAI completion models, refs #284
simonw committed Sep 19, 2023
1 parent 2b50427 commit 4fea461
Showing 6 changed files with 269 additions and 12 deletions.
18 changes: 10 additions & 8 deletions docs/aliases.md
@@ -19,14 +19,16 @@ result = CliRunner().invoke(cli, ["aliases", "list"])
cog.out("```\n{}```".format(result.output))
]]] -->
```
-3.5 : gpt-3.5-turbo
-chatgpt : gpt-3.5-turbo
-chatgpt-16k : gpt-3.5-turbo-16k
-3.5-16k : gpt-3.5-turbo-16k
-4 : gpt-4
-gpt4 : gpt-4
-4-32k : gpt-4-32k
-ada : ada-002 (embedding)
+3.5 : gpt-3.5-turbo
+chatgpt : gpt-3.5-turbo
+chatgpt-16k : gpt-3.5-turbo-16k
+3.5-16k : gpt-3.5-turbo-16k
+4 : gpt-4
+gpt4 : gpt-4
+4-32k : gpt-4-32k
+3.5-instruct : gpt-3.5-turbo-instruct
+chatgpt-instruct : gpt-3.5-turbo-instruct
+ada : ada-002 (embedding)
```
<!-- [[[end]]] -->

28 changes: 28 additions & 0 deletions docs/usage.md
@@ -256,6 +256,34 @@ OpenAI Chat: gpt-4-32k (aliases: 4-32k)
presence_penalty: float
stop: str
logit_bias: dict, str
OpenAI Completion: gpt-3.5-turbo-instruct (aliases: 3.5-instruct, chatgpt-instruct)
temperature: float
What sampling temperature to use, between 0 and 2. Higher values like
0.8 will make the output more random, while lower values like 0.2 will
make it more focused and deterministic.
max_tokens: int
Maximum number of tokens to generate.
top_p: float
An alternative to sampling with temperature, called nucleus sampling,
where the model considers the results of the tokens with top_p
probability mass. So 0.1 means only the tokens comprising the top 10%
probability mass are considered. Recommended to use top_p or
temperature but not both.
frequency_penalty: float
Number between -2.0 and 2.0. Positive values penalize new tokens based
on their existing frequency in the text so far, decreasing the model's
likelihood to repeat the same line verbatim.
presence_penalty: float
Number between -2.0 and 2.0. Positive values penalize new tokens based
on whether they appear in the text so far, increasing the model's
likelihood to talk about new topics.
stop: str
A string where the API will stop generating further tokens.
logit_bias: dict, str
Modify the likelihood of specified tokens appearing in the completion.
Pass a JSON string like '{"1712":-100, "892":-100, "1489":-100}'
logprobs: int
Include the log probabilities of most likely N per token
```
<!-- [[[end]]] -->
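
The options documented above can also be exercised from Python rather than the CLI. A minimal sketch, assuming an `OPENAI_API_KEY` is already configured and that prompt options such as `max_tokens` and the new `logprobs` are passed as keyword arguments to `prompt()` (an assumption here, mirroring the `-o` flags used in the tests further down):

```
import llm

# Hedged sketch: option names mirror the docs above; whether prompt() accepts
# them as keyword arguments in exactly this form is an assumption.
model = llm.get_model("gpt-3.5-turbo-instruct")
response = model.prompt("Say hi", max_tokens=16, logprobs=2)
print(response.text())
```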
1 change: 1 addition & 0 deletions llm/cli.py
@@ -280,6 +280,7 @@ def read_prompt():
else:
print(response.text())
except Exception as ex:
+        raise
raise click.ClickException(str(ex))

# Log to the database
25 changes: 25 additions & 0 deletions llm/default_plugins/openai_models.py
@@ -315,6 +315,13 @@ def build_kwargs(self, prompt):


class Completion(Chat):
class Options(Chat.Options):
logprobs: Optional[int] = Field(
description="Include the log probabilities of most likely N per token",
default=None,
le=5,
)

def __init__(self, *args, default_max_tokens=None, **kwargs):
super().__init__(*args, **kwargs)
self.default_max_tokens = default_max_tokens
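
The new `logprobs` option is declared with a pydantic upper bound (`le=5`), so values above 5 are rejected when the options are validated. A standalone sketch of that behaviour using a hypothetical stand-in class (not llm's actual `Completion.Options`):

```
from typing import Optional

from pydantic import BaseModel, Field, ValidationError


class CompletionOptions(BaseModel):
    # Hypothetical stand-in; the Field definition mirrors the diff above.
    logprobs: Optional[int] = Field(
        description="Include the log probabilities of most likely N per token",
        default=None,
        le=5,
    )


print(CompletionOptions(logprobs=2).logprobs)  # 2

try:
    CompletionOptions(logprobs=10)
except ValidationError as ex:
    print("rejected:", ex.errors()[0]["msg"])  # the le=5 bound is enforced
```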
@@ -365,8 +372,24 @@ def combine_chunks(chunks: List[dict]) -> dict:
role = None
finish_reason = None

# If any of them have log probability, we're going to persist
# those later on
logprobs = []

for item in chunks:
for choice in item["choices"]:
if (
"logprobs" in choice
and "text" in choice
and isinstance(choice["logprobs"], dict)
and "top_logprobs" in choice["logprobs"]
):
logprobs.append(
{
"text": choice["text"],
"top_logprobs": choice["logprobs"]["top_logprobs"],
}
)
if "text" in choice and "delta" not in choice:
content += choice["text"]
continue
@@ -383,6 +406,8 @@ def combine_chunks(chunks: List[dict]) -> dict:
"role": role,
"finish_reason": finish_reason,
}
if logprobs:
combined["logprobs"] = logprobs
for key in ("id", "object", "model", "created", "index"):
if key in chunks[0]:
combined[key] = chunks[0][key]
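
A rough, self-contained sketch of what the logprobs branch added to `combine_chunks()` does: streamed completion choices that carry `top_logprobs` are collected into a list of `{"text", "top_logprobs"}` pairs alongside the concatenated text (the chunk shapes below are simplified from the test fixtures):

```
# Simplified re-statement of the aggregation above, for illustration only.
chunks = [
    {"choices": [{"text": "Hi", "logprobs": {"top_logprobs": [{"Hi": -1.1, "Hello": -0.7}]}}]},
    {"choices": [{"text": ".", "logprobs": {"top_logprobs": [{".": -0.9, "!": -1.1}]}}]},
]

content = ""
logprobs = []
for item in chunks:
    for choice in item["choices"]:
        if (
            "logprobs" in choice
            and "text" in choice
            and isinstance(choice["logprobs"], dict)
            and "top_logprobs" in choice["logprobs"]
        ):
            logprobs.append(
                {"text": choice["text"], "top_logprobs": choice["logprobs"]["top_logprobs"]}
            )
        if "text" in choice and "delta" not in choice:
            content += choice["text"]

combined = {"content": content}
if logprobs:
    combined["logprobs"] = logprobs

print(combined["content"])  # Hi.
print(combined["logprobs"][0])  # {'text': 'Hi', 'top_logprobs': [{'Hi': -1.1, 'Hello': -0.7}]}
```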
117 changes: 114 additions & 3 deletions tests/conftest.py
@@ -154,11 +154,10 @@ def mocked_openai_chat(requests_mock):
@pytest.fixture
def mocked_openai_chat_stream(requests_mock):
def stream_events(*args):
-        print(args)
for delta, finish_reason in (
({"role": "assistant", "content": ""}, None),
({"content": "Hi"}, None),
-            ({"content": "!"}, None),
+            ({"content": "."}, None),
({}, "stop"),
):
yield "data: {}\n\n".format(
@@ -174,8 +173,9 @@ def stream_events(*args):
}
)
).encode("utf-8")
+        yield "data: [DONE]\n\n".encode("utf-8")

-    requests_mock.post(
+    return requests_mock.post(
"https://api.openai.com/v1/chat/completions",
content=b"".join(stream_events()),
headers={"Content-Type": "text/event-stream"},
@@ -205,6 +205,117 @@ def mocked_openai_completion(requests_mock):
)


@pytest.fixture
def mocked_openai_completion_logprobs_stream(requests_mock):
choices_chunks = [
[
{
"text": "\n\n",
"index": 0,
"logprobs": {
"tokens": ["\n\n"],
"token_logprobs": [-0.6],
"top_logprobs": [{"\n\n": -0.6, "\n": -1.9}],
"text_offset": [16],
},
"finish_reason": None,
}
],
[
{
"text": "Hi",
"index": 0,
"logprobs": {
"tokens": ["Hi"],
"token_logprobs": [-1.1],
"top_logprobs": [{"Hi": -1.1, "Hello": -0.7}],
"text_offset": [18],
},
"finish_reason": None,
}
],
[
{
"text": ".",
"index": 0,
"logprobs": {
"tokens": ["."],
"token_logprobs": [-1.1],
"top_logprobs": [{".": -1.1, "!": -0.9}],
"text_offset": [20],
},
"finish_reason": None,
}
],
[
{
"text": "",
"index": 0,
"logprobs": {
"tokens": [],
"token_logprobs": [],
"top_logprobs": [],
"text_offset": [],
},
"finish_reason": "stop",
}
],
]

def stream_events():
for choices in choices_chunks:
yield "data: {}\n\n".format(
json.dumps(
{
"id": "cmpl-80MdSaou7NnPuff5ZyRMysWBmgSPS",
"object": "text_completion",
"created": 1695097702,
"choices": choices,
"model": "gpt-3.5-turbo-instruct",
}
)
).encode("utf-8")
yield "data: [DONE]\n\n".encode("utf-8")

return requests_mock.post(
"https://api.openai.com/v1/completions",
content=b"".join(stream_events()),
headers={"Content-Type": "text/event-stream"},
)


@pytest.fixture
def mocked_openai_completion_logprobs(requests_mock):
return requests_mock.post(
"https://api.openai.com/v1/completions",
json={
"id": "cmpl-80MeBfKJutM0uMNJkRrebJLeP3bxL",
"object": "text_completion",
"created": 1695097747,
"model": "gpt-3.5-turbo-instruct",
"choices": [
{
"text": "\n\nHi.",
"index": 0,
"logprobs": {
"tokens": ["\n\n", "Hi", "1"],
"token_logprobs": [-0.6, -1.1, -0.9],
"top_logprobs": [
{"\n\n": -0.6, "\n": -1.9},
{"Hi": -1.1, "Hello": -0.7},
{".": -0.9, "!": -1.1},
],
"text_offset": [16, 18, 20],
},
"finish_reason": "stop",
}
],
"usage": {"prompt_tokens": 5, "completion_tokens": 3, "total_tokens": 8},
},
headers={"Content-Type": "application/json"},
)
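
For reference, the `top_logprobs` values in these fixtures are natural-log probabilities; a quick sketch of converting one mocked entry back into percentages (the numbers are just the fixture's made-up values, not real model output):

```
import math

# One top_logprobs entry from the mocked payload above.
top_logprobs = {"Hi": -1.1, "Hello": -0.7}

for token, logprob in top_logprobs.items():
    # exp(logprob) recovers the probability the (mocked) model assigned to the token.
    print(f"{token!r}: {math.exp(logprob):.1%}")
# 'Hi': 33.3%
# 'Hello': 49.7%
```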


@pytest.fixture
def mocked_localai(requests_mock):
return requests_mock.post(
92 changes: 91 additions & 1 deletion tests/test_llm.py
@@ -298,7 +298,7 @@ def test_openai_chat_stream(mocked_openai_chat_stream, user_path):
runner = CliRunner()
result = runner.invoke(cli, ["-m", "gpt-3.5-turbo", "--key", "x", "Say hi"])
assert result.exit_code == 0
-    assert result.output == "Hi!\n"
+    assert result.output == "Hi.\n"


def test_openai_completion(mocked_openai_completion, user_path):
@@ -344,6 +344,96 @@ def test_openai_completion(mocked_openai_completion, user_path):
assert expected.items() <= row.items()


def test_openai_completion_logprobs_stream(
mocked_openai_completion_logprobs_stream, user_path
):
log_path = user_path / "logs.db"
log_db = sqlite_utils.Database(str(log_path))
log_db["responses"].delete_where()
runner = CliRunner()
args = [
"-m",
"gpt-3.5-turbo-instruct",
"Say hi",
"-o",
"logprobs",
"2",
"--key",
"x",
]
result = runner.invoke(cli, args, catch_exceptions=False)
assert result.exit_code == 0
assert result.output == "\n\nHi.\n"
rows = list(log_db["responses"].rows)
assert len(rows) == 1
row = rows[0]
assert json.loads(row["response_json"]) == {
"content": "\n\nHi.",
"role": None,
"finish_reason": None,
"logprobs": [
{"text": "\n\n", "top_logprobs": [{"\n\n": -0.6, "\n": -1.9}]},
{"text": "Hi", "top_logprobs": [{"Hi": -1.1, "Hello": -0.7}]},
{"text": ".", "top_logprobs": [{".": -1.1, "!": -0.9}]},
{"text": "", "top_logprobs": []},
],
"id": "cmpl-80MdSaou7NnPuff5ZyRMysWBmgSPS",
"object": "text_completion",
"model": "gpt-3.5-turbo-instruct",
"created": 1695097702,
}


def test_openai_completion_logprobs_nostream(
mocked_openai_completion_logprobs, user_path
):
log_path = user_path / "logs.db"
log_db = sqlite_utils.Database(str(log_path))
log_db["responses"].delete_where()
runner = CliRunner()
args = [
"-m",
"gpt-3.5-turbo-instruct",
"Say hi",
"-o",
"logprobs",
"2",
"--key",
"x",
"--no-stream",
]
result = runner.invoke(cli, args, catch_exceptions=False)
assert result.exit_code == 0
assert result.output == "\n\nHi.\n"
rows = list(log_db["responses"].rows)
assert len(rows) == 1
row = rows[0]
assert json.loads(row["response_json"]) == {
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": {
"text_offset": [16, 18, 20],
"token_logprobs": [-0.6, -1.1, -0.9],
"tokens": ["\n\n", "Hi", "1"],
"top_logprobs": [
{"\n": -1.9, "\n\n": -0.6},
{"Hello": -0.7, "Hi": -1.1},
{"!": -1.1, ".": -0.9},
],
},
"text": "\n\nHi.",
}
],
"created": 1695097747,
"id": "cmpl-80MeBfKJutM0uMNJkRrebJLeP3bxL",
"model": "gpt-3.5-turbo-instruct",
"object": "text_completion",
"usage": {"completion_tokens": 3, "prompt_tokens": 5, "total_tokens": 8},
}


EXTRA_MODELS_YAML = """
- model_id: orca
model_name: orca-mini-3b
