Commit
feat: maximum_tokens attribute of CompletionRequest defaults to None
moldhouse committed Aug 12, 2024
1 parent 90d04f0 commit ae03dc8
Showing 2 changed files with 16 additions and 7 deletions.
8 changes: 5 additions & 3 deletions aleph_alpha_client/completion.py
@@ -15,12 +15,14 @@ class CompletionRequest:
Unconditional completion can be started with an empty string (default).
The prompt may contain a zero shot or few shot task.
-        maximum_tokens (int, optional, default 64):
+        maximum_tokens (int, optional, default None):
The maximum number of tokens to be generated.
Completion will terminate after the maximum number of tokens is reached. Increase this value to generate longer texts.
A text is split into tokens. Usually there are more tokens than words.
The maximum supported number of tokens depends on the model (for luminous-base, it may not exceed 2048 tokens).
-            The prompt's tokens plus the maximum_tokens request must not exceed this number.
+            The prompt's tokens plus the maximum_tokens request must not exceed this number. If set to None, the model will stop
+            generating tokens either if it outputs a sequence specified in `stop_sequences` or if it reaches its technical limit.
+            For most models, this means that the sum of input and output tokens is equal to its context window.
temperature (float, optional, default 0.0)
A higher sampling temperature encourages the model to produce less probable outputs ("be more creative"). Values are expected in a range from 0.0 to 1.0. Try high values (e.g. 0.9) for a more "creative" response and the default 0.0 for a well defined and repeatable answer.
@@ -181,7 +183,7 @@ class CompletionRequest:
"""

prompt: Prompt
-    maximum_tokens: int = 64
+    maximum_tokens: Optional[int] = None
temperature: float = 0.0
top_k: int = 0
top_p: float = 0.0
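The docstring change above says that with `maximum_tokens=None`, generation stops at a `stop_sequences` match or when the prompt tokens plus output tokens reach the model's context window. A minimal self-contained sketch of that arithmetic (the class and helper below are hypothetical stand-ins, not the real `aleph_alpha_client` API):

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class CompletionRequestSketch:
    """Hypothetical stand-in mirroring the field change in this diff:
    maximum_tokens now defaults to None instead of 64."""
    prompt: str
    maximum_tokens: Optional[int] = None
    temperature: float = 0.0

def effective_token_limit(
    request: CompletionRequestSketch, context_window: int, prompt_tokens: int
) -> int:
    # With maximum_tokens=None, output is bounded only by the technical
    # limit: prompt tokens + output tokens <= context window.
    if request.maximum_tokens is None:
        return context_window - prompt_tokens
    return request.maximum_tokens

req = CompletionRequestSketch(prompt="Hello, World!")
print(effective_token_limit(req, context_window=2048, prompt_tokens=5))  # 2043
```

The 2048-token context window used here is the luminous-base limit quoted in the docstring; other models may differ.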
15 changes: 11 additions & 4 deletions tests/test_complete.py
@@ -18,9 +18,6 @@
)


-# AsyncClient


@pytest.mark.system_test
async def test_can_complete_with_async_client(
async_client: AsyncClient, model_name: str
@@ -35,7 +32,17 @@ async def test_can_complete_with_async_client(
assert response.model_version is not None


-# Client
+@pytest.mark.system_test
+def test_complete_maximum_tokens_none(sync_client: Client, model_name: str):
+    request = CompletionRequest(
+        prompt=Prompt.from_text("Hello, World!"),
+        maximum_tokens=None,
+        stop_sequences=[","],
+    )
+
+    response = sync_client.complete(request, model=model_name)
+    assert len(response.completions) == 1
+    assert len(response.completions[0].completion) < 100


@pytest.mark.system_test
