Skip to content

Commit

Permalink
feat: maximum_tokens attribute of CompletionRequest defaults to None
Browse files Browse the repository at this point in the history
  • Loading branch information
moldhouse committed Aug 9, 2024
1 parent 90d04f0 commit 56e721e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 6 deletions.
6 changes: 4 additions & 2 deletions aleph_alpha_client/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ class CompletionRequest:
Unconditional completion can be started with an empty string (default).
The prompt may contain a zero shot or few shot task.
maximum_tokens (int, optional, default 64):
maximum_tokens (int, optional, default None):
The maximum number of tokens to be generated.
Completion will terminate after the maximum number of tokens is reached. Increase this value to generate longer texts.
A text is split into tokens. Usually there are more tokens than words.
The maximum supported number of tokens depends on the model (for luminous-base, it may not exceed 2048 tokens).
The prompt's tokens plus the maximum_tokens request must not exceed this number.
If set to None, the model will stop generating tokens either by emitting a stop sequence token or if it reaches its
technical limit. For most models, this means that the sum of input and output tokens is equal to its context window.
temperature (float, optional, default 0.0)
A higher sampling temperature encourages the model to produce less probable outputs ("be more creative"). Values are expected in a range from 0.0 to 1.0. Try high values (e.g. 0.9) for a more "creative" response and the default 0.0 for a well defined and repeatable answer.
Expand Down Expand Up @@ -181,7 +183,7 @@ class CompletionRequest:
"""

prompt: Prompt
maximum_tokens: int = 64
maximum_tokens: Optional[int] = None
temperature: float = 0.0
top_k: int = 0
top_p: float = 0.0
Expand Down
13 changes: 9 additions & 4 deletions tests/test_complete.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@
)


# AsyncClient


@pytest.mark.system_test
async def test_can_complete_with_async_client(
async_client: AsyncClient, model_name: str
Expand All @@ -35,7 +32,15 @@ async def test_can_complete_with_async_client(
assert response.model_version is not None


# Client
@pytest.mark.system_test
def test_complete_maximum_tokens_none(sync_client: Client, model_name: str):
    """With maximum_tokens=None the model decides when to stop; the request
    must still succeed and produce exactly one completion."""
    prompt = Prompt.from_text("Hello, World!")
    completion_request = CompletionRequest(
        prompt=prompt,
        maximum_tokens=None,
    )

    result = sync_client.complete(completion_request, model=model_name)
    assert len(result.completions) == 1


@pytest.mark.system_test
Expand Down

0 comments on commit 56e721e

Please sign in to comment.