From 14b9a4280eb0823aafee8151363713630138b22e Mon Sep 17 00:00:00 2001 From: Martin Reinhardt Date: Tue, 5 Dec 2023 10:32:38 +0100 Subject: [PATCH 1/8] Update completion response parameter documentation --- aleph_alpha_client/completion.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/aleph_alpha_client/completion.py b/aleph_alpha_client/completion.py index ac48523..272f7c8 100644 --- a/aleph_alpha_client/completion.py +++ b/aleph_alpha_client/completion.py @@ -247,6 +247,19 @@ def _asdict(self) -> Mapping[str, Any]: @dataclass(frozen=True) class CompletionResponse: + """ + Describes a completion response + + Parameters: + model_version: + Model name and version (if any) of the used model for inference. + completions: + List of completions; may contain only one entry if no more are requested (see parameter n). + optimized_prompt: + Describes prompt after optimizations. This field is only returned if the flag + `disable_optimizations` flag is not set and the prompt has actually changed. 
+ """ + model_version: str completions: Sequence[CompletionResult] optimized_prompt: Optional[Prompt] = None From 6e7571abb007e31327e6deaf8f12ac7cdfc0d761 Mon Sep 17 00:00:00 2001 From: Martin Reinhardt Date: Tue, 5 Dec 2023 14:06:21 +0100 Subject: [PATCH 2/8] Add num_tokens_prompt_total to completion response --- aleph_alpha_client/completion.py | 3 +++ tests/test_complete.py | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/aleph_alpha_client/completion.py b/aleph_alpha_client/completion.py index 272f7c8..f15813b 100644 --- a/aleph_alpha_client/completion.py +++ b/aleph_alpha_client/completion.py @@ -262,11 +262,13 @@ class CompletionResponse: model_version: str completions: Sequence[CompletionResult] + num_tokens_prompt_total: int optimized_prompt: Optional[Prompt] = None @staticmethod def from_json(json: Dict[str, Any]) -> "CompletionResponse": optimized_prompt_json = json.get("optimized_prompt") + print(json) return CompletionResponse( model_version=json["model_version"], completions=[ @@ -275,6 +277,7 @@ def from_json(json: Dict[str, Any]) -> "CompletionResponse": optimized_prompt=Prompt.from_json(optimized_prompt_json) if optimized_prompt_json else None, + num_tokens_prompt_total=json["num_tokens_prompt_total"], ) def to_json(self) -> Mapping[str, Any]: diff --git a/tests/test_complete.py b/tests/test_complete.py index 863a9fe..5d72309 100644 --- a/tests/test_complete.py +++ b/tests/test_complete.py @@ -127,3 +127,16 @@ def test_complete_with_echo(sync_client: Client, model_name: str, prompt_image: assert len(completion_result.completion_tokens) > 0 assert completion_result.log_probs is not None assert len(completion_result.log_probs) > 0 + +@pytest.mark.system_test +def test_num_tokes_prompt_total_with_best_of(sync_client: Client, model_name: str): + tokens = [49222, 2998] # Hello world + best_of = 2 + request = CompletionRequest( + prompt = Prompt.from_tokens(tokens), + best_of = best_of, + maximum_tokens = 1, + ) + + response = 
sync_client.complete(request, model=model_name) + assert response.num_tokens_prompt_total == len(tokens) * best_of From 9bd7cc9cf20c43a77a95ca7ce164324911209296 Mon Sep 17 00:00:00 2001 From: Martin Reinhardt Date: Tue, 5 Dec 2023 14:37:44 +0100 Subject: [PATCH 3/8] Add num_tokens_generated to completion response --- aleph_alpha_client/completion.py | 12 ++++++++- tests/test_complete.py | 42 +++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/aleph_alpha_client/completion.py b/aleph_alpha_client/completion.py index f15813b..f72b6c9 100644 --- a/aleph_alpha_client/completion.py +++ b/aleph_alpha_client/completion.py @@ -255,6 +255,14 @@ class CompletionResponse: Model name and version (if any) of the used model for inference. completions: List of completions; may contain only one entry if no more are requested (see parameter n). + num_tokens_prompt_total: + Number of prompt tokens combined across all completion tasks. + In particular, if you set best_of or n to a number larger than 1 then we report the + combined prompt token count for all best_of or n tasks. + num_tokens_generated: + Number of generated tokens combined across all completion tasks. + If multiple completions are returned or best_of is set to a value greater than 1 then + this value contains the combined generated token count. optimized_prompt: Describes prompt after optimizations. This field is only returned if the flag `disable_optimizations` flag is not set and the prompt has actually changed. 
@@ -263,6 +271,7 @@ class CompletionResponse: model_version: str completions: Sequence[CompletionResult] num_tokens_prompt_total: int + num_tokens_generated: int optimized_prompt: Optional[Prompt] = None @staticmethod @@ -274,10 +283,11 @@ def from_json(json: Dict[str, Any]) -> "CompletionResponse": completions=[ CompletionResult.from_json(item) for item in json["completions"] ], + num_tokens_prompt_total=json["num_tokens_prompt_total"], + num_tokens_generated=json["num_tokens_generated"], optimized_prompt=Prompt.from_json(optimized_prompt_json) if optimized_prompt_json else None, - num_tokens_prompt_total=json["num_tokens_prompt_total"], ) def to_json(self) -> Mapping[str, Any]: diff --git a/tests/test_complete.py b/tests/test_complete.py index 5d72309..88a0dc3 100644 --- a/tests/test_complete.py +++ b/tests/test_complete.py @@ -129,7 +129,7 @@ def test_complete_with_echo(sync_client: Client, model_name: str, prompt_image: assert len(completion_result.log_probs) > 0 @pytest.mark.system_test -def test_num_tokes_prompt_total_with_best_of(sync_client: Client, model_name: str): +def test_num_tokens_prompt_total_with_best_of(sync_client: Client, model_name: str): tokens = [49222, 2998] # Hello world best_of = 2 request = CompletionRequest( @@ -140,3 +140,43 @@ def test_num_tokes_prompt_total_with_best_of(sync_client: Client, model_name: st response = sync_client.complete(request, model=model_name) assert response.num_tokens_prompt_total == len(tokens) * best_of + +""" +curl https://api.aleph-alpha.com/complete -X POST -H "Authorization: Bearer $AA_API_TOKEN" -H "Content-Type: application/json" + -d '{ "model": "luminous-base", "prompt": [{ "type": "text", "data": "Hello world"}], "maximum_tokens": 1, "n": 2, "tokens": true }' +{"completions": + [ + { + "completion":"!", + "raw_completion":"!", + "completion_tokens":["!"], + "finish_reason":"maximum_tokens" + }, + { + "completion":"!", + "raw_completion":"!", + "completion_tokens":["!"], + 
"finish_reason":"maximum_tokens" + } + ], + "model_version":"2022-04", + "num_tokens_prompt_total":4, + "num_tokens_generated":2} +""" + +@pytest.mark.system_test +def test_num_tokens_generated_with_best_of(sync_client: Client, model_name: str): + hello_world = [49222, 2998] # Hello world + best_of = 2 + request = CompletionRequest( + prompt = Prompt.from_tokens(hello_world), + best_of = best_of, + maximum_tokens = 1, + tokens = True, + ) + + response = sync_client.complete(request, model=model_name) + completion_result = response.completions[0] + number_tokens_completion = len(completion_result.completion_tokens) + + assert response.num_tokens_generated == best_of * number_tokens_completion \ No newline at end of file From e7d6129a0620ea5ef327f44ef15fa1539d668079 Mon Sep 17 00:00:00 2001 From: Martin Reinhardt Date: Tue, 5 Dec 2023 15:15:43 +0100 Subject: [PATCH 4/8] Add num_tokens_prompt_total and num_tokens_generated to nice_flag tests --- tests/test_clients.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_clients.py b/tests/test_clients.py index 3327510..333c55b 100644 --- a/tests/test_clients.py +++ b/tests/test_clients.py @@ -33,7 +33,9 @@ def test_nice_flag_on_client(httpserver: HTTPServer): ).respond_with_json( CompletionResponse( "model_version", - [CompletionResult(log_probs=[], completion="foo")], + [CompletionResult(log_probs=[], completion="foo", )], + num_tokens_prompt_total=2, + num_tokens_generated=1, ).to_json() ) @@ -47,7 +49,10 @@ async def test_nice_flag_on_async_client(httpserver: HTTPServer): httpserver.expect_request("/version").respond_with_data("OK") httpserver.expect_request( - "/complete", query_string={"nice": "true"} + "/complete", + query_string={"nice": "true"}, + num_tokens_prompt_total=2, + num_tokens_generated=1, ).respond_with_json( CompletionResponse( "model_version", From ac2a9bd375f0908e3a9d4d4a03b292313a9ef112 Mon Sep 17 00:00:00 2001 From: Martin Reinhardt Date: Tue, 5 Dec 2023 15:28:34 
+0100 Subject: [PATCH 5/8] Add num_tokens_prompt_total and num_tokens_generated to tests --- tests/test_clients.py | 4 ++-- tests/test_complete.py | 1 + tests/test_error_handling.py | 3 +-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_clients.py b/tests/test_clients.py index 333c55b..10da424 100644 --- a/tests/test_clients.py +++ b/tests/test_clients.py @@ -51,12 +51,12 @@ async def test_nice_flag_on_async_client(httpserver: HTTPServer): httpserver.expect_request( "/complete", query_string={"nice": "true"}, - num_tokens_prompt_total=2, - num_tokens_generated=1, ).respond_with_json( CompletionResponse( "model_version", [CompletionResult(log_probs=[], completion="foo")], + num_tokens_prompt_total=2, + num_tokens_generated=1, ).to_json() ) diff --git a/tests/test_complete.py b/tests/test_complete.py index 88a0dc3..87f726a 100644 --- a/tests/test_complete.py +++ b/tests/test_complete.py @@ -177,6 +177,7 @@ def test_num_tokens_generated_with_best_of(sync_client: Client, model_name: str) response = sync_client.complete(request, model=model_name) completion_result = response.completions[0] + assert completion_result.completion_tokens is not None number_tokens_completion = len(completion_result.completion_tokens) assert response.num_tokens_generated == best_of * number_tokens_completion \ No newline at end of file diff --git a/tests/test_error_handling.py b/tests/test_error_handling.py index 834d204..485f27e 100644 --- a/tests/test_error_handling.py +++ b/tests/test_error_handling.py @@ -111,8 +111,7 @@ def expect_retryable_error( def expect_valid_completion(httpserver: HTTPServer) -> None: httpserver.expect_ordered_request("/complete").respond_with_json( - {"model_version": "1", "completions": []} - ) + {"model_version": "1", "completions": [], "num_tokens_prompt_total": 0, "num_tokens_generated": 0}) def expect_valid_version(httpserver: HTTPServer) -> None: From 5fe52df24ab0fb9cc7cba96eb6ed4d5f437a591e Mon Sep 17 00:00:00 2001 From: Martin 
Reinhardt Date: Tue, 5 Dec 2023 15:36:54 +0100 Subject: [PATCH 6/8] prepare release 5.0.0 --- Changelog.md | 6 ++++++ aleph_alpha_client/version.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Changelog.md b/Changelog.md index d67c35b..482c670 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,5 +1,11 @@ # Changelog +## 5.0.0 + +- Added `num_tokens_prompt_total` and `num_tokens_generated` to `CompletionResponse`. This is a + breaking change as these were introduced as mandatory parameters rather than optional ones. + HTTP API version 1.14.0 or higher is required. + ## 4.1.0 - Added `verify_ssl` flag so you can disable SSL checking for your sessions. diff --git a/aleph_alpha_client/version.py b/aleph_alpha_client/version.py index 7039708..ba7be38 100644 --- a/aleph_alpha_client/version.py +++ b/aleph_alpha_client/version.py @@ -1 +1 @@ -__version__ = "4.1.0" +__version__ = "5.0.0" From 6a30af4d71ab95196de1f7ca6e56a7ba4b61b5f5 Mon Sep 17 00:00:00 2001 From: Martin Reinhardt Date: Tue, 5 Dec 2023 15:37:49 +0100 Subject: [PATCH 7/8] remove json print statement --- aleph_alpha_client/completion.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aleph_alpha_client/completion.py b/aleph_alpha_client/completion.py index f72b6c9..128d003 100644 --- a/aleph_alpha_client/completion.py +++ b/aleph_alpha_client/completion.py @@ -277,7 +277,6 @@ class CompletionResponse: @staticmethod def from_json(json: Dict[str, Any]) -> "CompletionResponse": optimized_prompt_json = json.get("optimized_prompt") - print(json) return CompletionResponse( model_version=json["model_version"], completions=[ From c0beb1dd37dc05851a67f94908b83339ade55eaf Mon Sep 17 00:00:00 2001 From: Martin Reinhardt Date: Tue, 5 Dec 2023 15:38:38 +0100 Subject: [PATCH 8/8] remove temporary comments --- tests/test_complete.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/tests/test_complete.py b/tests/test_complete.py index 87f726a..a248ced 100644 --- 
a/tests/test_complete.py +++ b/tests/test_complete.py @@ -141,29 +141,6 @@ def test_num_tokens_prompt_total_with_best_of(sync_client: Client, model_name: s response = sync_client.complete(request, model=model_name) assert response.num_tokens_prompt_total == len(tokens) * best_of -""" -curl https://api.aleph-alpha.com/complete -X POST -H "Authorization: Bearer $AA_API_TOKEN" -H "Content-Type: application/json" - -d '{ "model": "luminous-base", "prompt": [{ "type": "text", "data": "Hello world"}], "maximum_tokens": 1, "n": 2, "tokens": true }' -{"completions": - [ - { - "completion":"!", - "raw_completion":"!", - "completion_tokens":["!"], - "finish_reason":"maximum_tokens" - }, - { - "completion":"!", - "raw_completion":"!", - "completion_tokens":["!"], - "finish_reason":"maximum_tokens" - } - ], - "model_version":"2022-04", - "num_tokens_prompt_total":4, - "num_tokens_generated":2} -""" - @pytest.mark.system_test def test_num_tokens_generated_with_best_of(sync_client: Client, model_name: str): hello_world = [49222, 2998] # Hello world