diff --git a/aidial_adapter_bedrock/dial_api/response.py b/aidial_adapter_bedrock/dial_api/response.py
index 77f4d94..f46a810 100644
--- a/aidial_adapter_bedrock/dial_api/response.py
+++ b/aidial_adapter_bedrock/dial_api/response.py
@@ -19,11 +19,9 @@ class ModelsResponse(BaseModel):
 
 
 def _encode_vector(
-    encoding_format: Literal["float", "base64"],
-    vector: List[float],
+    encoding_format: Literal["float", "base64"], vector: List[float]
 ) -> List[float] | str:
-    base64_encoding = encoding_format == "base64"
-    return vector_to_base64(vector) if base64_encoding else vector
+    return vector_to_base64(vector) if encoding_format == "base64" else vector
 
 
 def make_embeddings_response(
diff --git a/aidial_adapter_bedrock/llm/model/llama/v3.py b/aidial_adapter_bedrock/llm/model/llama/v3.py
index 182203e..3cbe166 100644
--- a/aidial_adapter_bedrock/llm/model/llama/v3.py
+++ b/aidial_adapter_bedrock/llm/model/llama/v3.py
@@ -1,4 +1,5 @@
 import json
+from typing import Awaitable, Callable
 
 from aidial_adapter_bedrock.dial_api.request import ModelParameters
 from aidial_adapter_bedrock.llm.converse.adapter import (
@@ -26,7 +27,7 @@ def is_stream(self, params: ModelParameters) -> bool:
 
 
 def input_tokenizer_factory(
     deployment: ConverseDeployment, params: ConverseRequestWrapper
-):
+) -> Callable[[ConverseMessages], Awaitable[int]]:
     tool_tokens = default_tokenize_string(json.dumps(params.toolConfig))
     system_tokens = default_tokenize_string(json.dumps(params.system))