
Commit

Add num_prompt_tokens_total to the embed responses
WieslerTNG committed Jan 9, 2024
1 parent 9d494fe commit f3b66c4
Showing 3 changed files with 28 additions and 11 deletions.
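For orientation, a minimal sketch of how a caller could read the new field after this change. The request/response types and field names come from this diff; the client construction (API token, default host) is an assumption for illustration only, not part of the commit.

from aleph_alpha_client import Client, Prompt
from aleph_alpha_client.embedding import (
    BatchSemanticEmbeddingRequest,
    SemanticRepresentation,
)

# Hypothetical setup: the token is a placeholder, not taken from this commit.
client = Client(token="AA_API_TOKEN")

request = BatchSemanticEmbeddingRequest(
    prompts=[Prompt.from_text("car"), Prompt.from_text("elephant")],
    representation=SemanticRepresentation.Symmetric,
    compress_to_size=128,
)
response = client.batch_semantic_embed(request=request)

# New in this commit: total prompt tokens, accumulated across the internally
# chunked batches of up to 100 prompts each.
print(response.num_prompt_tokens_total)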
12 changes: 8 additions & 4 deletions aleph_alpha_client/aleph_alpha_client.py
@@ -434,6 +434,7 @@ def batch_semantic_embed(

responses: List[EmbeddingVector] = []
model_version = ""
+num_prompt_tokens_total = 0
# The API currently only supports batch semantic embedding requests with up to 100
# prompts per batch. As a convenience for users, this function chunks larger requests.
for batch_request in _generate_semantic_embedding_batches(request):
@@ -445,9 +446,10 @@ def batch_semantic_embed(
response = BatchSemanticEmbeddingResponse.from_json(raw_response)
model_version = response.model_version
responses.extend(response.embeddings)
+num_prompt_tokens_total += response.num_prompt_tokens_total

-return BatchSemanticEmbeddingResponse._from_model_version_and_embeddings(
-    model_version, responses
+return BatchSemanticEmbeddingResponse(
+    model_version=model_version, embeddings=responses, num_prompt_tokens_total=num_prompt_tokens_total
)

def evaluate(
@@ -971,13 +973,15 @@ async def batch_semantic_embed(
_generate_semantic_embedding_batches(request, batch_size),
progress_bar,
)
+num_prompt_tokens_total = 0
for result in results:
resp = BatchSemanticEmbeddingResponse.from_json(result)
model_version = resp.model_version
responses.extend(resp.embeddings)
+num_prompt_tokens_total += resp.num_prompt_tokens_total

-return BatchSemanticEmbeddingResponse._from_model_version_and_embeddings(
-    model_version, responses
+return BatchSemanticEmbeddingResponse(
+    model_version=model_version, embeddings=responses, num_prompt_tokens_total=num_prompt_tokens_total
)

async def evaluate(
11 changes: 8 additions & 3 deletions aleph_alpha_client/embedding.py
@@ -88,6 +88,7 @@ def _asdict(self) -> Mapping[str, Any]:
@dataclass(frozen=True)
class EmbeddingResponse:
model_version: str
+num_prompt_tokens_total: int
embeddings: Optional[Dict[Tuple[str, str], List[float]]]
tokens: Optional[List[str]]
message: Optional[str] = None
@@ -103,6 +104,7 @@ def from_json(json: Dict[str, Any]) -> "EmbeddingResponse":
},
tokens=json.get("tokens"),
message=json.get("message"),
+num_prompt_tokens_total=json.get("num_prompt_tokens_total", 0)
)


@@ -289,6 +291,7 @@ class SemanticEmbeddingResponse:

model_version: str
embedding: EmbeddingVector
+num_prompt_tokens_total: int
message: Optional[str] = None

@staticmethod
@@ -297,6 +300,7 @@ def from_json(json: Dict[str, Any]) -> "SemanticEmbeddingResponse":
model_version=json["model_version"],
embedding=json["embedding"],
message=json.get("message"),
+num_prompt_tokens_total=json.get("num_prompt_tokens_total", 0)
)


@@ -314,17 +318,18 @@ class BatchSemanticEmbeddingResponse:

model_version: str
embeddings: Sequence[EmbeddingVector]
+num_prompt_tokens_total: int

@staticmethod
def from_json(json: Dict[str, Any]) -> "BatchSemanticEmbeddingResponse":
return BatchSemanticEmbeddingResponse(
-    model_version=json["model_version"], embeddings=json["embeddings"]
+    model_version=json["model_version"], embeddings=json["embeddings"], num_prompt_tokens_total=json.get("num_prompt_tokens_total", 0)
)

@staticmethod
def _from_model_version_and_embeddings(
-    model_version: str, embeddings: Sequence[EmbeddingVector]
+    model_version: str, embeddings: Sequence[EmbeddingVector], num_prompt_tokens_total: int
) -> "BatchSemanticEmbeddingResponse":
return BatchSemanticEmbeddingResponse(
-    model_version=model_version, embeddings=embeddings
+    model_version=model_version, embeddings=embeddings, num_prompt_tokens_total=num_prompt_tokens_total
)
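A side note on the json.get("num_prompt_tokens_total", 0) fallback added above: payloads that predate the field deserialize with a total of 0, while newer payloads carry the value through. A small illustration with made-up payload values:

from aleph_alpha_client.embedding import BatchSemanticEmbeddingResponse

# Older API responses without the field fall back to 0.
legacy = BatchSemanticEmbeddingResponse.from_json(
    {"model_version": "2023-01", "embeddings": [[0.1, 0.2]]}
)
assert legacy.num_prompt_tokens_total == 0

# Newer responses carry the reported token count through unchanged.
current = BatchSemanticEmbeddingResponse.from_json(
    {"model_version": "2023-01", "embeddings": [[0.1, 0.2]], "num_prompt_tokens_total": 7}
)
assert current.num_prompt_tokens_total == 7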
16 changes: 12 additions & 4 deletions tests/test_embed.py
@@ -5,7 +5,7 @@
import pytest
from pytest_httpserver import HTTPServer

-from aleph_alpha_client import EmbeddingRequest
+from aleph_alpha_client import EmbeddingRequest, TokenizationRequest
from aleph_alpha_client.aleph_alpha_client import AsyncClient, Client
from aleph_alpha_client.embedding import (
BatchSemanticEmbeddingRequest,
@@ -34,6 +34,7 @@ async def test_can_embed_with_async_client(async_client: AsyncClient, model_name
request.pooling
) * len(request.layers)
assert response.tokens is not None
+assert response.num_prompt_tokens_total == 1


@pytest.mark.system_test
@@ -50,6 +51,7 @@ async def test_can_semantic_embed_with_async_client(
assert response.model_version is not None
assert response.embedding
assert len(response.embedding) == 128
+assert response.num_prompt_tokens_total == 1


@pytest.mark.parametrize("num_prompts", [1, 100, 101])
@@ -58,17 +60,20 @@ async def test_batch_embed_semantic_with_async_client(
async_client: AsyncClient, sync_client: Client, num_prompts: int, batch_size: int
):
words = ["car", "elephant", "kitchen sink", "rubber", "sun"]
+prompts = [Prompt.from_text(words[random.randint(0, 4)]) for i in range(num_prompts)]
+tokens = [async_client.tokenize(TokenizationRequest(prompt=p.items[0].text, tokens=True, token_ids=False), "luminous-base") for p in prompts]

request = BatchSemanticEmbeddingRequest(
-    prompts=[
-        Prompt.from_text(words[random.randint(0, 4)]) for i in range(num_prompts)
-    ],
+    prompts=prompts,
representation=SemanticRepresentation.Symmetric,
compress_to_size=128,
)

result = await async_client.batch_semantic_embed(
request=request, num_concurrent_requests=10, batch_size=batch_size
)
+num_tokens = sum([len((await t).tokens) for t in tokens])
+assert result.num_prompt_tokens_total == num_tokens

assert len(result.embeddings) == num_prompts
# To make sure that the ordering of responses is preserved,
@@ -142,6 +147,7 @@ def test_embed(sync_client: Client, model_name: str):
request.layers
)
assert result.tokens is None
+assert result.num_prompt_tokens_total == 1


@pytest.mark.system_test
@@ -178,6 +184,7 @@ def test_embed_with_tokens(sync_client: Client, model_name: str):
request.layers
)
assert result.tokens is not None
+assert result.num_prompt_tokens_total == 1


@pytest.mark.system_test
@@ -193,6 +200,7 @@ def test_embed_semantic(sync_client: Client):
assert result.model_version is not None
assert result.embedding
assert len(result.embedding) == 128
+assert result.num_prompt_tokens_total == 1


@pytest.mark.parametrize("num_prompts", [1, 100, 101, 200, 1000])
