feat: accept list as prompt and use first string (huggingface#1702)
This PR allows `CompletionRequest.prompt` to be sent as a string or an
array of strings. When an array is sent, the first value is used if it
is a string; otherwise an appropriate error is thrown.

Fixes: huggingface#1690
Similar to: https://github.com/vllm-project/vllm/pull/323/files
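As a rough usage sketch (the base URL, port, and `model` value here are assumptions, not taken from this diff; the `/v1/completions` route follows the OpenAI-compatible convention), the new request shape can be exercised like this:

```python
import requests

# Assumed local deployment; adjust the base URL to your setup.
BASE_URL = "http://localhost:3000"

# A plain string prompt works as before.
resp = requests.post(
    f"{BASE_URL}/v1/completions",
    json={"model": "tgi", "prompt": "Say this is a test", "max_tokens": 10},
)
print(resp.json()["choices"][0]["text"])

# An array of strings is now accepted as well. Judging by the snapshot
# later in this diff, each entry yields its own choice, distinguished
# by its "index" field in the response.
resp = requests.post(
    f"{BASE_URL}/v1/completions",
    json={
        "model": "tgi",
        "prompt": ["What color is the sky?", "Is water wet?"],
        "max_tokens": 10,
    },
)
for choice in resp.json()["choices"]:
    print(choice["index"], choice["text"])
```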
drbh authored and kdamaszk committed Jun 3, 2024
1 parent fea0f2f commit 9189169
Showing 11 changed files with 1,188 additions and 107 deletions.
21 changes: 21 additions & 0 deletions clients/python/text_generation/types.py
@@ -59,6 +59,17 @@ class ChatCompletionComplete(BaseModel):
    usage: Optional[Any] = None


class CompletionComplete(BaseModel):
    # Index of the completion choice
    index: int
    # Generated text for this choice
    text: str
    # Log probabilities for the generated tokens
    logprobs: Optional[Any]
    # Reason the generation finished
    finish_reason: str


class Function(BaseModel):
    name: Optional[str]
    arguments: str

@@ -104,6 +115,16 @@ class ChatComplete(BaseModel):
    usage: Any


class Completion(BaseModel):
    # Unique identifier of the completion
    id: str
    # Object type, "text_completion" for this endpoint
    object: str
    # Unix timestamp of when the completion was created
    created: int
    # Model that produced the completion
    model: str
    # Fingerprint of the serving configuration
    system_fingerprint: str
    # One entry per generated completion
    choices: List[CompletionComplete]


class ChatRequest(BaseModel):
    # Model identifier
    model: str
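To illustrate the new models, here is a minimal sketch that validates a payload shaped like the snapshot added later in this diff (the single choice shown is lifted from that snapshot):

```python
from text_generation.types import Completion

# Field values taken from the snapshot file added later in this diff.
payload = {
    "id": "",
    "object": "text_completion",
    "created": 1713284455,
    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "system_fingerprint": "2.0.0-native",
    "choices": [
        {
            "index": 0,
            "text": " PR for more information?",
            "logprobs": None,
            "finish_reason": "eos_token",
        }
    ],
}

# Pydantic coerces the nested choice dicts into CompletionComplete models.
completion = Completion(**payload)
print(completion.choices[0].finish_reason)  # "eos_token"
```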
9 changes: 9 additions & 0 deletions docs/source/basic_tutorials/launcher.md
@@ -398,6 +398,15 @@ Options:
-e, --env
Display a lot of information about your runtime environment
```
## MAX_CLIENT_BATCH_SIZE
```shell
--max-client-batch-size <MAX_CLIENT_BATCH_SIZE>
Control the maximum number of inputs that a client can send in a single request
[env: MAX_CLIENT_BATCH_SIZE=]
[default: 4]
```
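A sketch of probing this limit from a client, assuming a local deployment at the default port and the OpenAI-style `/v1/completions` route; the exact rejection status code and message are not shown in this diff:

```python
import requests

BASE_URL = "http://localhost:3000"  # assumed local deployment

# Five prompts against the default --max-client-batch-size of 4 should
# be rejected rather than silently truncated; the precise error shape
# depends on the server's validation error format.
resp = requests.post(
    f"{BASE_URL}/v1/completions",
    json={"model": "tgi", "prompt": ["a", "b", "c", "d", "e"], "max_tokens": 5},
)
if not resp.ok:
    print(resp.status_code, resp.text)
```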
## HELP
```shell
27 changes: 21 additions & 6 deletions integration-tests/conftest.py
@@ -9,6 +9,7 @@
import math
import time
import random
import re

from docker.errors import NotFound
from typing import Optional, List, Dict
@@ -26,6 +27,7 @@
    ChatComplete,
    ChatCompletionChunk,
    ChatCompletionComplete,
    Completion,
)

DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", None)
@@ -69,17 +71,22 @@ def convert_data(data):
    data = json.loads(data)
    if isinstance(data, Dict) and "choices" in data:
        choices = data["choices"]
        if (
            isinstance(choices, List)
            and len(choices) >= 1
            and "delta" in choices[0]
        ):
            return ChatCompletionChunk(**data)
        if isinstance(choices, List) and len(choices) >= 1:
            if "delta" in choices[0]:
                return ChatCompletionChunk(**data)
            if "text" in choices[0]:
                return Completion(**data)
        return ChatComplete(**data)

    if isinstance(data, Dict):
        return Response(**data)
    if isinstance(data, List):
        if (
            len(data) > 0
            and "object" in data[0]
            and data[0]["object"] == "text_completion"
        ):
            return [Completion(**d) for d in data]
        return [Response(**d) for d in data]
    raise NotImplementedError
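The dispatch above keys off which field the first choice carries: `delta` marks a streaming chat chunk, `text` a plain completion, and anything else falls through to `ChatComplete`. A self-contained sketch of that decision order, with invented sample payloads:

```python
# Each sample mimics the discriminating key convert_data looks for.
samples = {
    "ChatCompletionChunk": {"choices": [{"delta": {"content": "Hi"}}]},
    "Completion": {"choices": [{"text": " PR for more information?"}]},
    "ChatComplete": {"choices": [{"message": {"content": "Hello"}}]},
}

def classify(data: dict) -> str:
    choices = data.get("choices", [])
    if choices and "delta" in choices[0]:
        return "ChatCompletionChunk"  # streaming chat chunk
    if choices and "text" in choices[0]:
        return "Completion"           # plain text completion
    return "ChatComplete"             # full chat completion

for expected, payload in samples.items():
    assert classify(payload) == expected
```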

@@ -161,6 +168,9 @@ def eq_details(details: Details, other: Details) -> bool:
        )
    )

def eq_completion(response: Completion, other: Completion) -> bool:
    return response.choices[0].text == other.choices[0].text

def eq_chat_complete(response: ChatComplete, other: ChatComplete) -> bool:
    return (
        response.choices[0].message.content == other.choices[0].message.content
@@ -184,6 +194,11 @@ def eq_response(response: Response, other: Response) -> bool:
    if not isinstance(snapshot_data, List):
        snapshot_data = [snapshot_data]

    if isinstance(serialized_data[0], Completion):
        return len(snapshot_data) == len(serialized_data) and all(
            [eq_completion(r, o) for r, o in zip(serialized_data, snapshot_data)]
        )

    if isinstance(serialized_data[0], ChatComplete):
        return len(snapshot_data) == len(serialized_data) and all(
            [eq_chat_complete(r, o) for r, o in zip(serialized_data, snapshot_data)]
@@ -0,0 +1,38 @@
{
  "choices": [
    {
      "finish_reason": "eos_token",
      "index": 1,
      "logprobs": null,
      "text": " PR for more information?"
    },
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": null,
      "text": "le Business Incubator is providing a workspace"
    },
    {
      "finish_reason": "length",
      "index": 2,
      "logprobs": null,
      "text": " severely flawed and often has a substandard"
    },
    {
      "finish_reason": "length",
      "index": 3,
      "logprobs": null,
      "text": "hd20220811-"
    }
  ],
  "created": 1713284455,
  "id": "",
  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
  "object": "text_completion",
  "system_fingerprint": "2.0.0-native",
  "usage": {
    "completion_tokens": 36,
    "prompt_tokens": 8,
    "total_tokens": 44
  }
}