Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stream options. #2533

Merged
merged 10 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/nix_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ jobs:
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
- name: Rust tests.
run: nix build .#checks.$(nix eval --impure --raw --expr 'builtins.currentSystem').rust -L
run: nix develop .#test --command cargo test
1 change: 1 addition & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ jobs:
sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET
- name: Install
run: |
sudo apt update
sudo apt install python3.11-dev -y
make install-cpu
- name: Run server tests
Expand Down
2 changes: 2 additions & 0 deletions .redocly.lint-ignore.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@ docs/openapi.json:
- '#/components/schemas/GenerateResponse/properties/details/nullable'
- '#/components/schemas/StreamResponse/properties/details/nullable'
- '#/components/schemas/ChatRequest/properties/response_format/nullable'
- '#/components/schemas/ChatRequest/properties/stream_options/nullable'
- '#/components/schemas/ChatRequest/properties/tool_choice/nullable'
- '#/components/schemas/ToolChoice/nullable'
- '#/components/schemas/ChatCompletionComplete/properties/logprobs/nullable'
- '#/components/schemas/ChatCompletionChunk/properties/usage/nullable'
- '#/components/schemas/ChatCompletionChoice/properties/logprobs/nullable'
no-invalid-media-type-examples:
- '#/paths/~1/post/responses/422/content/application~1json/example'
Expand Down
6 changes: 4 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,17 @@ members = [
"backends/grpc-metadata",
"backends/trtllm",
"backends/client",
"launcher"
"launcher",
"router"
]
default-members = [
"benchmark",
"backends/v3",
"backends/grpc-metadata",
# "backends/trtllm",
"backends/client",
"launcher"
"launcher",
"router"
]
resolver = "2"

Expand Down
3 changes: 2 additions & 1 deletion clients/python/text_generation/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ class ChatCompletionComplete(BaseModel):
# Log probabilities for the chat completion
logprobs: Optional[Any]
# Reason for completion
finish_reason: str
finish_reason: Optional[str]
# Usage details of the chat completion
usage: Optional[Any] = None

Expand All @@ -191,6 +191,7 @@ class ChatCompletionChunk(BaseModel):
model: str
system_fingerprint: str
choices: List[Choice]
usage: Optional[Any] = None


class Parameters(BaseModel):
Expand Down
29 changes: 29 additions & 0 deletions docs/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,14 @@
},
"system_fingerprint": {
"type": "string"
},
"usage": {
"allOf": [
{
"$ref": "#/components/schemas/Usage"
}
],
"nullable": true
}
}
},
Expand Down Expand Up @@ -937,6 +945,14 @@
"stream": {
"type": "boolean"
},
"stream_options": {
"allOf": [
{
"$ref": "#/components/schemas/StreamOptions"
}
],
"nullable": true
},
"temperature": {
"type": "number",
"format": "float",
Expand Down Expand Up @@ -1912,6 +1928,19 @@
}
}
},
"StreamOptions": {
"type": "object",
"required": [
"include_usage"
],
"properties": {
"include_usage": {
"type": "boolean",
"description": "If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value.",
"example": "true"
}
}
},
"StreamResponse": {
"type": "object",
"required": [
Expand Down
12 changes: 6 additions & 6 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 30 additions & 22 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -67,31 +67,38 @@
'';
};
server = pkgs.python3.pkgs.callPackage ./nix/server.nix { inherit nix-filter; };
client = pkgs.python3.pkgs.callPackage ./nix/client.nix { };
in
{
checks = {
rust = with pkgs; rustPlatform.buildRustPackage {
name = "rust-checks";
src = ./.;
cargoLock = {
lockFile = ./Cargo.lock;
rust =
with pkgs;
rustPlatform.buildRustPackage {
name = "rust-checks";
src = ./.;
cargoLock = {
lockFile = ./Cargo.lock;
};
buildInputs = [ openssl.dev ];
nativeBuildInputs = [
clippy
pkg-config
protobuf
python3
rustfmt
];
buildPhase = ''
cargo check
'';
checkPhase = ''
cargo fmt -- --check
cargo test -j $NIX_BUILD_CORES
cargo clippy
'';
installPhase = "touch $out";
};
buildInputs = [ openssl.dev ];
nativeBuildInputs = [ clippy pkg-config protobuf python3 rustfmt ];
buildPhase = ''
cargo check
'';
checkPhase = ''
cargo fmt -- --check
cargo test -j $NIX_BUILD_CORES
cargo clippy
'';
installPhase = "touch $out";
} ;
};

formatter = pkgs.nixfmt-rfc-style;

devShells = with pkgs; rec {
default = pure;

Expand All @@ -106,10 +113,11 @@
test = mkShell {
buildInputs =
[
# benchmark
# launcher
# router
benchmark
launcher
router
server
client
openssl.dev
pkg-config
cargo
Expand Down
Loading
Loading