Skip to content
This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

Commit

Permalink
update main_pybind
Browse files — browse the repository at this point in the history
Signed-off-by: Yu, Zhentao <[email protected]>
  • Loading branch information
zhentaoyu committed Feb 29, 2024
1 parent 2cb2416 commit 15c5c10
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions neural_speed/application/main_pybind.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -265,6 +265,11 @@ class ModelServer {
num_beams, do_sample, top_k, top_p, temperature, min_new_tokens, length_penalty, early_stopping,
n_keep, n_discard, shift_roped_k, batch_size, pad_token, memory_dtype, continuous_batching,
max_request_num, model_scratch_enlarge_scale);
static std::set<model_archs> server_supported_model_archs = {MODEL_GPTJ, MODEL_LLAMA};
if (server_supported_model_archs.count(params.model_arch) == 0) {
fprintf(stderr, "\nERROR: ModelServer only supports gpt-j, llama!\n");
running = false;
}
}

~ModelServer() {
Expand Down

0 comments on commit 15c5c10

Please sign in to comment.