From 15c5c10ac5a403fb8e9a8d4670f9163a341458c7 Mon Sep 17 00:00:00 2001 From: "Yu, Zhentao" Date: Thu, 29 Feb 2024 07:23:29 +0000 Subject: [PATCH] update main_pybind Signed-off-by: Yu, Zhentao --- neural_speed/application/main_pybind.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/neural_speed/application/main_pybind.cpp b/neural_speed/application/main_pybind.cpp index 56bf235d2..9bb7b9048 100644 --- a/neural_speed/application/main_pybind.cpp +++ b/neural_speed/application/main_pybind.cpp @@ -265,6 +265,11 @@ class ModelServer { num_beams, do_sample, top_k, top_p, temperature, min_new_tokens, length_penalty, early_stopping, n_keep, n_discard, shift_roped_k, batch_size, pad_token, memory_dtype, continuous_batching, max_request_num, model_scratch_enlarge_scale); + static std::set server_supported_model_archs = {MODEL_GPTJ, MODEL_LLAMA}; + if (server_supported_model_archs.count(params.model_arch) == 0) { + fprintf(stderr, "\nERROR: ModelServer only supports gpt-j, llama!\n"); + running = false; + } } ~ModelServer() {