Update main_pybind: make ModelServer reject model architectures other than GPT-J and LLaMA

Signed-off-by: Yu, Zhentao <[email protected]>
intel · Feb 29, 2024 · 15c5c10 · 15c5c10
1 parent 2cb2416
commit 15c5c10
Showing 1 changed file with 5 additions and 0 deletions.
diff --git a/neural_speed/application/main_pybind.cpp b/neural_speed/application/main_pybind.cpp
@@ -265,6 +265,11 @@ class ModelServer {
                     num_beams, do_sample, top_k, top_p, temperature, min_new_tokens, length_penalty, early_stopping,
                     n_keep, n_discard, shift_roped_k, batch_size, pad_token, memory_dtype, continuous_batching,
                     max_request_num, model_scratch_enlarge_scale);
+    static std::set<model_archs> server_supported_model_archs = {MODEL_GPTJ, MODEL_LLAMA};
+    if (server_supported_model_archs.count(params.model_arch) == 0) {
+      fprintf(stderr, "\nERROR: ModelServer only supports gpt-j, llama!\n");
+      running = false;
+    }
   }
 
   ~ModelServer() {