Fix model is ready even if there is no model (kserve#3275)
Fail fast with an error when no model in the list is ready, instead of starting the server with an empty model list.

Signed-off-by: HAO <[email protected]>
Co-authored-by: koshino17 <[email protected]>
HAO2167 and koshino17 authored Jun 24, 2024
1 parent 352e030 commit d19e310
Showing 7 changed files with 46 additions and 11 deletions.
2 changes: 1 addition & 1 deletion python/huggingfaceserver/huggingfaceserver/__main__.py
@@ -252,7 +252,7 @@ def load_model():
     logging.configure_logging(args.log_config_file)
     try:
         model = load_model()
-        kserve.ModelServer().start([model] if model.ready else [])
+        kserve.ModelServer().start([model])
     except Exception as e:
         import sys

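The same one-line change recurs in the lgbserver, sklearnserver, and xgbserver entrypoints further down. For context, here is a minimal sketch (with a hypothetical stand-in object) of the behavior being fixed: the old guard turned a not-ready model into an empty list, so the server started with zero models yet still reported itself healthy.

# Minimal sketch of the pre-fix pattern; _Stub is a hypothetical stand-in
# for a model whose load() failed.
class _Stub:
    ready = False

model = _Stub()
models = [model] if model.ready else []  # old guard: silently empty
print(models)  # [] -- ModelServer().start([]) used to come up "healthy"
# After this commit the model is always passed through, and the readiness
# check inside ModelServer.start() raises NoModelReady instead (see below).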
13 changes: 13 additions & 0 deletions python/kserve/kserve/errors.py
@@ -132,3 +132,16 @@ async def not_implemented_error_handler(_, exc):
     return JSONResponse(
         status_code=HTTPStatus.NOT_IMPLEMENTED, content={"error": str(exc)}
     )
+
+
+class NoModelReady(RuntimeError):
+    def __init__(self, models: list, detail: str = None):
+        self.models = models
+        self.detail = detail
+
+    def __str__(self):
+        model_name_list = [model.name for model in self.models]
+        self.error_msg = f"Models with name {','.join(model_name_list)} are not ready."
+        if self.detail:
+            self.error_msg = self.error_msg + " " + self.detail
+        return self.error_msg
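A minimal sketch of how the new exception renders its message, assuming a kserve build that contains this commit; the stand-in class below exists only to supply the .name attribute that __str__ reads.

from kserve.errors import NoModelReady

class _NamedStub:
    # only .name is used by NoModelReady.__str__
    def __init__(self, name):
        self.name = name

err = NoModelReady([_NamedStub("mnist"), _NamedStub("bert")], detail="load() failed")
print(err)  # Models with name mnist,bert are not ready. load() failed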
12 changes: 9 additions & 3 deletions python/kserve/kserve/model_server.py
@@ -35,6 +35,7 @@
 from .protocol.model_repository_extension import ModelRepositoryExtension
 from .protocol.rest.server import UvicornServer
 from .utils import utils
+from kserve.errors import NoModelReady

 DEFAULT_HTTP_PORT = 8080
 DEFAULT_GRPC_PORT = 8081
@@ -226,13 +227,18 @@ def start(
             models: a list of models to register to the model server.
         """
         if isinstance(models, list):
+            at_least_one_model_ready = False
             for model in models:
                 if isinstance(model, BaseKServeModel):
-                    self.register_model(model)
-                    # pass whether to log request latency into the model
-                    model.enable_latency_logging = self.enable_latency_logging
+                    if model.ready:
+                        at_least_one_model_ready = True
+                        self.register_model(model)
+                        # pass whether to log request latency into the model
+                        model.enable_latency_logging = self.enable_latency_logging
                 else:
                     raise RuntimeError("Model type should be 'BaseKServeModel'")
+            if not at_least_one_model_ready and models:
+                raise NoModelReady(models)
         elif isinstance(models, dict):
             if all([isinstance(v, Deployment) for v in models.values()]):
                 # TODO: make this port number a variable
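Taken together, the changes above make ModelServer.start() fail fast instead of serving an empty model list. A minimal sketch of the new behavior, assuming a kserve build containing this commit (the model class here is hypothetical):

from kserve import Model, ModelServer
from kserve.errors import NoModelReady

class NeverReady(Model):
    # hypothetical model that never finishes loading
    def __init__(self, name: str):
        super().__init__(name)
        self.ready = False

try:
    ModelServer().start([NeverReady("example")])
except NoModelReady as e:
    # start() now raises before serving, rather than coming up with no models
    print(e)  # Models with name example are not ready.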
18 changes: 17 additions & 1 deletion python/kserve/test/test_server.py
@@ -30,7 +30,7 @@
 from ray import serve

 from kserve import Model, ModelRepository, ModelServer
-from kserve.errors import InvalidInput
+from kserve.errors import InvalidInput, NoModelReady
 from kserve.model import PredictorProtocol
 from kserve.protocol.infer_type import (
     InferInput,
@@ -251,6 +251,13 @@ async def load(self, name: str) -> bool:
         return False


+class DummyNeverReadyModel(Model):
+    def __init__(self, name):
+        super().__init__(name)
+        self.name = name
+        self.ready = False
+
+
 @pytest.mark.asyncio
 class TestModel:
     async def test_validate(self):
@@ -815,3 +822,12 @@ def test_explain(self, http_server_client):
"/v1/models/TestModel:explain", content=b'{"instances":[[1,2]]}'
)
assert resp.status_code == 503


class TestWithUnhealthModel:
def test_with_not_ready_model(self):
model = DummyNeverReadyModel("Dummy")
server = ModelServer()
with pytest.raises(NoModelReady) as exc_info:
server.start([model])
assert exc_info.type == NoModelReady
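To exercise just the new test locally, something like the following should work, assuming pytest and the kserve test dependencies are installed:

# Run only the new test class via pytest's Python API (equivalent to
# passing the same selector to the pytest CLI).
import pytest

pytest.main(["-q", "python/kserve/test/test_server.py::TestWithUnhealthModel"])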
4 changes: 2 additions & 2 deletions python/lgbserver/lgbserver/__main__.py
@@ -42,7 +42,7 @@
     try:
         model.load()
         # LightGBM doesn't support multi-process, so the number of http server workers should be 1.
-        kserve.ModelServer(workers=1).start([model] if model.ready else [])
+        kserve.ModelServer(workers=1).start([model])
     except ModelMissingError:
         logger.error(
             f"fail to load model {args.model_name} from dir {args.model_dir},"
@@ -51,4 +51,4 @@
         model_repository = LightGBMModelRepository(args.model_dir, args.nthread)
         # LightGBM doesn't support multi-process, so the number of http server workers should be 1.
         server = kserve.ModelServer(workers=1, registered_models=model_repository)
-        server.start([model] if model.ready else [])
+        server.start([model])
4 changes: 2 additions & 2 deletions python/sklearnserver/sklearnserver/__main__.py
@@ -33,7 +33,7 @@
     model = SKLearnModel(args.model_name, args.model_dir)
     try:
         model.load()
-        kserve.ModelServer().start([model] if model.ready else [])
+        kserve.ModelServer().start([model])

     except ModelMissingError:
         logger.error(
@@ -43,4 +43,4 @@

         kserve.ModelServer(
             registered_models=SKLearnModelRepository(args.model_dir)
-        ).start([model] if model.ready else [])
+        ).start([model])
4 changes: 2 additions & 2 deletions python/xgbserver/xgbserver/__main__.py
@@ -41,7 +41,7 @@
     model = XGBoostModel(args.model_name, args.model_dir, args.nthread)
     try:
         model.load()
-        kserve.ModelServer().start([model] if model.ready else [])
+        kserve.ModelServer().start([model])
     except ModelMissingError:
         logger.error(
             f"fail to locate model file for model {args.model_name} under dir {args.model_dir},"
@@ -50,4 +50,4 @@

         kserve.ModelServer(
             registered_models=XGBoostModelRepository(args.model_dir, args.nthread)
-        ).start([model] if model.ready else [])
+        ).start([model])
