Skip to content

Commit

Permalink
Wait for model to be ready in mlserver e2e tests
Browse files Browse the repository at this point in the history
Signed-off-by: Sivanantham Chinnaiyan <[email protected]>
  • Loading branch information
sivanantha321 committed Feb 14, 2024
1 parent ac61abb commit f060d7c
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 12 deletions.
13 changes: 11 additions & 2 deletions test/e2e/helm/test_kserve_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,13 @@
constants
)

from ..common.utils import KSERVE_TEST_NAMESPACE, predict
from ..common.utils import KSERVE_TEST_NAMESPACE, predict, get_cluster_ip


@pytest.mark.helm
def test_sklearn_kserve():
service_name = "isvc-sklearn-helm"
protocol_version = "v2"

predictor = V1beta1PredictorSpec(
min_replicas=1,
Expand All @@ -43,7 +44,7 @@ def test_sklearn_kserve():
),
runtime="kserve-mlserver",
storage_uri="gs://seldon-models/sklearn/mms/lr_model",
protocol_version="v2",
protocol_version=protocol_version,
resources=V1ResourceRequirements(
requests={"cpu": "50m", "memory": "128Mi"},
limits={"cpu": "100m", "memory": "512Mi"},
Expand All @@ -65,6 +66,14 @@ def test_sklearn_kserve():
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(
service_name, namespace=KSERVE_TEST_NAMESPACE)
kserve_client.wait_model_ready(
service_name,
model_name=service_name,
isvc_namespace=KSERVE_TEST_NAMESPACE,
isvc_version=constants.KSERVE_V1BETA1_VERSION,
protocol_version=protocol_version,
cluster_ip=get_cluster_ip(),
)

res = predict(service_name, "./data/iris_input_v2.json",
protocol_version="v2")
Expand Down
1 change: 0 additions & 1 deletion test/e2e/logger/test_raw_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))


@pytest.mark.local
@pytest.mark.raw
def test_kserve_logger():
msg_dumper = 'message-dumper-raw'
Expand Down
16 changes: 15 additions & 1 deletion test/e2e/predictor/test_lightgbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import json
import os
import time

import numpy
import pytest
Expand Down Expand Up @@ -102,8 +103,10 @@ def test_lightgbm_runtime_kserve():


@pytest.mark.fast
@pytest.mark.path_based_routing
def test_lightgbm_v2_runtime_mlserver():
service_name = "isvc-lightgbm-v2-runtime"
protocol_version = "v2"

predictor = V1beta1PredictorSpec(
min_replicas=1,
Expand All @@ -113,7 +116,7 @@ def test_lightgbm_v2_runtime_mlserver():
),
runtime="kserve-mlserver",
storage_uri="gs://kfserving-examples/models/lightgbm/v2/iris",
protocol_version="v2",
protocol_version=protocol_version,
resources=V1ResourceRequirements(
requests={"cpu": "50m", "memory": "128Mi"},
limits={"cpu": "1", "memory": "1Gi"},
Expand All @@ -135,6 +138,17 @@ def test_lightgbm_v2_runtime_mlserver():
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(
service_name, namespace=KSERVE_TEST_NAMESPACE)
# TODO: Remove sleep once wait_model_ready supports path based routing. Since path based routing generates a url
# different from the host based routing, wait_model_ready will always fail.
time.sleep(10)
# kserve_client.wait_model_ready(
# service_name,
# model_name=service_name,
# isvc_namespace=KSERVE_TEST_NAMESPACE,
# isvc_version=constants.KSERVE_V1BETA1_VERSION,
# protocol_version=protocol_version,
# cluster_ip=get_cluster_ip(),
# )

res = predict(service_name, "./data/iris_input_v2.json",
protocol_version="v2")
Expand Down
15 changes: 12 additions & 3 deletions test/e2e/predictor/test_mlflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@
from kubernetes.client import V1ResourceRequirements
import pytest

from ..common.utils import predict
from ..common.utils import predict, get_cluster_ip
from ..common.utils import KSERVE_TEST_NAMESPACE


@pytest.mark.slow
def test_mlflow_v2_runtime_kserve():
service_name = "isvc-mlflow-v2-runtime"
protocol_version = "v2"

predictor = V1beta1PredictorSpec(
min_replicas=1,
Expand All @@ -41,7 +42,7 @@ def test_mlflow_v2_runtime_kserve():
name="mlflow",
),
storage_uri="gs://kfserving-examples/models/mlflow/wine",
protocol_version="v2",
protocol_version=protocol_version,
resources=V1ResourceRequirements(
requests={"cpu": "50m", "memory": "128Mi"},
limits={"cpu": "1", "memory": "1Gi"},
Expand All @@ -61,8 +62,16 @@ def test_mlflow_v2_runtime_kserve():
kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
kserve_client.wait_model_ready(
service_name,
model_name=service_name,
isvc_namespace=KSERVE_TEST_NAMESPACE,
isvc_version=constants.KSERVE_V1BETA1_VERSION,
protocol_version=protocol_version,
cluster_ip=get_cluster_ip(),
)

res = predict(service_name, "./data/mlflow_input_v2.json", protocol_version="v2")
res = predict(service_name, "./data/mlflow_input_v2.json", protocol_version=protocol_version)
assert res["outputs"][0]["data"] == [5.576883936610762]

kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
25 changes: 22 additions & 3 deletions test/e2e/predictor/test_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

import kserve.protocol.grpc.grpc_predict_v2_pb2 as inference_pb2

from ..common.utils import KSERVE_TEST_NAMESPACE, predict, predict_grpc
from ..common.utils import KSERVE_TEST_NAMESPACE, predict, predict_grpc, get_cluster_ip


@pytest.mark.slow
Expand Down Expand Up @@ -62,11 +62,13 @@ def test_sklearn_kserve():
@pytest.mark.slow
def test_sklearn_v2_mlserver():
service_name = "sklearn-v2-mlserver"
protocol_version = "v2"

predictor = V1beta1PredictorSpec(
min_replicas=1,
sklearn=V1beta1SKLearnSpec(
storage_uri="gs://seldon-models/sklearn/mms/lr_model",
protocol_version="v2",
protocol_version=protocol_version,
resources=V1ResourceRequirements(
requests={"cpu": "50m", "memory": "128Mi"},
limits={"cpu": "100m", "memory": "512Mi"},
Expand All @@ -86,6 +88,14 @@ def test_sklearn_v2_mlserver():
kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
kserve_client.wait_model_ready(
service_name,
model_name=service_name,
isvc_namespace=KSERVE_TEST_NAMESPACE,
isvc_version=constants.KSERVE_V1BETA1_VERSION,
protocol_version=protocol_version,
cluster_ip=get_cluster_ip(),
)

res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2")
assert res["outputs"][0]["data"] == [1, 1]
Expand Down Expand Up @@ -131,6 +141,7 @@ def test_sklearn_runtime_kserve():
@pytest.mark.slow
def test_sklearn_v2_runtime_mlserver():
service_name = "isvc-sklearn-v2-runtime"
protocol_version = "v2"

predictor = V1beta1PredictorSpec(
min_replicas=1,
Expand All @@ -140,7 +151,7 @@ def test_sklearn_v2_runtime_mlserver():
),
runtime="kserve-mlserver",
storage_uri="gs://seldon-models/sklearn/mms/lr_model",
protocol_version="v2",
protocol_version=protocol_version,
resources=V1ResourceRequirements(
requests={"cpu": "50m", "memory": "128Mi"},
limits={"cpu": "100m", "memory": "512Mi"},
Expand All @@ -160,6 +171,14 @@ def test_sklearn_v2_runtime_mlserver():
kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
kserve_client.wait_model_ready(
service_name,
model_name=service_name,
isvc_namespace=KSERVE_TEST_NAMESPACE,
isvc_version=constants.KSERVE_V1BETA1_VERSION,
protocol_version=protocol_version,
cluster_ip=get_cluster_ip(),
)

res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2")
assert res["outputs"][0]["data"] == [1, 1]
Expand Down
33 changes: 31 additions & 2 deletions test/e2e/predictor/test_xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

import json
import os
import time

import pytest
from kubernetes import client
from kubernetes.client import V1ContainerPort, V1EnvVar, V1ResourceRequirements
Expand Down Expand Up @@ -60,14 +62,17 @@ def test_xgboost_kserve():


@pytest.mark.fast
@pytest.mark.path_based_routing
def test_xgboost_v2_mlserver():
service_name = "isvc-xgboost-v2-mlserver"
protocol_version = "v2"

predictor = V1beta1PredictorSpec(
min_replicas=1,
xgboost=V1beta1XGBoostSpec(
storage_uri="gs://kfserving-examples/models/xgboost/iris",
env=[V1EnvVar(name="MLSERVER_MODEL_PARALLEL_WORKERS", value="0")],
protocol_version="v2",
protocol_version=protocol_version,
resources=V1ResourceRequirements(
requests={"cpu": "50m", "memory": "128Mi"},
limits={"cpu": "100m", "memory": "1024Mi"},
Expand All @@ -89,6 +94,17 @@ def test_xgboost_v2_mlserver():
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(
service_name, namespace=KSERVE_TEST_NAMESPACE)
# TODO: Remove sleep once wait_model_ready supports path based routing. Since path based routing generates a url
# different from the host based routing, wait_model_ready will always fail.
time.sleep(10)
# kserve_client.wait_model_ready(
# service_name,
# model_name=service_name,
# isvc_namespace=KSERVE_TEST_NAMESPACE,
# isvc_version=constants.KSERVE_V1BETA1_VERSION,
# protocol_version=protocol_version,
# cluster_ip=get_cluster_ip(),
# )

res = predict(service_name, "./data/iris_input_v2.json",
protocol_version="v2")
Expand Down Expand Up @@ -134,8 +150,10 @@ def test_xgboost_runtime_kserve():


@pytest.mark.fast
@pytest.mark.path_based_routing
def test_xgboost_v2_runtime_mlserver():
service_name = "isvc-xgboost-v2-runtime"
protocol_version = "v2"

predictor = V1beta1PredictorSpec(
min_replicas=1,
Expand All @@ -145,7 +163,7 @@ def test_xgboost_v2_runtime_mlserver():
),
runtime="kserve-mlserver",
storage_uri="gs://kfserving-examples/models/xgboost/iris",
protocol_version="v2",
protocol_version=protocol_version,
resources=V1ResourceRequirements(
requests={"cpu": "50m", "memory": "128Mi"},
limits={"cpu": "100m", "memory": "1024Mi"},
Expand All @@ -167,6 +185,17 @@ def test_xgboost_v2_runtime_mlserver():
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(
service_name, namespace=KSERVE_TEST_NAMESPACE)
# TODO: Remove sleep once wait_model_ready supports path based routing. Since path based routing generates a url
# different from the host based routing, wait_model_ready will always fail.
time.sleep(10)
# kserve_client.wait_model_ready(
# service_name,
# model_name=service_name,
# isvc_namespace=KSERVE_TEST_NAMESPACE,
# isvc_version=constants.KSERVE_V1BETA1_VERSION,
# protocol_version=protocol_version,
# cluster_ip=get_cluster_ip(),
# )

res = predict(service_name, "./data/iris_input_v2.json",
protocol_version="v2")
Expand Down
2 changes: 2 additions & 0 deletions test/e2e/pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ markers =
raw: raw e2e tests
kourier: e2e tests using kourier as networking layer
collocation: transformer and predictor collocation e2e tests
predictor: predictor e2e tests including grpc
path_based_routing: e2e tests for path based routing

0 comments on commit f060d7c

Please sign in to comment.