Skip to content

Commit

Permalink
Wait for model to be ready in mlserver e2e tests
Browse files Browse the repository at this point in the history
Signed-off-by: Sivanantham Chinnaiyan <[email protected]>
  • Loading branch information
sivanantha321 committed Feb 14, 2024
1 parent ac61abb commit f060d7c
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 12 deletions.
13 changes: 11 additions & 2 deletions test/e2e/helm/test_kserve_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,13 @@
constants
)

from ..common.utils import KSERVE_TEST_NAMESPACE, predict
from ..common.utils import KSERVE_TEST_NAMESPACE, predict, get_cluster_ip


@pytest.mark.helm
def test_sklearn_kserve():
service_name = "isvc-sklearn-helm"
protocol_version = "v2"

predictor = V1beta1PredictorSpec(
min_replicas=1,
Expand All @@ -43,7 +44,7 @@ def test_sklearn_kserve():
),
runtime="kserve-mlserver",
storage_uri="gs://seldon-models/sklearn/mms/lr_model",
protocol_version="v2",
protocol_version=protocol_version,
resources=V1ResourceRequirements(
requests={"cpu": "50m", "memory": "128Mi"},
limits={"cpu": "100m", "memory": "512Mi"},
Expand All @@ -65,6 +66,14 @@ def test_sklearn_kserve():
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(
service_name, namespace=KSERVE_TEST_NAMESPACE)
kserve_client.wait_model_ready(
service_name,
model_name=service_name,
isvc_namespace=KSERVE_TEST_NAMESPACE,
isvc_version=constants.KSERVE_V1BETA1_VERSION,
protocol_version=protocol_version,
cluster_ip=get_cluster_ip(),
)

res = predict(service_name, "./data/iris_input_v2.json",
protocol_version="v2")
Expand Down
1 change: 0 additions & 1 deletion test/e2e/logger/test_raw_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))


@pytest.mark.local
@pytest.mark.raw
def test_kserve_logger():
msg_dumper = 'message-dumper-raw'
Expand Down
16 changes: 15 additions & 1 deletion test/e2e/predictor/test_lightgbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import json
import os
import time

import numpy
import pytest
Expand Down Expand Up @@ -102,8 +103,10 @@ def test_lightgbm_runtime_kserve():


@pytest.mark.fast
@pytest.mark.path_based_routing
def test_lightgbm_v2_runtime_mlserver():
service_name = "isvc-lightgbm-v2-runtime"
protocol_version = "v2"

predictor = V1beta1PredictorSpec(
min_replicas=1,
Expand All @@ -113,7 +116,7 @@ def test_lightgbm_v2_runtime_mlserver():
),
runtime="kserve-mlserver",
storage_uri="gs://kfserving-examples/models/lightgbm/v2/iris",
protocol_version="v2",
protocol_version=protocol_version,
resources=V1ResourceRequirements(
requests={"cpu": "50m", "memory": "128Mi"},
limits={"cpu": "1", "memory": "1Gi"},
Expand All @@ -135,6 +138,17 @@ def test_lightgbm_v2_runtime_mlserver():
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(
service_name, namespace=KSERVE_TEST_NAMESPACE)
# TODO: Remove sleep once wait_model_ready supports path based routing. Since path based routing generates a url
# different from the host based routing, wait_model_ready will always fail.
time.sleep(10)
# kserve_client.wait_model_ready(
# service_name,
# model_name=service_name,
# isvc_namespace=KSERVE_TEST_NAMESPACE,
# isvc_version=constants.KSERVE_V1BETA1_VERSION,
# protocol_version=protocol_version,
# cluster_ip=get_cluster_ip(),
# )

res = predict(service_name, "./data/iris_input_v2.json",
protocol_version="v2")
Expand Down
15 changes: 12 additions & 3 deletions test/e2e/predictor/test_mlflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@
from kubernetes.client import V1ResourceRequirements
import pytest

from ..common.utils import predict
from ..common.utils import predict, get_cluster_ip
from ..common.utils import KSERVE_TEST_NAMESPACE


@pytest.mark.slow
def test_mlflow_v2_runtime_kserve():
service_name = "isvc-mlflow-v2-runtime"
protocol_version = "v2"

predictor = V1beta1PredictorSpec(
min_replicas=1,
Expand All @@ -41,7 +42,7 @@ def test_mlflow_v2_runtime_kserve():
name="mlflow",
),
storage_uri="gs://kfserving-examples/models/mlflow/wine",
protocol_version="v2",
protocol_version=protocol_version,
resources=V1ResourceRequirements(
requests={"cpu": "50m", "memory": "128Mi"},
limits={"cpu": "1", "memory": "1Gi"},
Expand All @@ -61,8 +62,16 @@ def test_mlflow_v2_runtime_kserve():
kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
kserve_client.wait_model_ready(
service_name,
model_name=service_name,
isvc_namespace=KSERVE_TEST_NAMESPACE,
isvc_version=constants.KSERVE_V1BETA1_VERSION,
protocol_version=protocol_version,
cluster_ip=get_cluster_ip(),
)

res = predict(service_name, "./data/mlflow_input_v2.json", protocol_version="v2")
res = predict(service_name, "./data/mlflow_input_v2.json", protocol_version=protocol_version)
assert res["outputs"][0]["data"] == [5.576883936610762]

kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
25 changes: 22 additions & 3 deletions test/e2e/predictor/test_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

import kserve.protocol.grpc.grpc_predict_v2_pb2 as inference_pb2

from ..common.utils import KSERVE_TEST_NAMESPACE, predict, predict_grpc
from ..common.utils import KSERVE_TEST_NAMESPACE, predict, predict_grpc, get_cluster_ip


@pytest.mark.slow
Expand Down Expand Up @@ -62,11 +62,13 @@ def test_sklearn_kserve():
@pytest.mark.slow
def test_sklearn_v2_mlserver():
service_name = "sklearn-v2-mlserver"
protocol_version = "v2"

predictor = V1beta1PredictorSpec(
min_replicas=1,
sklearn=V1beta1SKLearnSpec(
storage_uri="gs://seldon-models/sklearn/mms/lr_model",
protocol_version="v2",
protocol_version=protocol_version,
resources=V1ResourceRequirements(
requests={"cpu": "50m", "memory": "128Mi"},
limits={"cpu": "100m", "memory": "512Mi"},
Expand All @@ -86,6 +88,14 @@ def test_sklearn_v2_mlserver():
kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
kserve_client.wait_model_ready(
service_name,
model_name=service_name,
isvc_namespace=KSERVE_TEST_NAMESPACE,
isvc_version=constants.KSERVE_V1BETA1_VERSION,
protocol_version=protocol_version,
cluster_ip=get_cluster_ip(),
)

res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2")
assert res["outputs"][0]["data"] == [1, 1]
Expand Down Expand Up @@ -131,6 +141,7 @@ def test_sklearn_runtime_kserve():
@pytest.mark.slow
def test_sklearn_v2_runtime_mlserver():
service_name = "isvc-sklearn-v2-runtime"
protocol_version = "v2"

predictor = V1beta1PredictorSpec(
min_replicas=1,
Expand All @@ -140,7 +151,7 @@ def test_sklearn_v2_runtime_mlserver():
),
runtime="kserve-mlserver",
storage_uri="gs://seldon-models/sklearn/mms/lr_model",
protocol_version="v2",
protocol_version=protocol_version,
resources=V1ResourceRequirements(
requests={"cpu": "50m", "memory": "128Mi"},
limits={"cpu": "100m", "memory": "512Mi"},
Expand All @@ -160,6 +171,14 @@ def test_sklearn_v2_runtime_mlserver():
kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
kserve_client.wait_model_ready(
service_name,
model_name=service_name,
isvc_namespace=KSERVE_TEST_NAMESPACE,
isvc_version=constants.KSERVE_V1BETA1_VERSION,
protocol_version=protocol_version,
cluster_ip=get_cluster_ip(),
)

res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2")
assert res["outputs"][0]["data"] == [1, 1]
Expand Down
33 changes: 31 additions & 2 deletions test/e2e/predictor/test_xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

import json
import os
import time

import pytest
from kubernetes import client
from kubernetes.client import V1ContainerPort, V1EnvVar, V1ResourceRequirements
Expand Down Expand Up @@ -60,14 +62,17 @@ def test_xgboost_kserve():


@pytest.mark.fast
@pytest.mark.path_based_routing
def test_xgboost_v2_mlserver():
service_name = "isvc-xgboost-v2-mlserver"
protocol_version = "v2"

predictor = V1beta1PredictorSpec(
min_replicas=1,
xgboost=V1beta1XGBoostSpec(
storage_uri="gs://kfserving-examples/models/xgboost/iris",
env=[V1EnvVar(name="MLSERVER_MODEL_PARALLEL_WORKERS", value="0")],
protocol_version="v2",
protocol_version=protocol_version,
resources=V1ResourceRequirements(
requests={"cpu": "50m", "memory": "128Mi"},
limits={"cpu": "100m", "memory": "1024Mi"},
Expand All @@ -89,6 +94,17 @@ def test_xgboost_v2_mlserver():
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(
service_name, namespace=KSERVE_TEST_NAMESPACE)
# TODO: Remove sleep once wait_model_ready supports path based routing. Since path based routing generates a url
# different from the host based routing, wait_model_ready will always fail.
time.sleep(10)
# kserve_client.wait_model_ready(
# service_name,
# model_name=service_name,
# isvc_namespace=KSERVE_TEST_NAMESPACE,
# isvc_version=constants.KSERVE_V1BETA1_VERSION,
# protocol_version=protocol_version,
# cluster_ip=get_cluster_ip(),
# )

res = predict(service_name, "./data/iris_input_v2.json",
protocol_version="v2")
Expand Down Expand Up @@ -134,8 +150,10 @@ def test_xgboost_runtime_kserve():


@pytest.mark.fast
@pytest.mark.path_based_routing
def test_xgboost_v2_runtime_mlserver():
service_name = "isvc-xgboost-v2-runtime"
protocol_version = "v2"

predictor = V1beta1PredictorSpec(
min_replicas=1,
Expand All @@ -145,7 +163,7 @@ def test_xgboost_v2_runtime_mlserver():
),
runtime="kserve-mlserver",
storage_uri="gs://kfserving-examples/models/xgboost/iris",
protocol_version="v2",
protocol_version=protocol_version,
resources=V1ResourceRequirements(
requests={"cpu": "50m", "memory": "128Mi"},
limits={"cpu": "100m", "memory": "1024Mi"},
Expand All @@ -167,6 +185,17 @@ def test_xgboost_v2_runtime_mlserver():
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(
service_name, namespace=KSERVE_TEST_NAMESPACE)
# TODO: Remove sleep once wait_model_ready supports path based routing. Since path based routing generates a url
# different from the host based routing, wait_model_ready will always fail.
time.sleep(10)
# kserve_client.wait_model_ready(
# service_name,
# model_name=service_name,
# isvc_namespace=KSERVE_TEST_NAMESPACE,
# isvc_version=constants.KSERVE_V1BETA1_VERSION,
# protocol_version=protocol_version,
# cluster_ip=get_cluster_ip(),
# )

res = predict(service_name, "./data/iris_input_v2.json",
protocol_version="v2")
Expand Down
2 changes: 2 additions & 0 deletions test/e2e/pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ markers =
raw: raw e2e tests
kourier: e2e tests using kourier as networking layer
collocation: transformer and predictor collocation e2e tests
predictor: predictor e2e tests including grpc
path_based_routing: e2e tests for path based routing

0 comments on commit f060d7c

Please sign in to comment.