diff --git a/test/e2e/helm/test_kserve_sklearn.py b/test/e2e/helm/test_kserve_sklearn.py index 5b8d08d992a..140c34b5428 100644 --- a/test/e2e/helm/test_kserve_sklearn.py +++ b/test/e2e/helm/test_kserve_sklearn.py @@ -28,12 +28,13 @@ constants ) -from ..common.utils import KSERVE_TEST_NAMESPACE, predict +from ..common.utils import KSERVE_TEST_NAMESPACE, predict, get_cluster_ip @pytest.mark.helm def test_sklearn_kserve(): service_name = "isvc-sklearn-helm" + protocol_version = "v2" predictor = V1beta1PredictorSpec( min_replicas=1, @@ -43,7 +44,7 @@ def test_sklearn_kserve(): ), runtime="kserve-mlserver", storage_uri="gs://seldon-models/sklearn/mms/lr_model", - protocol_version="v2", + protocol_version=protocol_version, resources=V1ResourceRequirements( requests={"cpu": "50m", "memory": "128Mi"}, limits={"cpu": "100m", "memory": "512Mi"}, @@ -65,6 +66,14 @@ def test_sklearn_kserve(): kserve_client.create(isvc) kserve_client.wait_isvc_ready( service_name, namespace=KSERVE_TEST_NAMESPACE) + kserve_client.wait_model_ready( + service_name, + model_name=service_name, + isvc_namespace=KSERVE_TEST_NAMESPACE, + isvc_version=constants.KSERVE_V1BETA1_VERSION, + protocol_version=protocol_version, + cluster_ip=get_cluster_ip(), + ) res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2") diff --git a/test/e2e/logger/test_raw_logger.py b/test/e2e/logger/test_raw_logger.py index d6a09d708be..4c7961727bc 100644 --- a/test/e2e/logger/test_raw_logger.py +++ b/test/e2e/logger/test_raw_logger.py @@ -31,7 +31,6 @@ kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config")) -@pytest.mark.local @pytest.mark.raw def test_kserve_logger(): msg_dumper = 'message-dumper-raw' diff --git a/test/e2e/predictor/test_lightgbm.py b/test/e2e/predictor/test_lightgbm.py index 0396a9827e7..a680dd7481a 100644 --- a/test/e2e/predictor/test_lightgbm.py +++ b/test/e2e/predictor/test_lightgbm.py @@ -14,6 +14,7 @@ import json import os +import time import 
numpy import pytest @@ -102,8 +103,10 @@ def test_lightgbm_runtime_kserve(): @pytest.mark.fast +@pytest.mark.path_based_routing def test_lightgbm_v2_runtime_mlserver(): service_name = "isvc-lightgbm-v2-runtime" + protocol_version = "v2" predictor = V1beta1PredictorSpec( min_replicas=1, @@ -113,7 +116,7 @@ def test_lightgbm_v2_runtime_mlserver(): ), runtime="kserve-mlserver", storage_uri="gs://kfserving-examples/models/lightgbm/v2/iris", - protocol_version="v2", + protocol_version=protocol_version, resources=V1ResourceRequirements( requests={"cpu": "50m", "memory": "128Mi"}, limits={"cpu": "1", "memory": "1Gi"}, @@ -135,6 +138,17 @@ def test_lightgbm_v2_runtime_mlserver(): kserve_client.create(isvc) kserve_client.wait_isvc_ready( service_name, namespace=KSERVE_TEST_NAMESPACE) + # TODO: Remove sleep once wait_model_ready supports path based routing. Since path based routing generates a url + # different from the host based routing wait_model_ready will always fail. + time.sleep(10) + # kserve_client.wait_model_ready( + # service_name, + # model_name=service_name, + # isvc_namespace=KSERVE_TEST_NAMESPACE, + # isvc_version=constants.KSERVE_V1BETA1_VERSION, + # protocol_version=protocol_version, + # cluster_ip=get_cluster_ip(), + # ) res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2") diff --git a/test/e2e/predictor/test_sklearn.py b/test/e2e/predictor/test_sklearn.py index 94dd54ce399..ffd37e32e17 100644 --- a/test/e2e/predictor/test_sklearn.py +++ b/test/e2e/predictor/test_sklearn.py @@ -25,7 +25,7 @@ import kserve.protocol.grpc.grpc_predict_v2_pb2 as inference_pb2 -from ..common.utils import KSERVE_TEST_NAMESPACE, predict, predict_grpc +from ..common.utils import KSERVE_TEST_NAMESPACE, predict, predict_grpc, get_cluster_ip @pytest.mark.slow @@ -62,11 +62,13 @@ def test_sklearn_kserve(): @pytest.mark.slow def test_sklearn_v2_mlserver(): service_name = "sklearn-v2-mlserver" + protocol_version = "v2" + predictor = V1beta1PredictorSpec( 
min_replicas=1, sklearn=V1beta1SKLearnSpec( storage_uri="gs://seldon-models/sklearn/mms/lr_model", - protocol_version="v2", + protocol_version=protocol_version, resources=V1ResourceRequirements( requests={"cpu": "50m", "memory": "128Mi"}, limits={"cpu": "100m", "memory": "512Mi"}, @@ -86,6 +88,14 @@ def test_sklearn_v2_mlserver(): kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config")) kserve_client.create(isvc) kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE) + kserve_client.wait_model_ready( + service_name, + model_name=service_name, + isvc_namespace=KSERVE_TEST_NAMESPACE, + isvc_version=constants.KSERVE_V1BETA1_VERSION, + protocol_version=protocol_version, + cluster_ip=get_cluster_ip(), + ) res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2") assert res["outputs"][0]["data"] == [1, 1] @@ -131,6 +141,7 @@ def test_sklearn_runtime_kserve(): @pytest.mark.slow def test_sklearn_v2_runtime_mlserver(): service_name = "isvc-sklearn-v2-runtime" + protocol_version = "v2" predictor = V1beta1PredictorSpec( min_replicas=1, @@ -140,7 +151,7 @@ def test_sklearn_v2_runtime_mlserver(): ), runtime="kserve-mlserver", storage_uri="gs://seldon-models/sklearn/mms/lr_model", - protocol_version="v2", + protocol_version=protocol_version, resources=V1ResourceRequirements( requests={"cpu": "50m", "memory": "128Mi"}, limits={"cpu": "100m", "memory": "512Mi"}, @@ -160,6 +171,14 @@ def test_sklearn_v2_runtime_mlserver(): kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config")) kserve_client.create(isvc) kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE) + kserve_client.wait_model_ready( + service_name, + model_name=service_name, + isvc_namespace=KSERVE_TEST_NAMESPACE, + isvc_version=constants.KSERVE_V1BETA1_VERSION, + protocol_version=protocol_version, + cluster_ip=get_cluster_ip(), + ) res = predict(service_name, "./data/iris_input_v2.json", 
protocol_version="v2") assert res["outputs"][0]["data"] == [1, 1] diff --git a/test/e2e/predictor/test_xgboost.py b/test/e2e/predictor/test_xgboost.py index 1f92ef85beb..a21aae09a5c 100644 --- a/test/e2e/predictor/test_xgboost.py +++ b/test/e2e/predictor/test_xgboost.py @@ -14,6 +14,8 @@ import json import os +import time + import pytest from kubernetes import client from kubernetes.client import V1ContainerPort, V1EnvVar, V1ResourceRequirements @@ -60,14 +62,17 @@ def test_xgboost_kserve(): @pytest.mark.fast +@pytest.mark.path_based_routing def test_xgboost_v2_mlserver(): service_name = "isvc-xgboost-v2-mlserver" + protocol_version = "v2" + predictor = V1beta1PredictorSpec( min_replicas=1, xgboost=V1beta1XGBoostSpec( storage_uri="gs://kfserving-examples/models/xgboost/iris", env=[V1EnvVar(name="MLSERVER_MODEL_PARALLEL_WORKERS", value="0")], - protocol_version="v2", + protocol_version=protocol_version, resources=V1ResourceRequirements( requests={"cpu": "50m", "memory": "128Mi"}, limits={"cpu": "100m", "memory": "1024Mi"}, @@ -89,6 +94,17 @@ def test_xgboost_v2_mlserver(): kserve_client.create(isvc) kserve_client.wait_isvc_ready( service_name, namespace=KSERVE_TEST_NAMESPACE) + # TODO: Remove sleep once wait_model_ready supports path based routing. Since path based routing generates a url + # different from the host based routing wait_model_ready will always fail. 
+ time.sleep(10) + # kserve_client.wait_model_ready( + # service_name, + # model_name=service_name, + # isvc_namespace=KSERVE_TEST_NAMESPACE, + # isvc_version=constants.KSERVE_V1BETA1_VERSION, + # protocol_version=protocol_version, + # cluster_ip=get_cluster_ip(), + # ) res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2") @@ -134,8 +150,10 @@ def test_xgboost_runtime_kserve(): @pytest.mark.fast +@pytest.mark.path_based_routing def test_xgboost_v2_runtime_mlserver(): service_name = "isvc-xgboost-v2-runtime" + protocol_version = "v2" predictor = V1beta1PredictorSpec( min_replicas=1, @@ -145,7 +163,7 @@ def test_xgboost_v2_runtime_mlserver(): ), runtime="kserve-mlserver", storage_uri="gs://kfserving-examples/models/xgboost/iris", - protocol_version="v2", + protocol_version=protocol_version, resources=V1ResourceRequirements( requests={"cpu": "50m", "memory": "128Mi"}, limits={"cpu": "100m", "memory": "1024Mi"}, @@ -167,6 +185,17 @@ def test_xgboost_v2_runtime_mlserver(): kserve_client.create(isvc) kserve_client.wait_isvc_ready( service_name, namespace=KSERVE_TEST_NAMESPACE) + # TODO: Remove sleep once wait_model_ready supports path based routing. Since path based routing generates a url + # different from the host based routing wait_model_ready will always fail. 
+ time.sleep(10) + # kserve_client.wait_model_ready( + # service_name, + # model_name=service_name, + # isvc_namespace=KSERVE_TEST_NAMESPACE, + # isvc_version=constants.KSERVE_V1BETA1_VERSION, + # protocol_version=protocol_version, + # cluster_ip=get_cluster_ip(), + # ) res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2") diff --git a/test/e2e/pytest.ini b/test/e2e/pytest.ini index fa4fd0043ab..7fc0a991689 100644 --- a/test/e2e/pytest.ini +++ b/test/e2e/pytest.ini @@ -12,3 +12,5 @@ markers = raw: raw e2e tests kourier: e2e tests using kourier as networking layer collocation: transformer and predictor collocation e2e tests + predictor: predictor e2e tests including grpc + path_based_routing: e2e test for path based routing