diff --git a/python/huggingfaceserver/huggingfaceserver/model.py b/python/huggingfaceserver/huggingfaceserver/model.py
index 0d2ef725145..7575ee96064 100644
--- a/python/huggingfaceserver/huggingfaceserver/model.py
+++ b/python/huggingfaceserver/huggingfaceserver/model.py
@@ -196,6 +196,7 @@ async def generate(self, generate_request: GenerateRequest, headers: Dict[str, s
     async def predict(self, input_batch: Union[BatchEncoding, InferRequest], context: Dict[str, Any] = None) \
             -> Union[Tensor, InferResponse]:
+        response_headers = {}
         if self.predictor_host:
             # when predictor_host is provided, serialize the tensor and send to optimized model serving runtime
             # like NVIDIA triton inference server
@@ -208,7 +209,7 @@ async def predict(self, input_batch: Union[BatchEncoding, InferRequest], context
                     outputs = self.model.generate(**input_batch)
                 else:
                     outputs = self.model(**input_batch).logits
-                return outputs
+                return outputs, response_headers
             except Exception as e:
                 raise InferenceError(str(e))
diff --git a/python/huggingfaceserver/huggingfaceserver/test_model.py b/python/huggingfaceserver/huggingfaceserver/test_model.py
index 0cf957d0696..4db5bebefce 100644
--- a/python/huggingfaceserver/huggingfaceserver/test_model.py
+++ b/python/huggingfaceserver/huggingfaceserver/test_model.py
@@ -27,7 +27,7 @@ def test_t5():
     model.load()

     request = "translate this to germany"
-    response = asyncio.run(model({"instances": [request, request]}, headers={}))
+    response, response_headers = asyncio.run(model({"instances": [request, request]}, headers={}))
     assert response == {"predictions": ['Das ist für Deutschland', 'Das ist für Deutschland']}


@@ -35,7 +35,7 @@ def test_bert():
     model = HuggingfaceModel("bert-base-uncased", {"model_id": "bert-base-uncased", "do_lower_case": True})
     model.load()

-    response = asyncio.run(model({"instances": ["The capital of France is [MASK].",
+    response, response_headers = asyncio.run(model({"instances": ["The capital of France is [MASK].",
                                                 "The capital of [MASK] is paris."]}, headers={}))
     assert response == {"predictions": ["paris", "france"]}

@@ -51,7 +51,7 @@ def test_bert_predictor_host(httpx_mock: HTTPXMock):
                                         predictor_host="localhost:8081",
                                         predictor_protocol="v2"))
     model.load()

-    response = asyncio.run(model({"instances": ["The capital of France is [MASK]."]}, headers={}))
+    response, response_headers = asyncio.run(model({"instances": ["The capital of France is [MASK]."]}, headers={}))
     assert response == {"predictions": ["[PAD]"]}

@@ -62,7 +62,7 @@ def test_bert_sequence_classification():
     model.load()

     request = "Hello, my dog is cute."
-    response = asyncio.run(model({"instances": [request, request]}, headers={}))
+    response, response_headers = asyncio.run(model({"instances": [request, request]}, headers={}))
     assert response == {"predictions": [1, 1]}


@@ -73,7 +73,7 @@ def test_bert_token_classification():
     model.load()

     request = "HuggingFace is a company based in Paris and New York"
-    response = asyncio.run(model({"instances": [request, request]}, headers={}))
+    response, response_headers = asyncio.run(model({"instances": [request, request]}, headers={}))
     assert response == {"predictions": [[[0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
                                         [[0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]]}
diff --git a/python/kserve/test/test_server.py b/python/kserve/test/test_server.py
index 65e4bc81eb4..97e12905337 100644
--- a/python/kserve/test/test_server.py
+++ b/python/kserve/test/test_server.py
@@ -333,6 +333,7 @@ def test_infer_parameters_v2(self, http_server_client):
         input_data = json.dumps(req.to_rest()).encode('utf-8')

         with patch.object(DummyModel, 'predict', new_callable=mock.Mock) as mock_predict:
+            response_headers = {}
             mock_predict.return_value = InferResponse(model_name="TestModel", response_id="123",
                                                       parameters={
                                                           "test-str": "dummy",
@@ -347,7 +348,7 @@ def test_infer_parameters_v2(self, http_server_client):
                                                               "test-str": "dummy",
                                                               "test-bool": True,
                                                               "test-int": 100
-                                                          })])
+                                                          })]), response_headers
             resp = http_server_client.post('/v2/models/TestModel/infer', content=input_data)
             mock_predict.assert_called_with(req, mock.ANY)
diff --git a/python/lgbserver/lgbserver/test_model.py b/python/lgbserver/lgbserver/test_model.py
index 6e1c6b98fad..0368a043ef1 100644
--- a/python/lgbserver/lgbserver/test_model.py
+++ b/python/lgbserver/lgbserver/test_model.py
@@ -47,39 +47,37 @@ def test_model():
                'petal_width_(cm)': {0: 0.2}, 'sepal_length_(cm)': {0: 5.1}}

     response, response_headers = model.predict({"inputs": [request, request]})
-    assert numpy.argmax(response["predictions"][0]) == 2
+    assert numpy.argmax(response["predictions"][0]) == 0

-    response, response_headers = model.predict(
-        {"instances": [request, request]})
-    assert numpy.argmax(response["predictions"][0]) == 2
+    response, response_headers = model.predict({"instances": [request, request]})
+    assert numpy.argmax(response["predictions"][0]) == 0

     request = [
         {'sepal_width_(cm)': 3.5}, {'petal_length_(cm)': 1.4},
         {'petal_width_(cm)': 0.2}, {'sepal_length_(cm)': 5.1}
     ]
-    response = model.predict({"inputs": [request, request]})
+    response, response_headers = model.predict({"inputs": [request, request]})
     assert numpy.argmax(response["predictions"][0]) == 0

-    response = model.predict({"instances": [request, request]})
+    response, response_headers = model.predict({"instances": [request, request]})
     assert numpy.argmax(response["predictions"][0]) == 0

     request = [
         {'sepal_width_(cm)': 3.5}, {'petal_length_(cm)': 1.4},
         {'petal_width_(cm)': 0.2}
     ]
-    response = model.predict({"inputs": [request, request]})
+    response, response_headers = model.predict({"inputs": [request, request]})
     assert numpy.argmax(response["predictions"][0]) == 0

-    response = model.predict({"instances": [request, request]})
+    response, response_headers = model.predict({"instances": [request, request]})
     assert numpy.argmax(response["predictions"][0]) == 0

     # test v2 handler
     infer_input = InferInput(name="input-0", shape=[2, 4], datatype="FP32",
                              data=[[6.8, 2.8, 4.8, 1.6], [6.0, 3.4, 4.5, 1.6]])
-    infer_request = InferRequest(
-        model_name="model", infer_inputs=[infer_input])
+    infer_request = InferRequest(model_name="model",
+                                 infer_inputs=[infer_input])
     infer_response, response_headers = model.predict(infer_request)
     assert infer_response.to_rest()["outputs"] == \
-        [{'name': 'output-0', 'shape': [2, 3], 'datatype': 'FP64',
-         'data': [3.7899802486733807e-06, 0.9996982074114203, 0.00029800260833088297,
-                  5.2172911836629736e-05, 0.99973341723876, 0.000214409849403366]}]
+           [{'name': 'output-0', 'shape': [2, 3], 'datatype': 'FP64',
+             'data': [3.7899802486733807e-06, 0.9996982074114203, 0.00029800260833088297,
+                      5.2172911836629736e-05, 0.99973341723876, 0.000214409849403366]}]
diff --git a/python/test_resources/graph/success_200_isvc/model.py b/python/test_resources/graph/success_200_isvc/model.py
index bf2a4d6fe7f..0ec88cf15ce 100644
--- a/python/test_resources/graph/success_200_isvc/model.py
+++ b/python/test_resources/graph/success_200_isvc/model.py
@@ -32,7 +32,8 @@ def load(self):
         self.ready = True

     def predict(self, payload: Union[Dict, InferRequest, ModelInferRequest], headers) -> Dict:
-        return {"message": "SUCCESS"}
+        response_headers = {}
+        return {"message": "SUCCESS"}, response_headers


 parser = argparse.ArgumentParser(parents=[kserve.model_server.parser])
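
Taken together, these hunks change the predict contract so that a model returns a (payload, response_headers) tuple instead of a bare payload, and every caller in the tests now unpacks both values. The sketch below shows a minimal custom model written against that contract; the class name EchoModel, the model name "custom-model", and the echoed x-request-id header are illustrative assumptions, not part of this diff.

    # Minimal sketch, assuming the (payload, response_headers) return contract shown above.
    from typing import Dict

    import kserve


    class EchoModel(kserve.Model):
        def __init__(self, name: str):
            super().__init__(name)
            self.ready = False

        def load(self):
            # Mark the model as ready to serve traffic.
            self.ready = True

        def predict(self, payload: Dict, headers: Dict[str, str] = None):
            # The second element of the returned tuple is a dict of headers
            # attached to the HTTP response (x-request-id here is only an example).
            response_headers = {"x-request-id": (headers or {}).get("x-request-id", "")}
            return {"predictions": payload.get("instances", [])}, response_headers


    if __name__ == "__main__":
        kserve.ModelServer().start([EchoModel("custom-model")])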