Skip to content

Commit

Permalink
Added missing response_headers variable in predict call.
Browse files: browse the repository at this point in the history
Signed-off-by: Andrews Arokiam <[email protected]>
  • Loading branch information
andyi2it committed Jan 29, 2024
1 parent 2c379a8 commit 1f1764a
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 21 deletions.
3 changes: 2 additions & 1 deletion python/huggingfaceserver/huggingfaceserver/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ async def generate(self, generate_request: GenerateRequest, headers: Dict[str, s

async def predict(self, input_batch: Union[BatchEncoding, InferRequest], context: Dict[str, Any] = None) \
-> Union[Tensor, InferResponse]:
response_headers = {}
if self.predictor_host:
# when predictor_host is provided, serialize the tensor and send to optimized model serving runtime
# like NVIDIA triton inference server
Expand All @@ -208,7 +209,7 @@ async def predict(self, input_batch: Union[BatchEncoding, InferRequest], context
outputs = self.model.generate(**input_batch)
else:
outputs = self.model(**input_batch).logits
return outputs
return outputs, response_headers
except Exception as e:
raise InferenceError(str(e))

Expand Down
10 changes: 5 additions & 5 deletions python/huggingfaceserver/huggingfaceserver/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ def test_t5():
model.load()

request = "translate this to germany"
response = asyncio.run(model({"instances": [request, request]}, headers={}))
response, response_headers = asyncio.run(model({"instances": [request, request]}, headers={}))
assert response == {"predictions": ['Das ist für Deutschland', 'Das ist für Deutschland']}


def test_bert():
model = HuggingfaceModel("bert-base-uncased", {"model_id": "bert-base-uncased", "do_lower_case": True})
model.load()

response = asyncio.run(model({"instances": ["The capital of France is [MASK].",
response, response_headers = asyncio.run(model({"instances": ["The capital of France is [MASK].",
"The capital of [MASK] is paris."]}, headers={}))
assert response == {"predictions": ["paris", "france"]}

Expand All @@ -51,7 +51,7 @@ def test_bert_predictor_host(httpx_mock: HTTPXMock):
predictor_host="localhost:8081", predictor_protocol="v2"))
model.load()

response = asyncio.run(model({"instances": ["The capital of France is [MASK]."]}, headers={}))
response, response_headers = asyncio.run(model({"instances": ["The capital of France is [MASK]."]}, headers={}))
assert response == {"predictions": ["[PAD]"]}


Expand All @@ -62,7 +62,7 @@ def test_bert_sequence_classification():
model.load()

request = "Hello, my dog is cute."
response = asyncio.run(model({"instances": [request, request]}, headers={}))
response, response_headers = asyncio.run(model({"instances": [request, request]}, headers={}))
assert response == {"predictions": [1, 1]}


Expand All @@ -73,7 +73,7 @@ def test_bert_token_classification():
model.load()

request = "HuggingFace is a company based in Paris and New York"
response = asyncio.run(model({"instances": [request, request]}, headers={}))
response, response_headers = asyncio.run(model({"instances": [request, request]}, headers={}))
assert response == {"predictions": [[[0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]]}

Expand Down
3 changes: 2 additions & 1 deletion python/kserve/test/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ def test_infer_parameters_v2(self, http_server_client):

input_data = json.dumps(req.to_rest()).encode('utf-8')
with patch.object(DummyModel, 'predict', new_callable=mock.Mock) as mock_predict:
response_headers = {}
mock_predict.return_value = InferResponse(model_name="TestModel", response_id="123",
parameters={
"test-str": "dummy",
Expand All @@ -347,7 +348,7 @@ def test_infer_parameters_v2(self, http_server_client):
"test-str": "dummy",
"test-bool": True,
"test-int": 100
})])
})]), response_headers
resp = http_server_client.post('/v2/models/TestModel/infer', content=input_data)
mock_predict.assert_called_with(req, mock.ANY)

Expand Down
24 changes: 11 additions & 13 deletions python/lgbserver/lgbserver/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,39 +47,37 @@ def test_model():
'petal_width_(cm)': {0: 0.2}, 'sepal_length_(cm)': {0: 5.1}}

response, response_headers = model.predict({"inputs": [request, request]})
assert numpy.argmax(response["predictions"][0]) == 2
assert numpy.argmax(response["predictions"][0]) == 0

response, response_headers = model.predict(
{"instances": [request, request]})
assert numpy.argmax(response["predictions"][0]) == 2
response, response_headers = model.predict({"instances": [request, request]})
assert numpy.argmax(response["predictions"][0]) == 0

request = [
{'sepal_width_(cm)': 3.5}, {'petal_length_(cm)': 1.4},
{'petal_width_(cm)': 0.2}, {'sepal_length_(cm)': 5.1}
]
response = model.predict({"inputs": [request, request]})
response, response_headers = model.predict({"inputs": [request, request]})
assert numpy.argmax(response["predictions"][0]) == 0

response = model.predict({"instances": [request, request]})
response, response_headers = model.predict({"instances": [request, request]})
assert numpy.argmax(response["predictions"][0]) == 0

request = [
{'sepal_width_(cm)': 3.5}, {'petal_length_(cm)': 1.4},
{'petal_width_(cm)': 0.2}
]
response = model.predict({"inputs": [request, request]})
response, response_headers = model.predict({"inputs": [request, request]})
assert numpy.argmax(response["predictions"][0]) == 0

response = model.predict({"instances": [request, request]})
response, response_headers = model.predict({"instances": [request, request]})
assert numpy.argmax(response["predictions"][0]) == 0

# test v2 handler
infer_input = InferInput(name="input-0", shape=[2, 4], datatype="FP32",
data=[[6.8, 2.8, 4.8, 1.6], [6.0, 3.4, 4.5, 1.6]])
infer_request = InferRequest(
model_name="model", infer_inputs=[infer_input])
infer_request = InferRequest(model_name="model", infer_inputs=[infer_input])
infer_response, response_headers = model.predict(infer_request)
assert infer_response.to_rest()["outputs"] == \
[{'name': 'output-0', 'shape': [2, 3], 'datatype': 'FP64',
'data': [3.7899802486733807e-06, 0.9996982074114203, 0.00029800260833088297,
5.2172911836629736e-05, 0.99973341723876, 0.000214409849403366]}]
[{'name': 'output-0', 'shape': [2, 3], 'datatype': 'FP64',
'data': [3.7899802486733807e-06, 0.9996982074114203, 0.00029800260833088297,
5.2172911836629736e-05, 0.99973341723876, 0.000214409849403366]}]
3 changes: 2 additions & 1 deletion python/test_resources/graph/success_200_isvc/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ def load(self):
self.ready = True

def predict(self, payload: Union[Dict, InferRequest, ModelInferRequest], headers) -> Dict:
return {"message": "SUCCESS"}
response_headers = {}
return {"message": "SUCCESS"}, response_headers


parser = argparse.ArgumentParser(parents=[kserve.model_server.parser])
Expand Down

0 comments on commit 1f1764a

Please sign in to comment.