From 06b41bc3a22ed1047e5353db507a66dc949607b8 Mon Sep 17 00:00:00 2001
From: Patrice Vignola
Date: Fri, 15 Dec 2023 22:04:06 -0800
Subject: [PATCH] Add fp16 support for split cache

---
 optimum/onnxruntime/modeling_decoder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/onnxruntime/modeling_decoder.py b/optimum/onnxruntime/modeling_decoder.py
index e51943bd4ac..e3754049a4e 100644
--- a/optimum/onnxruntime/modeling_decoder.py
+++ b/optimum/onnxruntime/modeling_decoder.py
@@ -151,7 +151,7 @@ def __init__(
 
         self.use_fp16 = False
         for inp in model.get_inputs():
-            if inp.name == "past_key_values" and inp.type == "tensor(float16)":
+            if (inp.name == "past_key_values" or inp.name in self.key_value_input_names) and inp.type == "tensor(float16)":
                 self.use_fp16 = True
                 break
 