Add fp16 support for split cache
PatriceVignola committed Dec 16, 2023
1 parent 7376c6a commit 06b41bc
Showing 1 changed file with 1 addition and 1 deletion.
optimum/onnxruntime/modeling_decoder.py: 1 addition & 1 deletion
@@ -151,7 +151,7 @@ def __init__(

         self.use_fp16 = False
         for inp in model.get_inputs():
-            if inp.name == "past_key_values" and inp.type == "tensor(float16)":
+            if (inp.name == "past_key_values" or inp.name in self.key_value_input_names) and inp.type == "tensor(float16)":
                 self.use_fp16 = True
                 break
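The change above widens the fp16 check: instead of looking only for a single merged "past_key_values" input, it also matches any of the per-layer split-cache input names collected in `self.key_value_input_names`. A minimal sketch of that detection logic is below; `detect_fp16_cache` is a hypothetical standalone helper, and the `SimpleNamespace` objects stand in for the `NodeArg` entries (with `.name` and `.type` attributes) that an ONNX Runtime session's `get_inputs()` would return.

```python
from types import SimpleNamespace

def detect_fp16_cache(model_inputs, key_value_input_names):
    """Return True if any past key/value input is declared as float16.

    Mirrors the patched check: besides the legacy merged
    "past_key_values" input, per-layer split-cache inputs (whatever
    names the exported model uses) are also inspected.
    """
    for inp in model_inputs:
        if (inp.name == "past_key_values" or inp.name in key_value_input_names) \
                and inp.type == "tensor(float16)":
            return True
    return False

# Hypothetical inputs of an fp16 export that uses a split KV cache.
inputs = [
    SimpleNamespace(name="input_ids", type="tensor(int64)"),
    SimpleNamespace(name="past_key_values.0.key", type="tensor(float16)"),
    SimpleNamespace(name="past_key_values.0.value", type="tensor(float16)"),
]
kv_names = {"past_key_values.0.key", "past_key_values.0.value"}
print(detect_fp16_cache(inputs, kv_names))  # True
```

With the old condition, a model exporting its cache as separate `past_key_values.N.key` / `past_key_values.N.value` inputs would never set `use_fp16`, even when those inputs are `tensor(float16)`.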

