From 06b41bc3a22ed1047e5353db507a66dc949607b8 Mon Sep 17 00:00:00 2001
From: Patrice Vignola <vignola.patrice@gmail.com>
Date: Fri, 15 Dec 2023 22:04:06 -0800
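Subject: [PATCH] Add fp16 support for split cache

The fp16 detection loop in __init__ only set use_fp16 when a model input
literally named "past_key_values" had type tensor(float16). Also accept
any input whose name is in self.key_value_input_names, so that a float16
cache exposed under those names (presumably one input per key/value
tensor rather than a single "past_key_values" input) is detected too.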
---
 optimum/onnxruntime/modeling_decoder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/onnxruntime/modeling_decoder.py b/optimum/onnxruntime/modeling_decoder.py
index e51943bd4ac..e3754049a4e 100644
--- a/optimum/onnxruntime/modeling_decoder.py
+++ b/optimum/onnxruntime/modeling_decoder.py
@@ -151,7 +151,7 @@ def __init__(
 
         self.use_fp16 = False
         for inp in model.get_inputs():
-            if inp.name == "past_key_values" and inp.type == "tensor(float16)":
+            if (inp.name == "past_key_values" or inp.name in self.key_value_input_names) and inp.type == "tensor(float16)":
                 self.use_fp16 = True
                 break