Don't enable HLFB for the KV cache in OSS.

PiperOrigin-RevId: 693441376
google-ai-edge · Nov 5, 2024 · cdba806 · cdba806
1 parent 29aa17f
commit cdba806
Showing 1 changed file with 3 additions and 0 deletions.
diff --git a/ai_edge_torch/generative/layers/kv_cache.py b/ai_edge_torch/generative/layers/kv_cache.py
@@ -161,6 +161,9 @@ def update(
   Returns:
       KVCacheEntry: The updated KVCache entry based on the passed inputs.
   """
+  # Don't enable HLFB for the KV cache op for now, since it won't work with the
+  # LLM inference engine. Remove this once a new LLM inference engine ships.
+  enable_hlfb=False
   update_func = _update_kv_hlfb_impl if enable_hlfb else _update_kv_base_impl
   return update_func(cache, input_pos, k_slice, v_slice)