Prevent Graph break in Llama when using flash attention (huggingface#…
pramodkumar-habanalabs authored Aug 30, 2024
1 parent 35e0145 commit a1a92c9
Showing 1 changed file with 1 addition and 1 deletion.
optimum/habana/transformers/models/llama/modeling_llama.py (1 addition, 1 deletion)

@@ -617,7 +617,7 @@ def pre_attn_forward(
         else:
             past_key_value = None
 
-        if use_flash_attention and FusedSDPA:
+        if use_flash_attention and FusedSDPA is not None:
             import habana_frameworks.torch.hpu as ht
 
             softmax_mode = "fast" if flash_attention_fast_softmax else "None"
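Why the changed check matters: below is a minimal, illustrative sketch, not the optimum-habana source. It assumes FusedSDPA is set by a guarded import (the import path is an assumption) and is therefore either the fused-kernel class or None when the Habana stack is unavailable. A bare truthiness test like `if FusedSDPA:` asks Python to evaluate bool() on an arbitrary object, which a graph compiler may not be able to fold away at trace time and can force a graph break; `FusedSDPA is not None` is a plain identity comparison that can be resolved while tracing.

# Illustrative sketch only (not the optimum-habana source). Assumes the model
# module sets FusedSDPA via a guarded import, so the name is either the
# fused-kernel class or None on machines without the Habana software stack.
try:
    from habana_frameworks.torch.hpex.kernels import FusedSDPA  # assumed import path
except ImportError:
    FusedSDPA = None

def attention_backend(use_flash_attention: bool) -> str:
    # Old check: `if use_flash_attention and FusedSDPA:` calls bool() on an
    # arbitrary object, which the graph compiler may not constant-fold, so it
    # can fall back to eager execution and break the captured graph.
    # New check: `is not None` is a simple identity comparison that can be
    # resolved during tracing, so the graph stays intact.
    if use_flash_attention and FusedSDPA is not None:
        return "fused-sdpa"
    return "reference-sdpa"

print(attention_backend(True))  # "reference-sdpa" off-HPU, "fused-sdpa" when the kernel is available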
