From 64489f6547fd432a29d197974df2fffcf5283551 Mon Sep 17 00:00:00 2001
From: Mihir Patel
Date: Wed, 20 Sep 2023 17:18:08 -0400
Subject: [PATCH 1/3] Update setup.py to bump flash attn

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index f5d223747d..8c2351d529 100644
--- a/setup.py
+++ b/setup.py
@@ -86,7 +86,7 @@
 ]
 
 extra_deps['gpu'] = [
-    'flash-attn==v1.0.3.post0',
+    'flash-attn==2.2.2',
     'mosaicml-turbo==0.0.3',
     # PyPI does not support direct dependencies, so we remove this line before uploading from PyPI
     'xentropy-cuda-lib@git+https://github.com/HazyResearch/flash-attention.git@v1.0.3#subdirectory=csrc/xentropy',

From 263dd0ab4b774474b01e19924bc265a19b9d6673 Mon Sep 17 00:00:00 2001
From: Mihir Patel
Date: Wed, 20 Sep 2023 17:33:33 -0400
Subject: [PATCH 2/3] bump

---
 llmfoundry/models/layers/attention.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llmfoundry/models/layers/attention.py b/llmfoundry/models/layers/attention.py
index 76969b7810..6f2be01805 100644
--- a/llmfoundry/models/layers/attention.py
+++ b/llmfoundry/models/layers/attention.py
@@ -252,7 +252,7 @@ def flash_attn_fn(
 
     reset_is_causal = _reset_is_causal(query.size(1), key.size(1), is_causal)
 
-    output_unpad = flash_attn_interface.flash_attn_unpadded_func(
+    output_unpad = flash_attn_interface.flash_attn_kvpacked_func(
         query_unpad,
         key_unpad,
         value_unpad,

From dffa67179b76c92f9caa422c6cfffa77b95796a8 Mon Sep 17 00:00:00 2001
From: Mihir Patel
Date: Wed, 20 Sep 2023 17:47:35 -0400
Subject: [PATCH 3/3] downgradegp

---
 llmfoundry/models/layers/attention.py | 2 +-
 setup.py                              | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llmfoundry/models/layers/attention.py b/llmfoundry/models/layers/attention.py
index 6f2be01805..76969b7810 100644
--- a/llmfoundry/models/layers/attention.py
+++ b/llmfoundry/models/layers/attention.py
@@ -252,7 +252,7 @@ def flash_attn_fn(
 
     reset_is_causal = _reset_is_causal(query.size(1), key.size(1), is_causal)
 
-    output_unpad = flash_attn_interface.flash_attn_kvpacked_func(
+    output_unpad = flash_attn_interface.flash_attn_unpadded_func(
         query_unpad,
         key_unpad,
         value_unpad,
diff --git a/setup.py b/setup.py
index 8c2351d529..5cf460c7b8 100644
--- a/setup.py
+++ b/setup.py
@@ -86,8 +86,8 @@
 ]
 
 extra_deps['gpu'] = [
-    'flash-attn==2.2.2',
-    'mosaicml-turbo==0.0.3',
+    'flash-attn==1.0.9',
+    'mosaicml-turbo==0.0.4',
     # PyPI does not support direct dependencies, so we remove this line before uploading from PyPI
     'xentropy-cuda-lib@git+https://github.com/HazyResearch/flash-attention.git@v1.0.3#subdirectory=csrc/xentropy',
 ]