Commit cb55f34

along for ride

mvpatel2000 committed Sep 25, 2023
1 parent 6e89ab5 commit cb55f34

Showing 2 changed files with 13 additions and 15 deletions.
26 changes: 12 additions & 14 deletions llmfoundry/models/layers/attention.py
@@ -548,20 +548,18 @@ class MultiheadAttention(GroupedQueryAttention):
     additive bias.
     """

-    def __init__(
-        self,
-        d_model: int,
-        n_heads: int,
-        attn_impl: str = 'triton',
-        clip_qkv: Optional[float] = None,
-        qk_ln: bool = False,
-        softmax_scale: Optional[float] = None,
-        attn_pdrop: float = 0.0,
-        norm_type: str = 'low_precision_layernorm',
-        fc_type: str = 'torch',
-        device: Optional[str] = None,
-        bias: bool = True
-    ):
+    def __init__(self,
+                 d_model: int,
+                 n_heads: int,
+                 attn_impl: str = 'triton',
+                 clip_qkv: Optional[float] = None,
+                 qk_ln: bool = False,
+                 softmax_scale: Optional[float] = None,
+                 attn_pdrop: float = 0.0,
+                 norm_type: str = 'low_precision_layernorm',
+                 fc_type: str = 'torch',
+                 device: Optional[str] = None,
+                 bias: bool = True):
         super().__init__(
             d_model=d_model,
             n_heads=n_heads,
2 changes: 1 addition & 1 deletion setup.py
@@ -89,7 +89,7 @@
     'flash-attn==1.0.9',
     'mosaicml-turbo==0.0.4',
     # PyPI does not support direct dependencies, so we remove this line before uploading from PyPI
-    'xentropy-cuda-lib@git+https://github.com/HazyResearch/[email protected]#subdirectory=csrc/xentropy',
+    'xentropy-cuda-lib@git+https://github.com/HazyResearch/[email protected]#subdirectory=csrc/xentropy',
 ]

 extra_deps['peft'] = [
