Commit

bias
mvpatel2000 committed Sep 25, 2023
1 parent 2a4d56c commit 6e89ab5
Showing 3 changed files with 13 additions and 12 deletions.
12 changes: 6 additions & 6 deletions llmfoundry/models/layers/attention.py
@@ -419,7 +419,7 @@ def __init__(
         norm_type: str = 'low_precision_layernorm',
         fc_type: str = 'torch',
         device: Optional[str] = None,
-        no_bias: bool = False,
+        bias: bool = True,
     ):
         super().__init__()
 
@@ -452,7 +452,7 @@ def __init__(
         self.attn_dropout_p = attn_pdrop
 
         fc_kwargs = {
-            'bias': not no_bias,
+            'bias': bias,
         }
         if fc_type != 'te':
             fc_kwargs['device'] = device
@@ -560,7 +560,7 @@ def __init__(
         norm_type: str = 'low_precision_layernorm',
         fc_type: str = 'torch',
         device: Optional[str] = None,
-        no_bias: bool = False
+        bias: bool = True
     ):
         super().__init__(
             d_model=d_model,
@@ -574,7 +574,7 @@ def __init__(
             norm_type=norm_type,
             fc_type=fc_type,
             device=device,
-            no_bias=no_bias,
+            bias=bias,
         )
 
 
@@ -597,7 +597,7 @@ def __init__(
         norm_type: str = 'low_precision_layernorm',
         fc_type: str = 'torch',
         device: Optional[str] = None,
-        no_bias: bool = False
+        bias: bool = True,
     ):
         super().__init__(
             d_model=d_model,
@@ -611,7 +611,7 @@ def __init__(
             norm_type=norm_type,
             fc_type=fc_type,
             device=device,
-            no_bias=no_bias,
+            bias=bias,
         )
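For downstream callers, the net effect of the attention.py changes is a renamed and inverted constructor flag: no_bias (default False) becomes bias (default True). Below is a minimal before/after sketch; the class name MultiheadAttention and the d_model/n_heads arguments are assumptions for illustration, since only the bias/no_bias keywords appear in this diff.

    # Hypothetical call site; the class name and non-bias arguments are assumptions.
    from llmfoundry.models.layers.attention import MultiheadAttention

    # Before this commit, biases on the internal linear layers were disabled with:
    #     attn = MultiheadAttention(d_model=768, n_heads=12, no_bias=True)

    # After this commit, the flag is renamed and inverted; biases default to on.
    attn = MultiheadAttention(d_model=768, n_heads=12, bias=False)
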
3 changes: 2 additions & 1 deletion llmfoundry/models/layers/blocks.py
@@ -73,7 +73,7 @@ def __init__(
             fc_type=fc_type,
             device=device,
             **attn_config_subset_for_attn_class,
-            no_bias=no_bias,
+            bias=not no_bias,
         )
         self.norm_2 = None
         if not getattr(FFN_CLASS_REGISTRY[ffn_config['ffn_type']], '_has_norm',
@@ -83,6 +83,7 @@ def __init__(
             d_model=d_model,
             expansion_ratio=expansion_ratio,
             device=device,
+            bias=not no_bias,
             **ffn_config,
         )
         self.resid_attn_dropout = nn.Dropout(resid_pdrop)
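Note that blocks.py keeps its existing no_bias flag and only flips it at the boundary, passing bias=not no_bias into both the attention module and the FFN. A tiny self-contained sketch of that conversion follows; the helper name is hypothetical and exists only to illustrate the mapping.

    # Hypothetical helper mirroring the inline conversion in blocks.py:
    # the block-level config still uses no_bias, the layer constructors now use bias.
    def layer_bias_from_block_config(no_bias: bool) -> bool:
        """Map the block's no_bias flag to the layers' new bias kwarg."""
        return not no_bias

    assert layer_bias_from_block_config(no_bias=True) is False   # biases disabled
    assert layer_bias_from_block_config(no_bias=False) is True   # biases enabled (default)
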
10 changes: 5 additions & 5 deletions llmfoundry/models/layers/ffn.py
@@ -24,11 +24,11 @@ def __init__(
         expansion_ratio: int,
         fc_type: str = 'torch',
         device: Optional[str] = None,
-        no_bias: bool = False,
+        bias: bool = True,
     ):
         super().__init__()
         fc_kwargs = {
-            'bias': not no_bias,
+            'bias': bias,
         }
         if fc_type != 'te':
             fc_kwargs['device'] = device
@@ -63,7 +63,7 @@ def build_ffn(
     expansion_ratio: int,
     fc_type: str = 'torch',
     device: Optional[str] = None,
-    no_bias: bool = False,
+    bias: bool = True,
     **kwargs: Any,
 ) -> nn.Module:
     ffn_type = kwargs.pop('ffn_type')
@@ -76,14 +76,14 @@ def build_ffn(
             expansion_ratio=expansion_ratio,
             fc_type=fc_type,
             device=device,
-            no_bias=no_bias,
+            bias=bias,
         )
     elif ffn_type == 'te_ln_mlp':
         assert te is not None
         return te.LayerNormMLP(
             hidden_size=d_model,
             ffn_hidden_size=d_model * expansion_ratio,
-            bias=not no_bias,
+            bias=bias,
             **kwargs,
         )
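A minimal usage sketch of build_ffn after this change; the ffn_type value and the non-bias arguments are assumptions for illustration, only the bias keyword comes from this diff.

    # Hypothetical call; only the bias kwarg is taken from this commit's diff.
    from llmfoundry.models.layers.ffn import build_ffn

    ffn = build_ffn(
        d_model=768,
        expansion_ratio=4,
        fc_type='torch',
        device='cpu',
        bias=False,          # previously expressed as no_bias=True
        ffn_type='mptmlp',   # popped via kwargs.pop('ffn_type') inside build_ffn
    )
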
