From fe6db3c33c3055c7e298916e07f8068eb19543ab Mon Sep 17 00:00:00 2001
From: JB Lau <1557853+hackyon@users.noreply.github.com>
Date: Wed, 14 Feb 2024 12:30:02 -0500
Subject: [PATCH] Removing _use_sdpa attributes and their fix-copies

---
 src/transformers/models/altclip/modeling_altclip.py         | 4 ----
 src/transformers/models/camembert/modeling_camembert.py     | 4 ----
 src/transformers/models/clap/modeling_clap.py               | 4 ----
 src/transformers/models/ernie/modeling_ernie.py             | 4 ----
 src/transformers/models/markuplm/modeling_markuplm.py       | 4 ----
 src/transformers/models/roberta/modeling_roberta.py         | 4 ----
 src/transformers/models/roc_bert/modeling_roc_bert.py       | 4 ----
 src/transformers/models/xlm_roberta/modeling_xlm_roberta.py | 4 ----
 .../models/xlm_roberta_xl/modeling_xlm_roberta_xl.py        | 4 ----
 src/transformers/models/xmod/modeling_xmod.py               | 4 ----
 10 files changed, 40 deletions(-)

diff --git a/src/transformers/models/altclip/modeling_altclip.py b/src/transformers/models/altclip/modeling_altclip.py
index 0a3fed171670d6..91a4a867a93b2b 100755
--- a/src/transformers/models/altclip/modeling_altclip.py
+++ b/src/transformers/models/altclip/modeling_altclip.py
@@ -1214,7 +1214,6 @@ class AltRobertaModel(AltCLIPPreTrainedModel):
 
     config_class = AltCLIPTextConfig
 
-    # Copied from transformers.models.bert.modeling_bert.BertModel.__init__ with Bert->AltRoberta
     def __init__(self, config, add_pooling_layer=True):
         super().__init__(config)
         self.config = config
@@ -1224,9 +1223,6 @@ def __init__(self, config, add_pooling_layer=True):
 
         self.pooler = AltRobertaPooler(config) if add_pooling_layer else None
 
-        self._use_sdpa = config._attn_implementation == "sdpa"
-        self.position_embedding_type = config.position_embedding_type
-
         # Initialize weights and apply final processing
         self.post_init()
 
diff --git a/src/transformers/models/camembert/modeling_camembert.py b/src/transformers/models/camembert/modeling_camembert.py
index d6179b5dec8e23..da4222a69417be 100644
--- a/src/transformers/models/camembert/modeling_camembert.py
+++ b/src/transformers/models/camembert/modeling_camembert.py
@@ -755,7 +755,6 @@ class CamembertModel(CamembertPreTrainedModel):
 
     _no_split_modules = []
 
-    # Copied from transformers.models.bert.modeling_bert.BertModel.__init__ with Bert->Camembert
     def __init__(self, config, add_pooling_layer=True):
         super().__init__(config)
         self.config = config
@@ -765,9 +764,6 @@ def __init__(self, config, add_pooling_layer=True):
 
         self.pooler = CamembertPooler(config) if add_pooling_layer else None
 
-        self._use_sdpa = config._attn_implementation == "sdpa"
-        self.position_embedding_type = config.position_embedding_type
-
         # Initialize weights and apply final processing
         self.post_init()
 
diff --git a/src/transformers/models/clap/modeling_clap.py b/src/transformers/models/clap/modeling_clap.py
index 49ca45602a3260..2038528c1b7950 100644
--- a/src/transformers/models/clap/modeling_clap.py
+++ b/src/transformers/models/clap/modeling_clap.py
@@ -1773,7 +1773,6 @@ class ClapTextModel(ClapPreTrainedModel):
 
     config_class = ClapTextConfig
 
-    # Copied from transformers.models.bert.modeling_bert.BertModel.__init__ with Bert->ClapText
     def __init__(self, config, add_pooling_layer=True):
         super().__init__(config)
         self.config = config
@@ -1783,9 +1782,6 @@ def __init__(self, config, add_pooling_layer=True):
 
         self.pooler = ClapTextPooler(config) if add_pooling_layer else None
 
-        self._use_sdpa = config._attn_implementation == "sdpa"
-        self.position_embedding_type = config.position_embedding_type
-
         # Initialize weights and apply final processing
         self.post_init()
 
diff --git a/src/transformers/models/ernie/modeling_ernie.py b/src/transformers/models/ernie/modeling_ernie.py
index 2ab5545743bf8a..f4ad8a28907d9b 100644
--- a/src/transformers/models/ernie/modeling_ernie.py
+++ b/src/transformers/models/ernie/modeling_ernie.py
@@ -810,7 +810,6 @@ class ErnieModel(ErniePreTrainedModel):
     `add_cross_attention` set to `True`; an `encoder_hidden_states` is then expected as an input to the forward pass.
     """
 
-    # Copied from transformers.models.bert.modeling_bert.BertModel.__init__ with Bert->Ernie
     def __init__(self, config, add_pooling_layer=True):
         super().__init__(config)
         self.config = config
@@ -820,9 +819,6 @@ def __init__(self, config, add_pooling_layer=True):
 
         self.pooler = ErniePooler(config) if add_pooling_layer else None
 
-        self._use_sdpa = config._attn_implementation == "sdpa"
-        self.position_embedding_type = config.position_embedding_type
-
         # Initialize weights and apply final processing
         self.post_init()
 
diff --git a/src/transformers/models/markuplm/modeling_markuplm.py b/src/transformers/models/markuplm/modeling_markuplm.py
index adf85b014a6964..7d882ef943ffff 100755
--- a/src/transformers/models/markuplm/modeling_markuplm.py
+++ b/src/transformers/models/markuplm/modeling_markuplm.py
@@ -811,7 +811,6 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
     MARKUPLM_START_DOCSTRING,
 )
 class MarkupLMModel(MarkupLMPreTrainedModel):
-    # Copied from transformers.models.bert.modeling_bert.BertModel.__init__ with Bert->MarkupLM
     def __init__(self, config, add_pooling_layer=True):
         super().__init__(config)
         self.config = config
@@ -821,9 +820,6 @@ def __init__(self, config, add_pooling_layer=True):
 
         self.pooler = MarkupLMPooler(config) if add_pooling_layer else None
 
-        self._use_sdpa = config._attn_implementation == "sdpa"
-        self.position_embedding_type = config.position_embedding_type
-
         # Initialize weights and apply final processing
         self.post_init()
 
diff --git a/src/transformers/models/roberta/modeling_roberta.py b/src/transformers/models/roberta/modeling_roberta.py
index 1baf21e700578f..c36ef6b93fde0b 100644
--- a/src/transformers/models/roberta/modeling_roberta.py
+++ b/src/transformers/models/roberta/modeling_roberta.py
@@ -702,7 +702,6 @@ class RobertaModel(RobertaPreTrainedModel):
 
     """
 
-    # Copied from transformers.models.bert.modeling_bert.BertModel.__init__ with Bert->Roberta
     def __init__(self, config, add_pooling_layer=True):
         super().__init__(config)
         self.config = config
@@ -712,9 +711,6 @@ def __init__(self, config, add_pooling_layer=True):
 
         self.pooler = RobertaPooler(config) if add_pooling_layer else None
 
-        self._use_sdpa = config._attn_implementation == "sdpa"
-        self.position_embedding_type = config.position_embedding_type
-
         # Initialize weights and apply final processing
         self.post_init()
 
diff --git a/src/transformers/models/roc_bert/modeling_roc_bert.py b/src/transformers/models/roc_bert/modeling_roc_bert.py
index 2b9b8eb9aa723f..b656262619cf1e 100644
--- a/src/transformers/models/roc_bert/modeling_roc_bert.py
+++ b/src/transformers/models/roc_bert/modeling_roc_bert.py
@@ -892,7 +892,6 @@ class RoCBertModel(RoCBertPreTrainedModel):
     `add_cross_attention` set to `True`; an `encoder_hidden_states` is then expected as an input to the forward pass.
     """
 
-    # Copied from transformers.models.bert.modeling_bert.BertModel.__init__ with Bert->RoCBert
     def __init__(self, config, add_pooling_layer=True):
         super().__init__(config)
         self.config = config
@@ -902,9 +901,6 @@ def __init__(self, config, add_pooling_layer=True):
 
         self.pooler = RoCBertPooler(config) if add_pooling_layer else None
 
-        self._use_sdpa = config._attn_implementation == "sdpa"
-        self.position_embedding_type = config.position_embedding_type
-
         # Initialize weights and apply final processing
         self.post_init()
 
diff --git a/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py b/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py
index 4dbd2c01403a7b..95cbf42308fb14 100644
--- a/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py
+++ b/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py
@@ -704,7 +704,6 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
 
     """
 
-    # Copied from transformers.models.bert.modeling_bert.BertModel.__init__ with Bert->XLMRoberta
     def __init__(self, config, add_pooling_layer=True):
         super().__init__(config)
         self.config = config
@@ -714,9 +713,6 @@ def __init__(self, config, add_pooling_layer=True):
 
         self.pooler = XLMRobertaPooler(config) if add_pooling_layer else None
 
-        self._use_sdpa = config._attn_implementation == "sdpa"
-        self.position_embedding_type = config.position_embedding_type
-
         # Initialize weights and apply final processing
         self.post_init()
 
diff --git a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py
index 26190a53e62221..5c8c1b189bc1bc 100644
--- a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py
+++ b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py
@@ -667,7 +667,6 @@ class XLMRobertaXLModel(XLMRobertaXLPreTrainedModel):
     an input to the forward pass. .. _*Attention is all you need*: https://arxiv.org/abs/1706.03762
     """
 
-    # Copied from transformers.models.bert.modeling_bert.BertModel.__init__ with Bert->XLMRobertaXL
     def __init__(self, config, add_pooling_layer=True):
         super().__init__(config)
         self.config = config
@@ -677,9 +676,6 @@ def __init__(self, config, add_pooling_layer=True):
 
         self.pooler = XLMRobertaXLPooler(config) if add_pooling_layer else None
 
-        self._use_sdpa = config._attn_implementation == "sdpa"
-        self.position_embedding_type = config.position_embedding_type
-
         # Initialize weights and apply final processing
         self.post_init()
 
diff --git a/src/transformers/models/xmod/modeling_xmod.py b/src/transformers/models/xmod/modeling_xmod.py
index 96fe711dcdb975..ba3505678e3962 100644
--- a/src/transformers/models/xmod/modeling_xmod.py
+++ b/src/transformers/models/xmod/modeling_xmod.py
@@ -793,7 +793,6 @@ class XmodModel(XmodPreTrainedModel):
 
     """
 
-    # Copied from transformers.models.bert.modeling_bert.BertModel.__init__ with Bert->Xmod
     def __init__(self, config, add_pooling_layer=True):
         super().__init__(config)
         self.config = config
@@ -803,9 +802,6 @@ def __init__(self, config, add_pooling_layer=True):
 
         self.pooler = XmodPooler(config) if add_pooling_layer else None
 
-        self._use_sdpa = config._attn_implementation == "sdpa"
-        self.position_embedding_type = config.position_embedding_type
-
         # Initialize weights and apply final processing
         self.post_init()