diff --git a/torchtune/modules/transformer.py b/torchtune/modules/transformer.py
index 1420907750..24c2c180e4 100644
--- a/torchtune/modules/transformer.py
+++ b/torchtune/modules/transformer.py
@@ -11,7 +11,6 @@
 from torch import nn
 from torchtune.modules import MultiHeadAttention
 from torchtune.modules.attention_utils import _MaskType
-from torchtune.utils.logging import deprecated
 
 
 class TransformerSelfAttentionLayer(nn.Module):
@@ -515,11 +514,6 @@ def forward(
         return output
 
 
-@deprecated(
-    msg="Please use torchtune.modules.TransformerDecoder instead. \
-If you need an example, see torchtune.models.qwen2._component_builders.py \
-and how to implement torch.modules.TiedLinear for the output projection."
-)
 class TiedEmbeddingTransformerDecoder(nn.Module):
     """
     Transformer Decoder with tied embedding weight. A key difference between