diff --git a/llmfoundry/tp/__init__.py b/llmfoundry/tp/__init__.py index 55ceda0b76..48b788befa 100644 --- a/llmfoundry/tp/__init__.py +++ b/llmfoundry/tp/__init__.py @@ -2,10 +2,10 @@ # SPDX-License-Identifier: Apache-2.0 from llmfoundry.registry import tp_strategies -from llmfoundry.tp.tp_strategies import ffn_tp_strategies +from llmfoundry.tp.ffn_tp_strategy import ffn -tp_strategies.register('ffn', func=ffn_tp_strategies) +tp_strategies.register('ffn', func=ffn) __all__ = [ - 'ffn_tp_strategies', + 'ffn', ] diff --git a/llmfoundry/tp/tp_strategies.py b/llmfoundry/tp/ffn_tp_strategy.py similarity index 96% rename from llmfoundry/tp/tp_strategies.py rename to llmfoundry/tp/ffn_tp_strategy.py index 7dbfa3b90c..2804bfc747 100644 --- a/llmfoundry/tp/tp_strategies.py +++ b/llmfoundry/tp/ffn_tp_strategy.py @@ -11,7 +11,7 @@ from torch.distributed.tensor.parallel.style import ParallelStyle -def ffn_tp_strategies(model: ComposerModel) -> dict[str, ParallelStyle]: +def ffn(model: ComposerModel) -> dict[str, ParallelStyle]: TP_LAYERS = {'ffn', 'ffn.up_proj', 'ffn.down_proj'} # Validate that all TP_LAYERS are in model diff --git a/tests/models/utils/test_tp_strategies.py b/tests/models/utils/test_tp_strategies.py index 357e146f11..f0c5e40be7 100644 --- a/tests/models/utils/test_tp_strategies.py +++ b/tests/models/utils/test_tp_strategies.py @@ -24,7 +24,7 @@ @pytest.mark.filterwarnings( 'ignore:tp_strategies is experimental and may change with future versions.', ) -def test_ffn_tp_strategies_layer_plan(): +def test_ffn_tp_strategy(): # Create layer plan from fnn tp_strategy tp_config = { 'strategy': 'ffn', @@ -133,7 +133,3 @@ def test_no_tp_with_moes(): match='Tensor Parallelism is not currently supported for MoE models.', ): process_init_device(model_cfg, fsdp_cfg, tp_cfg) - - -# if __name__ == '__main__': -# test_ffn_tp_strategies_layer_plan()