diff --git a/llmfoundry/__init__.py b/llmfoundry/__init__.py index b851aaa559..07e8f35747 100644 --- a/llmfoundry/__init__.py +++ b/llmfoundry/__init__.py @@ -48,6 +48,7 @@ models, optim, tokenizers, + tp, utils, ) from llmfoundry._version import __version__ @@ -87,5 +88,6 @@ 'models', 'optim', 'tokenizers', + 'tp', 'utils', ] diff --git a/llmfoundry/models/__init__.py b/llmfoundry/models/__init__.py index 569ef116f0..827fe2ce56 100644 --- a/llmfoundry/models/__init__.py +++ b/llmfoundry/models/__init__.py @@ -15,8 +15,7 @@ MPTModel, MPTPreTrainedModel, ) -from llmfoundry.models.utils.tp_strategies import ffn_tp_strategies -from llmfoundry.registry import models, tp_strategies +from llmfoundry.registry import models models.register('mpt_causal_lm', func=ComposerMPTCausalLM) models.register('hf_causal_lm', func=ComposerHFCausalLM) @@ -25,7 +24,6 @@ models.register('fmapi_causal_lm', func=FMAPICasualLMEvalWrapper) models.register('openai_chat', func=OpenAIChatAPIEvalWrapper) models.register('fmapi_chat', func=FMAPIChatAPIEvalWrapper) -tp_strategies.register('ffn', func=ffn_tp_strategies) __all__ = [ 'ComposerHFCausalLM', @@ -39,5 +37,4 @@ 'FMAPICasualLMEvalWrapper', 'OpenAIChatAPIEvalWrapper', 'FMAPIChatAPIEvalWrapper', - 'ffn_tp_strategies', ] diff --git a/llmfoundry/tp/__init__.py b/llmfoundry/tp/__init__.py new file mode 100644 index 0000000000..55ceda0b76 --- /dev/null +++ b/llmfoundry/tp/__init__.py @@ -0,0 +1,11 @@ +# Copyright 2024 MosaicML LLM Foundry authors +# SPDX-License-Identifier: Apache-2.0 + +from llmfoundry.registry import tp_strategies +from llmfoundry.tp.tp_strategies import ffn_tp_strategies + +tp_strategies.register('ffn', func=ffn_tp_strategies) + +__all__ = [ + 'ffn_tp_strategies', +] diff --git a/llmfoundry/models/utils/tp_strategies.py b/llmfoundry/tp/tp_strategies.py similarity index 97% rename from llmfoundry/models/utils/tp_strategies.py rename to llmfoundry/tp/tp_strategies.py index 5613589c2b..7dbfa3b90c 100644 --- 
a/llmfoundry/models/utils/tp_strategies.py +++ b/llmfoundry/tp/tp_strategies.py @@ -12,7 +12,7 @@ def ffn_tp_strategies(model: ComposerModel) -> dict[str, ParallelStyle]: - TP_LAYERS = {'up_proj', 'down_proj'} + TP_LAYERS = {'ffn', 'ffn.up_proj', 'ffn.down_proj'} # Validate that all TP_LAYERS are in model tp_layers_in_model = { diff --git a/tests/models/utils/test_tp_strategies.py b/tests/models/utils/test_tp_strategies.py index 3d0758e928..357e146f11 100644 --- a/tests/models/utils/test_tp_strategies.py +++ b/tests/models/utils/test_tp_strategies.py @@ -25,7 +25,7 @@ 'ignore:tp_strategies is experimental and may change with future versions.', ) def test_ffn_tp_strategies_layer_plan(): - # Actual layer plan from tp_strategies=fnn + # Create layer plan from ffn tp_strategy tp_config = { 'strategy': 'ffn', }