From 343dcd8585aac82b66b2feed9f8f19bb1f5ac4bb Mon Sep 17 00:00:00 2001
From: Vincent Chen
Date: Tue, 17 Sep 2024 10:09:36 -0700
Subject: [PATCH] Revert "Replace FSDP args (#1517)"

This reverts commit 83ab9c30e0a2432bcc6213e4cb8b55296b13e438.
---
 llmfoundry/command_utils/eval.py                          | 8 ++------
 tests/a_scripts/inference/test_convert_composer_to_hf.py  | 5 ++---
 tests/models/hf/test_fsdp_weight_tying.py                 | 2 +-
 tests/models/hf/test_hf_peft_wrapping.py                  | 2 +-
 tests/models/test_fsdp_act_checkpoint.py                  | 2 +-
 5 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index eca16bd815..f622ca182d 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -52,7 +52,7 @@ def evaluate_model(
     device_eval_batch_size: Union[int, float],
     eval_gauntlet_config: Optional[Union[str, dict[str, Any]]],
     eval_loader_config: Optional[Union[dict[str, Any], list[dict[str, Any]]]],
-    parallelism_config: Optional[dict[str, Any]],
+    fsdp_config: Optional[dict[str, Any]],
     loggers: list[LoggerDestination],
     python_log_level: Optional[str],
     precision: str,
@@ -99,10 +99,6 @@ def evaluate_model(
         mosaicml_logger.log_metrics(metadata)
         mosaicml_logger._flush_metadata(force_flush=True)

-    fsdp_config = parallelism_config.get(
-        'fsdp_config',
-        None,
-    ) if parallelism_config else None
     if fsdp_config and model.get('load_in_8bit', False):
         raise ValueError(
             'The FSDP config block is not supported when loading ' +
@@ -320,7 +316,7 @@ def evaluate(cfg: DictConfig) -> tuple[list[Trainer], pd.DataFrame]:
             device_eval_batch_size=eval_config.device_eval_batch_size,
             eval_gauntlet_config=eval_gauntlet_config,
             eval_loader_config=eval_loader_config,
-            parallelism_config={'fsdp': fsdp_config},
+            fsdp_config=fsdp_config,
             loggers=loggers,
             python_log_level=eval_config.python_log_level,
             precision=eval_config.precision,
diff --git a/tests/a_scripts/inference/test_convert_composer_to_hf.py b/tests/a_scripts/inference/test_convert_composer_to_hf.py
index 66ec739a65..4f1bd63c62 100644
--- a/tests/a_scripts/inference/test_convert_composer_to_hf.py
+++ b/tests/a_scripts/inference/test_convert_composer_to_hf.py
@@ -1042,8 +1042,7 @@ def test_huggingface_conversion_callback(
         model=original_model,
         device='gpu',
         precision=trainer_precision,
-        parallelism_config={'fsdp': fsdp_config}
-        if fsdp_state_dict_type is not None else None,
+        fsdp_config=fsdp_config if fsdp_state_dict_type is not None else None,
         train_dataloader=train_dataloader,
         save_folder=os.path.join(tmp_path, 'checkpoints'),
         save_interval=save_interval,
@@ -1470,7 +1469,7 @@ def test_mptmoe_huggingface_conversion_callback(
     trainer = Trainer(
         model=original_model,
         device='gpu',
-        parallelism_config={'fsdp': fsdp_config},
+        fsdp_config=fsdp_config,
         train_dataloader=train_dataloader,
         save_folder=os.path.join(tmp_path, 'checkpoints'),
         save_interval=save_interval,
diff --git a/tests/models/hf/test_fsdp_weight_tying.py b/tests/models/hf/test_fsdp_weight_tying.py
index 8e6c113169..69ced673a1 100644
--- a/tests/models/hf/test_fsdp_weight_tying.py
+++ b/tests/models/hf/test_fsdp_weight_tying.py
@@ -91,7 +91,7 @@ def test_fsdp_weight_tying(
     trainer = Trainer(
         model=original_model,
         device='gpu',
-        parallelism_config={'fsdp': fsdp_config},
+        fsdp_config=fsdp_config,
         train_dataloader=[],
         device_train_microbatch_size=1,
     )
diff --git a/tests/models/hf/test_hf_peft_wrapping.py b/tests/models/hf/test_hf_peft_wrapping.py
index 01acc22a60..56cb36c8c1 100644
--- a/tests/models/hf/test_hf_peft_wrapping.py
+++ b/tests/models/hf/test_hf_peft_wrapping.py
@@ -125,7 +125,7 @@ def test_lora_mixed_init(
     trainer = Trainer(
         model=original_model,
         device='gpu',
-        parallelism_config={'fsdp': fsdp_config},
+        fsdp_config=fsdp_config,
         train_dataloader=[],
         device_train_microbatch_size=1,
     )
diff --git a/tests/models/test_fsdp_act_checkpoint.py b/tests/models/test_fsdp_act_checkpoint.py
index 366bcf7786..a41574538a 100644
--- a/tests/models/test_fsdp_act_checkpoint.py
+++ b/tests/models/test_fsdp_act_checkpoint.py
@@ -59,7 +59,7 @@ def test_fsdp_act_checkpoint(
     trainer = Trainer(
         model=model,
         device='gpu',
-        parallelism_config={'fsdp': fsdp_config},
+        fsdp_config=fsdp_config,
     )

     assert trainer.state.fsdp_enabled