diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index eca16bd815..f622ca182d 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -52,7 +52,7 @@ def evaluate_model(
     device_eval_batch_size: Union[int, float],
     eval_gauntlet_config: Optional[Union[str, dict[str, Any]]],
     eval_loader_config: Optional[Union[dict[str, Any], list[dict[str, Any]]]],
-    parallelism_config: Optional[dict[str, Any]],
+    fsdp_config: Optional[dict[str, Any]],
     loggers: list[LoggerDestination],
     python_log_level: Optional[str],
     precision: str,
@@ -99,10 +99,6 @@ def evaluate_model(
         mosaicml_logger.log_metrics(metadata)
         mosaicml_logger._flush_metadata(force_flush=True)
 
-    fsdp_config = parallelism_config.get(
-        'fsdp_config',
-        None,
-    ) if parallelism_config else None
     if fsdp_config and model.get('load_in_8bit', False):
         raise ValueError(
             'The FSDP config block is not supported when loading ' +
@@ -320,7 +316,7 @@ def evaluate(cfg: DictConfig) -> tuple[list[Trainer], pd.DataFrame]:
             device_eval_batch_size=eval_config.device_eval_batch_size,
             eval_gauntlet_config=eval_gauntlet_config,
             eval_loader_config=eval_loader_config,
-            parallelism_config={'fsdp': fsdp_config},
+            fsdp_config=fsdp_config,
             loggers=loggers,
             python_log_level=eval_config.python_log_level,
             precision=eval_config.precision,
diff --git a/tests/a_scripts/inference/test_convert_composer_to_hf.py b/tests/a_scripts/inference/test_convert_composer_to_hf.py
index 66ec739a65..4f1bd63c62 100644
--- a/tests/a_scripts/inference/test_convert_composer_to_hf.py
+++ b/tests/a_scripts/inference/test_convert_composer_to_hf.py
@@ -1042,8 +1042,7 @@ def test_huggingface_conversion_callback(
         model=original_model,
         device='gpu',
         precision=trainer_precision,
-        parallelism_config={'fsdp': fsdp_config}
-        if fsdp_state_dict_type is not None else None,
+        fsdp_config=fsdp_config if fsdp_state_dict_type is not None else None,
         train_dataloader=train_dataloader,
         save_folder=os.path.join(tmp_path, 'checkpoints'),
         save_interval=save_interval,
@@ -1470,7 +1469,7 @@ def test_mptmoe_huggingface_conversion_callback(
     trainer = Trainer(
         model=original_model,
         device='gpu',
-        parallelism_config={'fsdp': fsdp_config},
+        fsdp_config=fsdp_config,
         train_dataloader=train_dataloader,
         save_folder=os.path.join(tmp_path, 'checkpoints'),
         save_interval=save_interval,
diff --git a/tests/models/hf/test_fsdp_weight_tying.py b/tests/models/hf/test_fsdp_weight_tying.py
index 8e6c113169..69ced673a1 100644
--- a/tests/models/hf/test_fsdp_weight_tying.py
+++ b/tests/models/hf/test_fsdp_weight_tying.py
@@ -91,7 +91,7 @@ def test_fsdp_weight_tying(
     trainer = Trainer(
         model=original_model,
         device='gpu',
-        parallelism_config={'fsdp': fsdp_config},
+        fsdp_config=fsdp_config,
         train_dataloader=[],
         device_train_microbatch_size=1,
     )
diff --git a/tests/models/hf/test_hf_peft_wrapping.py b/tests/models/hf/test_hf_peft_wrapping.py
index 01acc22a60..56cb36c8c1 100644
--- a/tests/models/hf/test_hf_peft_wrapping.py
+++ b/tests/models/hf/test_hf_peft_wrapping.py
@@ -125,7 +125,7 @@ def test_lora_mixed_init(
     trainer = Trainer(
         model=original_model,
         device='gpu',
-        parallelism_config={'fsdp': fsdp_config},
+        fsdp_config=fsdp_config,
         train_dataloader=[],
         device_train_microbatch_size=1,
     )
diff --git a/tests/models/test_fsdp_act_checkpoint.py b/tests/models/test_fsdp_act_checkpoint.py
index 366bcf7786..a41574538a 100644
--- a/tests/models/test_fsdp_act_checkpoint.py
+++ b/tests/models/test_fsdp_act_checkpoint.py
@@ -59,7 +59,7 @@ def test_fsdp_act_checkpoint(
     trainer = Trainer(
         model=model,
         device='gpu',
-        parallelism_config={'fsdp': fsdp_config},
+        fsdp_config=fsdp_config,
     )
 
     assert trainer.state.fsdp_enabled
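
A minimal sketch (not part of the diff) of how a `Trainer` is constructed after this change, assuming a Composer version whose `Trainer` accepts the `fsdp_config` keyword directly rather than `parallelism_config={'fsdp': ...}`. The `build_model()` and `build_dataloader()` helpers and the specific FSDP keys are illustrative placeholders, not part of this repository.

```python
# Sketch only: shows the call style this diff reverts to.
# `build_model()` / `build_dataloader()` are hypothetical helpers standing in
# for the real model and dataloader setup; the FSDP keys are illustrative.
from composer import Trainer

fsdp_config = {
    'sharding_strategy': 'FULL_SHARD',  # illustrative FSDP settings
    'mixed_precision': 'PURE',
}

trainer = Trainer(
    model=build_model(),                  # hypothetical helper
    device='gpu',
    fsdp_config=fsdp_config,              # was: parallelism_config={'fsdp': fsdp_config}
    train_dataloader=build_dataloader(),  # hypothetical helper
    device_train_microbatch_size=1,
)
```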