From 343dcd8585aac82b66b2feed9f8f19bb1f5ac4bb Mon Sep 17 00:00:00 2001
From: Vincent Chen
Date: Tue, 17 Sep 2024 10:09:36 -0700
Subject: [PATCH] Revert "Replace FSDP args (#1517)"

This reverts commit 83ab9c30e0a2432bcc6213e4cb8b55296b13e438.
---
 llmfoundry/command_utils/eval.py                          | 8 ++------
 tests/a_scripts/inference/test_convert_composer_to_hf.py  | 5 ++---
 tests/models/hf/test_fsdp_weight_tying.py                 | 2 +-
 tests/models/hf/test_hf_peft_wrapping.py                  | 2 +-
 tests/models/test_fsdp_act_checkpoint.py                  | 2 +-
 5 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index eca16bd815..f622ca182d 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -52,7 +52,7 @@ def evaluate_model(
     device_eval_batch_size: Union[int, float],
     eval_gauntlet_config: Optional[Union[str, dict[str, Any]]],
     eval_loader_config: Optional[Union[dict[str, Any], list[dict[str, Any]]]],
-    parallelism_config: Optional[dict[str, Any]],
+    fsdp_config: Optional[dict[str, Any]],
     loggers: list[LoggerDestination],
     python_log_level: Optional[str],
     precision: str,
@@ -99,10 +99,6 @@ def evaluate_model(
         mosaicml_logger.log_metrics(metadata)
         mosaicml_logger._flush_metadata(force_flush=True)

-    fsdp_config = parallelism_config.get(
-        'fsdp_config',
-        None,
-    ) if parallelism_config else None
     if fsdp_config and model.get('load_in_8bit', False):
         raise ValueError(
             'The FSDP config block is not supported when loading ' +
@@ -320,7 +316,7 @@ def evaluate(cfg: DictConfig) -> tuple[list[Trainer], pd.DataFrame]:
             device_eval_batch_size=eval_config.device_eval_batch_size,
             eval_gauntlet_config=eval_gauntlet_config,
             eval_loader_config=eval_loader_config,
-            parallelism_config={'fsdp': fsdp_config},
+            fsdp_config=fsdp_config,
             loggers=loggers,
             python_log_level=eval_config.python_log_level,
             precision=eval_config.precision,
diff --git a/tests/a_scripts/inference/test_convert_composer_to_hf.py b/tests/a_scripts/inference/test_convert_composer_to_hf.py
index 66ec739a65..4f1bd63c62 100644
--- a/tests/a_scripts/inference/test_convert_composer_to_hf.py
+++ b/tests/a_scripts/inference/test_convert_composer_to_hf.py
@@ -1042,8 +1042,7 @@ def test_huggingface_conversion_callback(
         model=original_model,
         device='gpu',
         precision=trainer_precision,
-        parallelism_config={'fsdp': fsdp_config}
-        if fsdp_state_dict_type is not None else None,
+        fsdp_config=fsdp_config if fsdp_state_dict_type is not None else None,
         train_dataloader=train_dataloader,
         save_folder=os.path.join(tmp_path, 'checkpoints'),
         save_interval=save_interval,
@@ -1470,7 +1469,7 @@ def test_mptmoe_huggingface_conversion_callback(
     trainer = Trainer(
         model=original_model,
         device='gpu',
-        parallelism_config={'fsdp': fsdp_config},
+        fsdp_config=fsdp_config,
         train_dataloader=train_dataloader,
         save_folder=os.path.join(tmp_path, 'checkpoints'),
         save_interval=save_interval,
diff --git a/tests/models/hf/test_fsdp_weight_tying.py b/tests/models/hf/test_fsdp_weight_tying.py
index 8e6c113169..69ced673a1 100644
--- a/tests/models/hf/test_fsdp_weight_tying.py
+++ b/tests/models/hf/test_fsdp_weight_tying.py
@@ -91,7 +91,7 @@ def test_fsdp_weight_tying(
     trainer = Trainer(
         model=original_model,
         device='gpu',
-        parallelism_config={'fsdp': fsdp_config},
+        fsdp_config=fsdp_config,
         train_dataloader=[],
         device_train_microbatch_size=1,
     )
diff --git a/tests/models/hf/test_hf_peft_wrapping.py b/tests/models/hf/test_hf_peft_wrapping.py
index 01acc22a60..56cb36c8c1 100644
--- a/tests/models/hf/test_hf_peft_wrapping.py
+++ b/tests/models/hf/test_hf_peft_wrapping.py
@@ -125,7 +125,7 @@ def test_lora_mixed_init(
     trainer = Trainer(
         model=original_model,
         device='gpu',
-        parallelism_config={'fsdp': fsdp_config},
+        fsdp_config=fsdp_config,
         train_dataloader=[],
         device_train_microbatch_size=1,
     )
diff --git a/tests/models/test_fsdp_act_checkpoint.py b/tests/models/test_fsdp_act_checkpoint.py
index 366bcf7786..a41574538a 100644
--- a/tests/models/test_fsdp_act_checkpoint.py
+++ b/tests/models/test_fsdp_act_checkpoint.py
@@ -59,7 +59,7 @@ def test_fsdp_act_checkpoint(
     trainer = Trainer(
         model=model,
         device='gpu',
-        parallelism_config={'fsdp': fsdp_config},
+        fsdp_config=fsdp_config,
     )

     assert trainer.state.fsdp_enabled