From e1728f6509470eb571fe30594bb79553e0ca9478 Mon Sep 17 00:00:00 2001
From: Mihir Patel <mihir.v.patel7@gmail.com>
Date: Wed, 17 Jan 2024 13:52:38 -0500
Subject: [PATCH] Update monkeypatch to add CUDA sync and drop cpu_offload in optim load (#2874)

* wip

* bugfix

* increase retries and jitter

* logs

* logs

* remove kadabra

* add sync

* remove

* no sync

* logs

* tweak

* strip print

* strip

* upload file

* remove comment

* remove

---------

Co-authored-by: Abhinav Venigalla <abhi@mosaicml.com>
---
 composer/trainer/mosaic_fsdp_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/composer/trainer/mosaic_fsdp_utils.py b/composer/trainer/mosaic_fsdp_utils.py
index 24496ec4f4..d04449c4ae 100644
--- a/composer/trainer/mosaic_fsdp_utils.py
+++ b/composer/trainer/mosaic_fsdp_utils.py
@@ -1163,7 +1163,6 @@ def _shard_orig_param_state(
             optim_state,
             pg=fsdp_state.process_group,
             device=fsdp_state.compute_device,
-            cpu_offload=True,
         )
         if not shard_param_info.in_shard:
             return {}
@@ -1179,6 +1178,7 @@ def _shard_orig_param_state(
             ):
                 value = value.flatten()[intra_param_start_idx : intra_param_end_idx + 1].clone()  # type: ignore[operator]
             new_optim_state[state_name] = value
+        torch.cuda.synchronize()
         return new_optim_state
 
 def fsdp_state_has_default_pg(state: '_FSDPState') -> bool: