Skip to content

Commit

Permalink
Fix bug in bfloat16 optimizer related to checkpointing (#4434)
Browse files Browse the repository at this point in the history
* fix: bf16 optimizer if condition

* fix: unexpected keyword argument 'load_serial'

* fix: add load_serial arg to bf16_optimizer

* style: fix indentation

---------

Co-authored-by: Olatunji Ruwase <[email protected]>
  • Loading branch information
okoge-kaz and tjruwase authored Oct 7, 2023
1 parent c4d4679 commit ce6b3e0
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
3 changes: 2 additions & 1 deletion deepspeed/runtime/bf16_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,8 @@ def load_state_dict(self,
state_dict_list,
checkpoint_folder,
load_optimizer_states=True,
- load_from_fp32_weights=False):
+ load_from_fp32_weights=False,
+ load_serial=None):
if checkpoint_folder:
self._load_universal_checkpoint(checkpoint_folder, load_optimizer_states, load_from_fp32_weights)
else:
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -3287,7 +3287,7 @@ def _get_zero_param_shapes(self):
# if we don't use it, we get parameters ordered incorrectly
if hasattr(self.optimizer, "round_robin_bit16_groups"):
bit16_groups = self.optimizer.round_robin_bit16_groups
- elif self.bfloat16_enabled() and not self.zero_optimization():
+ elif self.bfloat16_enabled() and hasattr(self.optimizer, "bf16_groups"):
bit16_groups = self.optimizer.bf16_groups
else:
bit16_groups = self.optimizer.bit16_groups if self.zero_optimization_stage(
Expand Down

0 comments on commit ce6b3e0

Please sign in to comment.