From c704fc1649e57d06e3452972c77abc3d328f818f Mon Sep 17 00:00:00 2001 From: Joe Mayer <114769929+jomayeri@users.noreply.github.com> Date: Fri, 23 Feb 2024 05:12:24 -0800 Subject: [PATCH] Reduce ds_id name length (#5176) Fixing issue #5087 . Limited the naming of the ds_id in ZeRO 3 to the first and last parameters of the group instead of every parameter in the group. --- deepspeed/runtime/zero/stage3.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deepspeed/runtime/zero/stage3.py b/deepspeed/runtime/zero/stage3.py index 4132a4527d6a..03813afa4ed1 100644 --- a/deepspeed/runtime/zero/stage3.py +++ b/deepspeed/runtime/zero/stage3.py @@ -864,7 +864,9 @@ def _create_fp32_partitions(self): self.device).clone().float().detach()) self.fp32_partitioned_groups_flat[i].requires_grad = True # keep this in case internal optimizer uses it - self.fp32_partitioned_groups_flat[i].ds_id = '_'.join(map(str, self.fp16_partitioned_groups_flat_id[i])) + ds_id_begin = str(self.fp16_partitioned_groups_flat_id[i][0]) + ds_id_end = str(self.fp16_partitioned_groups_flat_id[i][-1]) + self.fp32_partitioned_groups_flat[i].ds_id = ds_id_begin + '_' + ds_id_end if len(swappable_fp32_tensors) > 0: self.optimizer_swapper.initialize_parameters(parameters=swappable_fp32_tensors,