diff --git a/tests/unit/checkpoint/common.py b/tests/unit/checkpoint/common.py index e891d9d90f8e..001c08f1a99f 100644 --- a/tests/unit/checkpoint/common.py +++ b/tests/unit/checkpoint/common.py @@ -218,7 +218,7 @@ def checkpoint_correctness_verification(config_dict, for root, _, files in os.walk(save_folder): for f in files: if "_expert_" in f and "_model_states" in f: - expert = torch.load(os.path.join(root, f), weights_only=True) + expert = torch.load(os.path.join(root, f), weights_only=False) needed, storages = 0, {} for name, tensor in expert.items(): needed += tensor.size().numel() diff --git a/tests/unit/checkpoint/test_universal_checkpoint.py b/tests/unit/checkpoint/test_universal_checkpoint.py index 386116aea29a..46d4294bdd0d 100644 --- a/tests/unit/checkpoint/test_universal_checkpoint.py +++ b/tests/unit/checkpoint/test_universal_checkpoint.py @@ -181,7 +181,7 @@ def _run_test(self, tmpdir, dtype, ds_config, load_optim, use_torch_adam): ) hidden_dim = 10 - loaded_model_state, loaded_optimizer_state = torch.load(f"{tmpdir}/baseline_state.pt", weights_only=True) + loaded_model_state, loaded_optimizer_state = torch.load(f"{tmpdir}/baseline_state.pt", weights_only=False) ds_config["checkpoint"] = {"load_universal": True} univ_model = SimpleModel(hidden_dim) diff --git a/tests/unit/checkpoint/test_zero_optimizer.py b/tests/unit/checkpoint/test_zero_optimizer.py index 074a96b5e081..44966b331d0f 100644 --- a/tests/unit/checkpoint/test_zero_optimizer.py +++ b/tests/unit/checkpoint/test_zero_optimizer.py @@ -523,7 +523,7 @@ def test_save_exclude_frozen_weights(self, tmpdir, zero_stage): all_ckpt_folder = os.path.join(tmpdir, 'all_params') ds_engine.save_checkpoint(all_ckpt_folder) all_params_ckpt_file = get_model_ckpt_name_for_rank(os.path.join(all_ckpt_folder, 'global_step0'), '00') - loaded_all_param_model = torch.load(all_params_ckpt_file, weights_only=True)['module'] + loaded_all_param_model = torch.load(all_params_ckpt_file, weights_only=False)['module'] all_param_names = set([n for n, p in model.named_parameters()]) assert set(loaded_all_param_model.keys()) == all_param_names @@ -536,7 +536,7 @@ def test_save_exclude_frozen_weights(self, tmpdir, zero_stage): # Excluding frozen parameters should reduce checkpoint size assert os.path.getsize(all_params_ckpt_file) > os.path.getsize(trainable_ckpt_file) - loaded_trainable_param_model = torch.load(trainable_ckpt_file, weights_only=True)['module'] + loaded_trainable_param_model = torch.load(trainable_ckpt_file, weights_only=False)['module'] frozen_param_names = set([n for n, p in model.named_parameters() if not p.requires_grad]) loaded_trainable_param_names = set(loaded_trainable_param_model.keys()) overlap_names = set.intersection(loaded_trainable_param_names, frozen_param_names) @@ -575,7 +575,7 @@ def test_save_exclude_custom_frozen_weights(self, tmpdir, zero_stage): custom_state_dict_ckpt_file = get_model_ckpt_name_for_rank( os.path.join(custom_state_dict_ckpt_folder, 'global_step0'), '00') - loaded_custom_state_dict_param_model = torch.load(custom_state_dict_ckpt_file, weights_only=True)['module'] + loaded_custom_state_dict_param_model = torch.load(custom_state_dict_ckpt_file, weights_only=False)['module'] loaded_custom_state_dict_param_names = set(loaded_custom_state_dict_param_model.keys()) custom_state_dict_param_names = set([k for k, v in model.state_dict().items()]) diff --git a/tests/unit/model_parallelism/test_configurable_parallel_mp.py b/tests/unit/model_parallelism/test_configurable_parallel_mp.py index e495e946308d..a7b0d3431ee9 100644 --- a/tests/unit/model_parallelism/test_configurable_parallel_mp.py +++ b/tests/unit/model_parallelism/test_configurable_parallel_mp.py @@ -170,7 +170,7 @@ def test(self, baseline_mp2, inputs, class_tmpdir): test = model(inputs[0].to(device_name), inputs[1].to(device_name), inputs[2].to(device_name)) if dist.get_rank() == 0: load_path = os.path.join(class_tmpdir, "output.pt") - baseline = torch.load(load_path, weights_only=True) + baseline = torch.load(load_path, weights_only=False) test = test.cpu() assert torch.allclose( baseline, test, diff --git a/tests/unit/model_parallelism/test_configurable_parallel_pp.py b/tests/unit/model_parallelism/test_configurable_parallel_pp.py index 7a62de77fc05..df469044e186 100644 --- a/tests/unit/model_parallelism/test_configurable_parallel_pp.py +++ b/tests/unit/model_parallelism/test_configurable_parallel_pp.py @@ -225,7 +225,7 @@ def _test(self, inputs, class_tmpdir, checkpoint_tag, mp_size, pp_size, mp_resiz assert torch.is_tensor(test[0][0]) test = test[0][0].cpu() load_path = os.path.join(class_tmpdir, f"output-{checkpoint_tag}.pt") - baseline = torch.load(load_path, weights_only=True) + baseline = torch.load(load_path, weights_only=False) assert torch.allclose( baseline, test, atol=1e-03), f"Baseline output {baseline} is not equal to save-then-load output {test}"