-
Notifications
You must be signed in to change notification settings - Fork 0
/
ResumeAll.py
40 lines (36 loc) · 1.54 KB
/
ResumeAll.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
import torch
def restart_from_checkpoint(ckp_path, run_variables=None, **kwargs):
"""
Re-start from checkpoint
usage
run_variables a dict that contains keys like 'args' 'epoch'
**kwargs for named variables pairs having
load_state_dict() method like optimizer and model/ddp
"""
if not os.path.isfile(ckp_path):
return
print("Found checkpoint at {}".format(ckp_path))
# open checkpoint file
checkpoint = torch.load(ckp_path, map_location="cpu")
# key is what to look for in the checkpoint file
# value is the object to load
# example: {'state_dict': model}
for key, value in kwargs.items():
if key in checkpoint and value is not None:
try:
msg = value.load_state_dict(checkpoint[key], strict=False)
print("=> loaded '{}' from checkpoint '{}' with msg {}".format(key, ckp_path, msg))
except TypeError:
try:
msg = value.load_state_dict(checkpoint[key])
print("=> loaded '{}' from checkpoint: '{}'".format(key, ckp_path))
except ValueError:
print("=> failed to load '{}' from checkpoint: '{}'".format(key, ckp_path))
else:
print("=> key '{}' not found in checkpoint: '{}'".format(key, ckp_path))
# re load variable important for the run
if run_variables is not None:
for var_name in run_variables:
if var_name in checkpoint:
run_variables[var_name] = checkpoint[var_name]