-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.py
66 lines (50 loc) · 1.96 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python3
# ------------------------------------------------------------------------------------
# Copyright (c) 2023 KakaoBrain. All Rights Reserved.
# Licensed under the MIT License [see LICENSE for details]
# ------------------------------------------------------------------------------------
import logging
import pprint
import pytorch_lightning as pl
from omegaconf import DictConfig
from noc.utils.callbacks import CallbackTrainer
from noc.utils.main_utils import init_data_loader, init_hydra_config, init_model, init_trainer
# Lower the root logger threshold *before* emitting anything: the root logger
# defaults to WARNING, so an INFO record logged ahead of this call is dropped.
logging.getLogger().setLevel(logging.DEBUG)
logging.info("Training with config:")
def main():
    """Run training from the hydra config.

    Steps, in order: load the config in ``"train"`` mode, call
    ``pl.seed_everything`` when ``cfg.experiment.seed`` is present and
    non-negative, build the train and validation loaders, build the model and
    the trainer, log the model and config on global rank 0, then call
    ``trainer.fit``. Training resumes from ``cfg.experiment.resume_from`` when
    ``cfg.experiment.resume`` is truthy.
    """
    # Function-local import: ``OmegaConf.to_yaml`` replaces ``DictConfig.pretty()``,
    # which was deprecated in OmegaConf 2.0 and removed in 2.1.
    from omegaconf import OmegaConf

    # init cfg
    cfg = init_hydra_config(mode="train")

    # set random seed (skipped when the seed is missing or negative)
    if "seed" in cfg.experiment and cfg.experiment.seed >= 0:
        pl.seed_everything(cfg.experiment.seed, workers=True)

    # init dataloader
    cfg, train_loader = init_data_loader(cfg, split="train")

    # Only webdataset gets a custom callback; every other dataset type passes
    # ``None`` to the trainer.
    custom_callbacks = None
    if cfg.dataset.ds_type == "webdataset":
        # set epoch for shuffling webdataset
        custom_callbacks = [CallbackTrainer(train_loader)]

    # validation loader (the cfg returned by this call is intentionally discarded)
    _, val_loader = init_data_loader(cfg, split="val")
    loaders = [train_loader, val_loader]

    # init model
    cfg, model = init_model(cfg)

    # init trainer
    cfg, trainer = init_trainer(cfg, custom_callbacks=custom_callbacks)

    if trainer.global_rank == 0:
        # avoiding repeated print when using multiple gpus
        logging.info(f"MODEL: {model}")
        logging.info(OmegaConf.to_yaml(cfg) if isinstance(cfg, DictConfig) else pprint.pformat(cfg))

    # train
    logging.info(
        f"Start Training: Total Epoch - {cfg.trainer.max_epochs}, Precision: {cfg.trainer.precision}"
    )
    trainer.fit(
        model,
        *loaders,  # positional order: train loader, then validation loader
        ckpt_path=cfg.experiment.resume_from if cfg.experiment.resume else None,
    )
# Script entry point: run training only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()