From 8ea4da45797caf309314feb7c40543cfa1653c58 Mon Sep 17 00:00:00 2001
From: panshaowu
Date: Wed, 8 Nov 2023 14:25:09 +0800
Subject: [PATCH] fix issue I8CJMD (#606)

Co-authored-by: panshaowu
---
 docs/cn/tutorials/yaml_configuration.md  |  2 +-
 docs/en/tutorials/yaml_configuration.md  |  2 +-
 tools/benchmarking/multi_dataset_eval.py |  6 ++++++
 tools/eval.py                            |  6 ++++++
 tools/infer/text/predict_det.py          | 13 +++++++++----
 tools/infer/text/predict_rec.py          |  8 ++++++--
 tools/train.py                           |  6 ++++++
 7 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/docs/cn/tutorials/yaml_configuration.md b/docs/cn/tutorials/yaml_configuration.md
index 9b214fdef..192e1e791 100644
--- a/docs/cn/tutorials/yaml_configuration.md
+++ b/docs/cn/tutorials/yaml_configuration.md
@@ -24,7 +24,7 @@
 | mode | MindSpore运行模式(静态图/动态图) | 0 | 0 / 1 | 0: 表示在GRAPH_MODE模式中运行; 1: PYNATIVE_MODE模式 |
 | distribute | 是否开启并行训练 | True | True / False | \ |
 | device_id | 指定单卡训练时的卡id | 7 | 机器可用的卡的id | 该参数仅在distribute=False(单卡训练)和环境变量DEVICE_ID未设置时生效。单卡训练时,如该参数和环境变量DEVICE_ID均未设置,则默认使用0卡。 |
-| amp_level | 混合精度模式 | O0 | O0/O1/O2/O3 | 'O0' - 不变化。<br> 'O1' - 将白名单内的Cell和运算转为float16精度,其余部分保持float32精度。<br> 'O2' - 将黑名单内的Cell和运算保持float32精度,其余部分转为float16精度。<br> 'O3' - 将网络全部转为float16精度。|
+| amp_level | 混合精度模式 | O0 | O0/O1/O2/O3 | 'O0' - 不变化。<br> 'O1' - 将白名单内的Cell和运算转为float16精度,其余部分保持float32精度。<br> 'O2' - 将黑名单内的Cell和运算保持float32精度,其余部分转为float16精度。<br> 'O3' - 将网络全部转为float16精度。<br> 注意:GPU平台上的模型推理或评估暂不支持'O3'模式,如设置为'O3'模式,程序会自动将其转为'O2'模式。|
 | seed | 随机种子 | 42 | Integer | \ |
 | ckpt_save_policy | 模型权重保存策略 | top_k | "top_k" 或 "latest_k" | "top_k"表示保存前k个评估指标分数最高的checkpoint;"latest_k"表示保存最新的k个checkpoint。 `k`的数值通过`ckpt_max_keep`参数定义 |
 | ckpt_max_keep | 最多保存的checkpoint数量 | 5 | Integer | \ |
diff --git a/docs/en/tutorials/yaml_configuration.md b/docs/en/tutorials/yaml_configuration.md
index 954e1fddc..fa1d5e192 100644
--- a/docs/en/tutorials/yaml_configuration.md
+++ b/docs/en/tutorials/yaml_configuration.md
@@ -23,7 +23,7 @@ This document takes `configs/rec/crnn/crnn_icdar15.yaml` as an example to descri
 | mode | Mindspore running mode (static graph/dynamic graph) | 0 | 0 / 1 | 0: means running in GRAPH_MODE mode; 1: PYNATIVE_MODE mode |
 | distribute | Whether to enable parallel training | True | True / False | \ |
 | device_id | Specify the device id while standalone training | 7 | The ids of all devices in the server | Only valid when distribute=False (standalone training) and environment variable 'DEVICE_ID' is NOT set. While standalone training, if both this arg and environment variable 'DEVICE_ID' are NOT set, use device 0 by default. |
-| amp_level | Mixed precision mode | O0 | O0/O1/O2/O3 | 'O0' - no change.<br> 'O1' - convert the cells and operations in the whitelist to float16 precision, and keep the rest in float32 precision.<br> 'O2' - Keep the cells and operations in the blacklist with float32 precision, and convert the rest to float16 precision.<br> 'O3' - Convert all networks to float16 precision. |
+| amp_level | Mixed precision mode | O0 | O0/O1/O2/O3 | 'O0' - no change.<br> 'O1' - convert the cells and operations in the whitelist to float16 precision, and keep the rest in float32 precision.<br> 'O2' - Keep the cells and operations in the blacklist with float32 precision, and convert the rest to float16 precision.<br> 'O3' - Convert all networks to float16 precision.<br> Notice: Model prediction or evaluation does not support 'O3' on GPU platform. If amp_level is set to 'O3' for model prediction and evaluation on GPU platform, the program will switch it to 'O2' automatically.|
 | seed | Random seed | 42 | Integer | \ |
 | ckpt_save_policy | The policy for saving model weights | top_k | "top_k" or "latest_k" | "top_k" means to keep the top k checkpoints according to the metric score; "latest_k" means to keep the last k checkpoints. The value of `k` is set via `ckpt_max_keep` |
 | ckpt_max_keep | The maximum number of checkpoints to keep during training | 5 | Integer | \ |
diff --git a/tools/benchmarking/multi_dataset_eval.py b/tools/benchmarking/multi_dataset_eval.py
index af95d7730..9c791433a 100644
--- a/tools/benchmarking/multi_dataset_eval.py
+++ b/tools/benchmarking/multi_dataset_eval.py
@@ -102,6 +102,12 @@ def main(cfg):
     # model
     cfg.model.backbone.pretrained = False
     amp_level = cfg.system.get("amp_level_infer", "O0")
+    if ms.get_context("device_target") == "GPU" and amp_level == "O3":
+        logger.warning(
+            "Model inference does not support amp_level O3 on GPU currently. "
+            "The program has switched to amp_level O2 automatically."
+        )
+        amp_level = "O2"
     network = build_model(cfg.model, ckpt_load_path=cfg.eval.ckpt_load_path, amp_level=amp_level)
     num_params = sum([param.size for param in network.get_parameters()])
     num_trainable_params = sum([param.size for param in network.trainable_params()])
diff --git a/tools/eval.py b/tools/eval.py
index 96551792a..aaa91f00c 100644
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -70,6 +70,12 @@ def main(cfg):
     # model
     cfg.model.backbone.pretrained = False
     amp_level = cfg.system.get("amp_level_infer", "O0")
+    if ms.get_context("device_target") == "GPU" and amp_level == "O3":
+        logger.warning(
+            "Model evaluation does not support amp_level O3 on GPU currently. "
+            "The program has switched to amp_level O2 automatically."
+        )
+        amp_level = "O2"
     network = build_model(cfg.model, ckpt_load_path=cfg.eval.ckpt_load_path, amp_level=amp_level)
     num_params = sum([param.size for param in network.get_parameters()])
     num_trainable_params = sum([param.size for param in network.trainable_params()])
diff --git a/tools/infer/text/predict_det.py b/tools/infer/text/predict_det.py
index e04ccfdfc..ea4a5d9ad 100644
--- a/tools/infer/text/predict_det.py
+++ b/tools/infer/text/predict_det.py
@@ -2,7 +2,7 @@
 Text detection inference
 
 Example:
-    $ python tools/infer/text/predict_det.py --image_dir {path_to_img} --rec_algorithm DB++
+    $ python tools/infer/text/predict_det.py --image_dir {path_to_img} --det_algorithm DB++
 """
 
 import json
@@ -54,9 +54,14 @@ def __init__(self, args):
                 f"Supported detection algorithms are {list(algo_to_model_name.keys())}"
             )
         model_name = algo_to_model_name[args.det_algorithm]
-        self.model = build_model(
-            model_name, pretrained=pretrained, ckpt_load_path=ckpt_load_path, amp_level=args.det_amp_level
-        )
+        amp_level = args.det_amp_level
+        if ms.get_context("device_target") == "GPU" and amp_level == "O3":
+            logger.warning(
+                "Detection model prediction does not support amp_level O3 on GPU currently. "
+                "The program has switched to amp_level O2 automatically."
+            )
+            amp_level = "O2"
+        self.model = build_model(model_name, pretrained=pretrained, ckpt_load_path=ckpt_load_path, amp_level=amp_level)
         self.model.set_train(False)
         logger.info(
             "Init detection model: {} --> {}. Model weights loaded from {}".format(
diff --git a/tools/infer/text/predict_rec.py b/tools/infer/text/predict_rec.py
index b79554c3b..44dee3ace 100644
--- a/tools/infer/text/predict_rec.py
+++ b/tools/infer/text/predict_rec.py
@@ -65,14 +65,18 @@ def __init__(self, args):
             )
         model_name = algo_to_model_name[args.rec_algorithm]
 
-        # amp_level = 'O2' if args.rec_algorithm.startswith('SVTR') else args.rec_amp_level
         amp_level = args.rec_amp_level
         if args.rec_algorithm.startswith("SVTR") and amp_level != "O2":
             logger.warning(
                 "SVTR recognition model is optimized for amp_level O2. ampl_level for rec model is changed to O2"
             )
             amp_level = "O2"
-
+        if ms.get_context("device_target") == "GPU" and amp_level == "O3":
+            logger.warning(
+                "Recognition model prediction does not support amp_level O3 on GPU currently. "
+                "The program has switched to amp_level O2 automatically."
+            )
+            amp_level = "O2"
         self.model = build_model(model_name, pretrained=pretrained, ckpt_load_path=ckpt_load_path, amp_level=amp_level)
         self.model.set_train(False)
 
diff --git a/tools/train.py b/tools/train.py
index 5cae66516..74d4c642c 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -107,6 +107,12 @@ def main(cfg):
 
     # create model
    amp_level = cfg.system.get("amp_level", "O0")
+    if ms.get_context("device_target") == "GPU" and cfg.system.val_while_train and amp_level == "O3":
+        logger.warning(
+            "Model evaluation does not support amp_level O3 on GPU currently. "
+            "The program has switched to amp_level O2 automatically."
+        )
+        amp_level = "O2"
     network = build_model(cfg.model, ckpt_load_path=cfg.model.pop("pretrained", None), amp_level=amp_level)
     num_params = sum([param.size for param in network.get_parameters()])
     num_trainable_params = sum([param.size for param in network.trainable_params()])