-
Notifications
You must be signed in to change notification settings - Fork 57
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add DBNet MobileNetv3 PP-OCRv3 model (#612)
- Loading branch information
1 parent
be11091
commit 86522af
Showing
14 changed files
with
1,371 additions
and
18 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
system: | ||
mode: 0 # 0 for graph mode, 1 for pynative mode in MindSpore | ||
distribute: False | ||
amp_level: 'O0' | ||
seed: 42 | ||
log_interval: 10 | ||
val_while_train: True | ||
# val_start_epoch: 10 | ||
drop_overflow_update: False | ||
|
||
model: | ||
type: det | ||
transform: null | ||
backbone: | ||
name: det_mobilenet_v3_enhance | ||
architecture: large | ||
alpha: 0.5 | ||
disable_se: True | ||
pretrained: False | ||
neck: | ||
name: RSEFPN | ||
out_channels: 96 | ||
shortcut: True | ||
head: | ||
name: DBHeadEnhance | ||
k: 50 | ||
bias: False | ||
adaptive: True | ||
|
||
postprocess: | ||
name: DBPostprocess | ||
box_type: quad # whether to output a polygon or a box | ||
binary_thresh: 0.3 # binarization threshold | ||
box_thresh: 0.9 # box score threshold; boxes scoring below it are discarded | ||
max_candidates: 1000 | ||
expand_ratio: 1.5 # coefficient for expanding predictions | ||
|
||
metric: | ||
name: DetMetric | ||
main_indicator: f-score | ||
|
||
loss: | ||
name: DBLoss | ||
eps: 1.0e-6 | ||
l1_scale: 10 | ||
bce_scale: 5 | ||
bce_replace: diceloss | ||
|
||
scheduler: | ||
scheduler: warmup_cosine_decay | ||
lr: 0.001 | ||
min_lr: 0.0 | ||
num_epochs: 500 | ||
warmup_epochs: 2 | ||
decay_epochs: 498 | ||
|
||
optimizer: | ||
opt: Adam | ||
beta1: 0.9 | ||
beta2: 0.999 | ||
weight_decay: 5.0e-05 | ||
|
||
# only used for mixed precision training | ||
loss_scaler: | ||
type: dynamic | ||
loss_scale: 512 | ||
scale_factor: 2 | ||
scale_window: 1000 | ||
|
||
train: | ||
ckpt_save_dir: ./tmp_det | ||
dataset_sink_mode: False | ||
dataset: | ||
type: DetDataset | ||
dataset_root: dir/to/data/ | ||
data_dir: training/ | ||
label_file: train_det.txt | ||
sample_ratio: 1.0 | ||
transform_pipeline: | ||
- DecodeImage: | ||
img_mode: RGB | ||
to_float32: False | ||
- DetLabelEncode: | ||
- RandomColorAdjust: | ||
brightness: 0.1255 # 32.0 / 255 | ||
saturation: 0.5 | ||
- RandomHorizontalFlip: | ||
p: 0.5 | ||
- RandomRotate: | ||
degrees: [ -10, 10 ] | ||
expand_canvas: False | ||
p: 1.0 | ||
- RandomScale: | ||
scale_range: [ 0.5, 3.0 ] | ||
p: 1.0 | ||
- RandomCropWithBBox: | ||
max_tries: 10 | ||
min_crop_ratio: 0.1 | ||
crop_size: [ 960, 960 ] | ||
p: 1.0 | ||
- ValidatePolygons: | ||
- ShrinkBinaryMap: | ||
min_text_size: 8 | ||
shrink_ratio: 0.4 | ||
- BorderMap: | ||
shrink_ratio: 0.4 | ||
thresh_min: 0.3 | ||
thresh_max: 0.7 | ||
- NormalizeImage: | ||
bgr_to_rgb: False | ||
is_hwc: True | ||
mean: imagenet | ||
std: imagenet | ||
- ToCHWImage: | ||
# column order of the dataloader output; it must match the network inputs and the labels expected by the loss function, followed by any optional data for debugging/visualization | ||
output_columns: [ 'image', 'binary_map', 'mask', 'thresh_map', 'thresh_mask'] | ||
net_input_column_index: [0] # input indices for network forward func in output_columns | ||
label_column_index: [1, 2, 3, 4] # input indices marked as label | ||
|
||
loader: | ||
shuffle: True | ||
batch_size: 10 | ||
drop_remainder: True | ||
num_workers: 10 | ||
|
||
eval: | ||
ckpt_load_path: tmp_det/best.ckpt | ||
dataset_sink_mode: False | ||
dataset: | ||
type: DetDataset | ||
dataset_root: dir/to/data/ | ||
data_dir: validation/ | ||
label_file: val_det.txt | ||
sample_ratio: 1.0 | ||
transform_pipeline: | ||
- DecodeImage: | ||
img_mode: RGB | ||
to_float32: False | ||
- DetLabelEncode: | ||
- DetResize: # GridResize 32 | ||
limit_type: 'min' | ||
limit_side_len: 736 | ||
- NormalizeImage: | ||
bgr_to_rgb: True | ||
is_hwc: True | ||
mean: imagenet | ||
std: imagenet | ||
- ToCHWImage: | ||
# column order of the dataloader output; it must match the network inputs and the labels used for evaluation | ||
output_columns: [ 'image', 'polys', 'ignore_tags', 'shape_list' ] | ||
net_input_column_index: [0] # input indices for network forward func in output_columns | ||
label_column_index: [1, 2] # input indices marked as label | ||
|
||
loader: | ||
shuffle: False | ||
batch_size: 1 # TODO: batch size has to be 1 because the number of polygons (boxes) varies per image, giving dynamic shapes | ||
drop_remainder: False | ||
num_workers: 3 |
303 changes: 303 additions & 0 deletions
303
configs/det/dbnet/db_mobilenetv3_ppocrv3_param_map.json
Large diffs are not rendered by default.
Oops, something went wrong.
Oops, something went wrong.