Skip to content

Commit

Permalink
add DBNet MobileNetv3 PP-OCRv3 model (#612)
Browse files Browse the repository at this point in the history
  • Loading branch information
tonytonglt authored Nov 17, 2023
1 parent be11091 commit 86522af
Show file tree
Hide file tree
Showing 14 changed files with 1,371 additions and 18 deletions.
411 changes: 411 additions & 0 deletions configs/det/dbnet/README_CN_PP-OCRv3.md

Large diffs are not rendered by default.

158 changes: 158 additions & 0 deletions configs/det/dbnet/db_mobilenetv3_ppocrv3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Top-level runtime options for the training/evaluation run.
system:
  mode: 0  # 0 for graph mode, 1 for pynative mode in MindSpore
  distribute: False
  amp_level: 'O0'
  seed: 42
  log_interval: 10
  val_while_train: True
  # Optional setting, currently disabled:
  # val_start_epoch: 10
  drop_overflow_update: False

# Detection network definition, split into backbone, neck and head sections.
# Each section's `name` selects the component; the remaining keys are that
# component's own options.
model:
  type: det
  transform: null
  backbone:
    name: det_mobilenet_v3_enhance
    architecture: large
    alpha: 0.5
    disable_se: True
    pretrained: False
  neck:
    name: RSEFPN
    out_channels: 96
    shortcut: True
  head:
    name: DBHeadEnhance
    k: 50
    bias: False
    adaptive: True

# Post-processing applied to the network output to obtain text boxes.
postprocess:
  name: DBPostprocess
  box_type: quad  # whether to output a polygon or a box
  binary_thresh: 0.3  # binarization threshold
  box_thresh: 0.9  # box score threshold
  max_candidates: 1000
  expand_ratio: 1.5  # coefficient for expanding predictions

# Evaluation metric; `main_indicator` names the headline score
# (presumably the one used to pick the best checkpoint - confirm).
metric:
  name: DetMetric
  main_indicator: f-score

# Training loss configuration.
loss:
  name: DBLoss
  eps: 1.0e-6
  l1_scale: 10
  bce_scale: 5
  # NOTE(review): presumably swaps the BCE term for a dice loss - confirm
  # against the DBLoss implementation.
  bce_replace: diceloss

# Learning-rate schedule.
# warmup_epochs + decay_epochs (2 + 498) adds up to num_epochs (500).
scheduler:
  scheduler: warmup_cosine_decay
  lr: 0.001
  min_lr: 0.0
  num_epochs: 500
  warmup_epochs: 2
  decay_epochs: 498

# Optimizer selection and its hyper-parameters.
optimizer:
  opt: Adam
  beta1: 0.9
  beta2: 0.999
  weight_decay: 5.0e-05

# Loss scaling; only used for mixed precision training.
# NOTE(review): `system.amp_level` is 'O0' in this file, so this section is
# presumably inactive unless that level is raised - confirm.
loss_scaler:
  type: dynamic
  loss_scale: 512
  scale_factor: 2
  scale_window: 1000

# Training stage: checkpoint output directory, dataset with its augmentation
# pipeline, and data-loader settings.
train:
  ckpt_save_dir: ./tmp_det
  dataset_sink_mode: False
  dataset:
    type: DetDataset
    # Placeholder paths; point these at the real dataset before training.
    dataset_root: dir/to/data/
    data_dir: training/
    label_file: train_det.txt
    sample_ratio: 1.0
    # Transforms are listed in order; entries with no keys underneath take
    # no arguments.
    transform_pipeline:
      - DecodeImage:
          img_mode: RGB
          to_float32: False
      - DetLabelEncode:
      - RandomColorAdjust:
          brightness: 0.1255  # 32.0 / 255
          saturation: 0.5
      - RandomHorizontalFlip:
          p: 0.5
      - RandomRotate:
          degrees: [ -10, 10 ]
          expand_canvas: False
          p: 1.0
      - RandomScale:
          scale_range: [ 0.5, 3.0 ]
          p: 1.0
      - RandomCropWithBBox:
          max_tries: 10
          min_crop_ratio: 0.1
          crop_size: [ 960, 960 ]
          p: 1.0
      - ValidatePolygons:
      - ShrinkBinaryMap:
          min_text_size: 8
          shrink_ratio: 0.4
      - BorderMap:
          shrink_ratio: 0.4
          thresh_min: 0.3
          thresh_max: 0.7
      - NormalizeImage:
          # NOTE(review): the eval pipeline in this file sets
          # `bgr_to_rgb: True` while both pipelines decode with
          # `img_mode: RGB` - confirm the mismatch is intended.
          bgr_to_rgb: False
          is_hwc: True
          mean: imagenet
          std: imagenet
      - ToCHWImage:
    # the order of the dataloader list, matching the network input and the
    # input labels for the loss function, and optional data for
    # debug/visualize
    output_columns: [ 'image', 'binary_map', 'mask', 'thresh_map', 'thresh_mask' ]
    net_input_column_index: [0]  # input indices for network forward func in output_columns
    label_column_index: [1, 2, 3, 4]  # input indices marked as label

  loader:
    shuffle: True
    batch_size: 10
    drop_remainder: True
    num_workers: 10

# Evaluation stage: checkpoint to load, dataset with its deterministic
# preprocessing pipeline, and data-loader settings.
eval:
  # Relative path; lines up with `train.ckpt_save_dir` (./tmp_det).
  ckpt_load_path: tmp_det/best.ckpt
  dataset_sink_mode: False
  dataset:
    type: DetDataset
    # Placeholder paths; point these at the real dataset before evaluating.
    dataset_root: dir/to/data/
    data_dir: validation/
    label_file: val_det.txt
    sample_ratio: 1.0
    # Transforms are listed in order; entries with no keys underneath take
    # no arguments.
    transform_pipeline:
      - DecodeImage:
          img_mode: RGB
          to_float32: False
      - DetLabelEncode:
      # GridResize 32
      # NOTE(review): presumably the resized sides are aligned to a multiple
      # of 32 - confirm against DetResize.
      - DetResize:
          limit_type: 'min'
          limit_side_len: 736
      - NormalizeImage:
          # NOTE(review): the train pipeline in this file sets
          # `bgr_to_rgb: False` while both pipelines decode with
          # `img_mode: RGB` - confirm the mismatch is intended.
          bgr_to_rgb: True
          is_hwc: True
          mean: imagenet
          std: imagenet
      - ToCHWImage:
    # the order of the dataloader list, matching the network input and the
    # labels for evaluation
    output_columns: [ 'image', 'polys', 'ignore_tags', 'shape_list' ]
    net_input_column_index: [0]  # input indices for network forward func in output_columns
    label_column_index: [1, 2]  # input indices marked as label

  loader:
    shuffle: False
    batch_size: 1  # TODO: due to dynamic shape of polygons (num of boxes varies), BS has to be 1
    drop_remainder: False
    num_workers: 3
303 changes: 303 additions & 0 deletions configs/det/dbnet/db_mobilenetv3_ppocrv3_param_map.json

Large diffs are not rendered by default.

Loading

0 comments on commit 86522af

Please sign in to comment.