Merge branch 'PaddlePaddle:main' into main

EchoEcho618 · Jul 29, 2024 · 838f273 · 838f273
2 parents 8918b6e + d1583d4
commit 838f273
Show file tree

Hide file tree

Showing 677 changed files with 45,913 additions and 98 deletions.
diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -0,0 +1,81 @@
+# PaddleOCR: GitHub issue form used to file bug reports
+
+name: 🐛 Bug Report
+description: Problems with PaddleOCR
+labels: [bug]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thank you for submitting a PaddleOCR 🐛 Bug Report!
+
+  - type: checkboxes  # all three options below are marked required
+    attributes:
+      label: Search before asking
+      description: >
+        Please search the PaddleOCR [Docs](https://paddlepaddle.github.io/PaddleOCR/), [Issues](https://github.com/PaddlePaddle/PaddleOCR/issues) and [Discussions](https://github.com/PaddlePaddle/PaddleOCR/discussions) to see if a similar bug report already exists.
+      options:
+        - label: >
+            I have searched the PaddleOCR [Docs](https://paddlepaddle.github.io/PaddleOCR/) and found no similar bug report.
+          required: true
+        - label: >
+            I have searched the PaddleOCR [Issues](https://github.com/PaddlePaddle/PaddleOCR/issues) and found no similar bug report.
+          required: true
+        - label: >
+            I have searched the PaddleOCR [Discussions](https://github.com/PaddlePaddle/PaddleOCR/discussions) and found no similar bug report.
+          required: true
+
+  - type: textarea  # the only textarea whose validations set required: true
+    attributes:
+      label: Bug
+      description: Provide console output with error messages and/or screenshots of the bug.
+      placeholder: |
+        💡 ProTip! Include as much information as possible (screenshots, logs, tracebacks etc.) to receive the most helpful response.
+    validations:
+      required: true
+
+  - type: textarea  # optional; the placeholder shows a sample environment table
+    attributes:
+      label: Environment
+      description: Please specify the software and hardware you used to produce the bug.
+      placeholder: |
+
+        ```
+        OS                  macOS-13.5.2
+        Environment         Jupyter
+        Python              3.11.2
+        PaddleOCR           2.8.1
+        Install             git
+        RAM                 16.00 GB
+        CPU                 Apple M2
+        CUDA                None
+        ```
+    validations:
+      required: false
+
+  - type: textarea  # optional
+    attributes:
+      label: Minimal Reproducible Example
+      description: >
+        When asking a question, people will be better able to provide help if you provide code that they can easily understand and use to **reproduce** the problem.
+        This is referred to by community members as creating a [minimal reproducible example](https://stackoverflow.com/help/minimal-reproducible-example).
+      placeholder: |
+        ```
+        # Code to reproduce your issue here
+        ```
+    validations:
+      required: false
+
+  - type: textarea  # no validations block is given for this field
+    attributes:
+      label: Additional
+      description: Anything else you would like to share?
+
+  - type: checkboxes  # single option, not marked required
+    attributes:
+      label: Are you willing to submit a PR?
+      description: >
+        (Optional) We encourage you to submit a [Pull Request](https://github.com/PaddlePaddle/PaddleOCR/pulls) (PR) to help improve PaddleOCR for everyone, especially if you have a good understanding of how to implement a fix or feature.
+        See the PaddleOCR [community_contribution](https://paddlepaddle.github.io/PaddleOCR/community/community_contribution.html#2) to get started.
+      options:
+        - label: Yes I'd like to help by submitting a PR!
diff --git a/.github/ISSUE_TEMPLATE/bug.md b/.github/ISSUE_TEMPLATE/bug.md
diff --git a/.github/workflows/documents.yml b/.github/workflows/documents.yml
@@ -0,0 +1,31 @@
+name: build_document_site  # builds the MkDocs site and publishes it via `mkdocs gh-deploy`
+on:
+  push:
+    branches:
+      - master
+      - main
+permissions:
+  contents: write  # needed so `mkdocs gh-deploy` can push the built site
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history; the git-revision-date plugin installed below reads per-file commit dates
+      - name: Configure Git Credentials
+        run: |
+          git config user.name github-actions[bot]
+          git config user.email 41898282+github-actions[bot]@users.noreply.github.com
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+      - run: echo "cache_id=$(date --utc '+%V')" >> "$GITHUB_ENV"  # %V is the ISO week number, so the cache key rotates weekly
+      - uses: actions/cache@v4
+        with:
+          key: mkdocs-material-${{ env.cache_id }}
+          path: .cache
+          restore-keys: |
+            mkdocs-material-
+      - run: pip install mkdocs-material jieba mkdocs-git-revision-date-localized-plugin mkdocs-git-committers-plugin-2 mkdocs-static-i18n
+      - run: mkdocs gh-deploy --force
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -10,13 +10,13 @@ repos:
     -   id: detect-private-key
     -   id: end-of-file-fixer
     -   id: trailing-whitespace
-        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|py|md)$
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|py)$
 -   repo: https://github.com/Lucas-C/pre-commit-hooks
     rev: v1.5.1
     hooks:
     -   id: remove-crlf
     -   id: remove-tabs
-        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|py|md)$
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|py)$
 -   repo: local
     hooks:
     -   id: clang-format

diff --git a/README.md b/README.md
@@ -30,7 +30,17 @@ PaddleOCR 由 [PMC](https://github.com/PaddlePaddle/PaddleOCR/issues/12122) 监
 ⚠️注意：[Issues](https://github.com/PaddlePaddle/PaddleOCR/issues)模块仅用来报告程序🐞Bug，其余提问请移步[Discussions](https://github.com/PaddlePaddle/PaddleOCR/discussions)模块提问。如所提Issue不是Bug，会被移到Discussions模块，敬请谅解。
 
 ## 📣 近期更新
+
+- **🔥2024.7 添加 PaddleOCR 算法模型挑战赛冠军方案**：
+    - 赛题一：OCR 端到端识别任务冠军方案——[场景文本识别算法-SVTRv2](doc/doc_ch/algorithm_rec_svtrv2.md)；
+    - 赛题二：通用表格识别任务冠军方案——[表格识别算法-SLANet-LCNetV2](doc/doc_ch/algorithm_table_slanet.md)。
+
+- **💥2024.6.27 飞桨低代码开发工具 [PaddleX 3.0](https://github.com/paddlepaddle/paddlex) 重磅更新！**
+  - 低代码开发范式：支持 OCR 模型全流程低代码开发，提供 Python API，支持用户自定义串联模型；
+  - 多硬件训推支持：支持英伟达 GPU、昆仑芯、昇腾和寒武纪等多种硬件进行模型训练与推理。PaddleOCR支持的模型见 [模型列表](doc/doc_ch/hardware/supported_models.md)。
+
 - **📚直播和OCR实战打卡营预告**：《PP-ChatOCRv2赋能金融报告信息智能化抽取，新金融效率再升级》课程上线，破解复杂版面、表格识别、信息抽取OCR解析难题，直播时间：6月6日（周四）19：00。并于6月11日启动【政务采购合同信息抽取】实战打卡营。报名链接：https://www.wjx.top/vm/eBcYmqO.aspx?udsid=197406
+
 - **🔥2024.5.10 上线星河零代码产线(OCR 相关)**：全面覆盖了以下四大 OCR 核心任务，提供极便捷的 Badcase 分析和实用的在线体验：
   - [通用 OCR](https://aistudio.baidu.com/community/app/91660) (PP-OCRv4)。
   - [通用表格识别](https://aistudio.baidu.com/community/app/91661) (SLANet)。

diff --git a/README_en.md b/README_en.md
@@ -31,6 +31,11 @@ PaddleOCR is being oversight by a [PMC](https://github.com/PaddlePaddle/PaddleOC
 ⚠️ Note: The [Issues](https://github.com/PaddlePaddle/PaddleOCR/issues) module is only for reporting program 🐞 bugs, for the rest of the questions, please move to the [Discussions](https://github.com/PaddlePaddle/PaddleOCR/discussions). Please note that if the Issue mentioned is not a bug, it will be moved to the Discussions module.
 
 ## 📣 Recent updates
+
+- **🔥2024.7 Added PaddleOCR Algorithm Model Challenge Champion Solutions**:
+    - Challenge One, OCR End-to-End Recognition Task Champion Solution: [Scene Text Recognition Algorithm-SVTRv2](doc/doc_ch/algorithm_rec_svtrv2.md);
+    - Challenge Two, General Table Recognition Task Champion Solution: [Table Recognition Algorithm-SLANet-LCNetV2](doc/doc_ch/algorithm_table_slanet.md).
+
 - **🔥2023.8.7 Release PaddleOCR[release/2.7](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.7)**
   - Release [PP-OCRv4](./doc/doc_ch/PP-OCRv4_introduction.md), support mobile version and server version
     - PP-OCRv4-mobile：When the speed is comparable, the effect of the Chinese scene is improved by 4.5% compared with PP-OCRv3, the English scene is improved by 10%, and the average recognition accuracy of the 80-language multilingual model is increased by more than 8%.

diff --git a/VERSION_NUMBER b/VERSION_NUMBER
@@ -1 +1 @@
-2.8.0
+2.8.1
diff --git a/__init__.py b/__init__.py
@@ -11,13 +11,24 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .paddleocr import *
+from .paddleocr import (
+    PaddleOCR,
+    PPStructure,
+    draw_ocr,
+    draw_structure_result,
+    save_structure_res,
+    download_with_progressbar,
+    sorted_layout_boxes,
+    convert_info_docx,
+    to_excel,
+)
 import importlib.metadata as importlib_metadata
 
 try:
     __version__ = importlib_metadata.version(__package__ or __name__)
 except importlib_metadata.PackageNotFoundError:
     __version__ = "0.0.0"
+
 __all__ = [
     "PaddleOCR",
     "PPStructure",

diff --git a/configs/rec/SVTRv2/rec_repsvtr_ch.yml b/configs/rec/SVTRv2/rec_repsvtr_ch.yml
@@ -0,0 +1,134 @@
+Global:  # top-level run settings: device, schedule, logging cadence, paths, charset
+  debug: false
+  use_gpu: true
+  epoch_num: 200
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec_repsvtr_ch
+  save_epoch_step: 10
+  eval_batch_step: [0, 1000]  # NOTE(review): presumably [start_step, interval] — confirm against the trainer
+  cal_metric_during_train: false
+  pretrained_model:  # empty value (parses as null)
+  checkpoints:  # empty value (parses as null)
+  save_inference_dir:  # empty value (parses as null)
+  use_visualdl: false
+  infer_img: doc/imgs_words/ch/word_1.jpg
+  character_dict_path: ppocr/utils/ppocr_keys_v1.txt
+  max_text_length: &max_text_length 25  # anchor; aliased by NRTRHead.max_text_length under Architecture
+  infer_mode: false
+  use_space_char: true
+  distributed: true
+  save_res_path: ./output/rec/predicts_repsvtr.txt
+
+Optimizer:  # AdamW with a Cosine learning-rate schedule
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1.e-8  # keep the "." — YAML 1.1 loaders such as PyYAML read "1e-8" as a string
+  weight_decay: 0.025
+  no_weight_decay_name: norm
+  one_dim_param_no_weight_decay: true
+  lr:
+    name: Cosine
+    learning_rate: 0.001  # 8gpus 192bs
+    warmup_epoch: 5
+
+
+Architecture:  # RepSVTR backbone feeding a MultiHead (CTCHead + NRTRHead)
+  model_type: rec
+  algorithm: SVTR_HGNet
+  Transform:  # empty value (parses as null)
+  Backbone:
+    name: RepSVTR
+  Head:
+    name: MultiHead
+    head_list:
+      - CTCHead:
+          Neck:
+            name: svtr
+            dims: 256
+            depth: 2
+            hidden_dims: 256
+            kernel_size: [1, 3]
+            use_guide: true
+          Head:
+            fc_decay: 0.00001
+      - NRTRHead:
+          nrtr_dim: 384
+          max_text_length: *max_text_length  # alias of Global.max_text_length (25)
+          num_decoder_layers: 2
+
+Loss:  # MultiLoss listing CTCLoss and NRTRLoss, neither given parameters
+  name: MultiLoss
+  loss_config_list:
+    - CTCLoss:
+    - NRTRLoss:
+
+PostProcess:
+  name: CTCLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+
+Train:  # MultiScaleDataSet + MultiScaleSampler; batch size is shared through the &bs anchor
+  dataset:
+    name: MultiScaleDataSet
+    ds_width: false
+    data_dir: ./train_data/
+    ext_op_transform_idx: 1
+    label_file_list:
+    - ./train_data/train_list.txt
+    transforms:
+    - DecodeImage:
+        img_mode: BGR
+        channel_first: false
+    - RecAug:
+    - MultiLabelEncode:
+        gtc_encode: NRTRLabelEncode
+    - KeepKeys:
+        keep_keys:
+        - image
+        - label_ctc
+        - label_gtc
+        - length
+        - valid_ratio
+  sampler:
+    name: MultiScaleSampler
+    scales: [[320, 32], [320, 48], [320, 64]]  # NOTE(review): looks like [w, h] pairs, matching the "w, h" note below — confirm
+    first_bs: &bs 192  # anchor; aliased by loader.batch_size_per_card
+    fix_bs: false
+    divided_factor: [8, 16]  # w, h
+    is_training: true
+  loader:
+    shuffle: true
+    batch_size_per_card: *bs  # alias of sampler.first_bs (192)
+    drop_last: true
+    num_workers: 8
+Eval:  # same keep_keys as Train; no RecAug and no sampler, with a fixed RecResizeImg step instead
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data
+    label_file_list:
+    - ./train_data/val_list.txt
+    transforms:
+    - DecodeImage:
+        img_mode: BGR
+        channel_first: false
+    - MultiLabelEncode:
+        gtc_encode: NRTRLabelEncode
+    - RecResizeImg:
+        image_shape: [3, 48, 320]  # NOTE(review): presumably [channels, height, width] — confirm
+    - KeepKeys:
+        keep_keys:
+        - image
+        - label_ctc
+        - label_gtc
+        - length
+        - valid_ratio
+  loader:
+    shuffle: false
+    drop_last: false
+    batch_size_per_card: 128  # literal value; does not reuse the &bs anchor defined under Train
+    num_workers: 4