feat: add direction classifier
gutenye committed May 20, 2024
1 parent f6a8d2b commit 386cc70
Showing 19 changed files with 206 additions and 143 deletions.
3 changes: 2 additions & 1 deletion .vscode/settings.json
@@ -90,7 +90,8 @@
"*.ipp": "cpp",
"format": "cpp",
"type_traits": "cpp",
"__memory": "cpp"
"__memory": "cpp",
"memory": "cpp"
},
"[jsonc]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
2 changes: 1 addition & 1 deletion README.md
@@ -74,7 +74,7 @@ Ocr.create({
detectionUnclipRatiop?: number // RN only
detectionUseDilate?: boolean // RN only
detectionUsePolygonScore?: boolean // RN only
detectionUseDirectionClassify?: boolean // RN only
useDirectionClassify?: boolean // RN only
onnxOptions?: {} // Node only. Pass to ONNX Runtime
}): Promise<Ocr>
8 changes: 6 additions & 2 deletions ake
@@ -30,9 +30,13 @@ def 'main react-native example' [] {
}

# Runc cpp example
def 'main cpp example' [] {
def 'main cpp example' [path?: string] {
cd packages/react-native/cpp/example
./ake start
if $path == null {
./ake start
} else {
./ake start $path
}
}

def publish [package: string, version: string] {
1 change: 1 addition & 0 deletions assets/ch_ppocr_mobile_v2.0_cls_infer.onnx
Binary file added assets/direction.jpg
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "@gutenye/ocr",
"version": "1.4.1",
"version": "1.4.2",
"workspaces": [
"packages/browser",
"packages/browser/example",
3 changes: 3 additions & 0 deletions packages/models/assets/ch_ppocr_mobile_v2.0_cls_infer.onnx
Git LFS file not shown
2 changes: 1 addition & 1 deletion packages/models/package.json
@@ -1,7 +1,7 @@
{
"name": "@gutenye/ocr-models",
"description": "Guten OCR is a high accurate text detection (OCR) Javascript/Typescript library that runs on Node.js, Browser, React Native and C++. Based on PaddleOCR and ONNX runtime",
"version": "1.4.0",
"version": "1.4.2",
"type": "module",
"license": "MIT",
"repository": {
161 changes: 83 additions & 78 deletions packages/react-native/cpp/classifier_process.cpp
@@ -13,95 +13,100 @@
// limitations under the License.

#include "classifier_process.h"
#include <iostream>
#include "timer.h"

const std::vector<int> cls_image_shape {3, 48, 192};
cv::Mat ClsResizeImg(cv::Mat img) {
int imgC, imgH, imgW;
imgC = cls_image_shape[0];
imgH = cls_image_shape[1];
imgW = cls_image_shape[2];

float ratio = static_cast<float>(img.cols) / static_cast<float>(img.rows);

int resize_w, resize_h;
if (ceilf(imgH * ratio) > imgW)
resize_w = imgW;
else
resize_w = int(ceilf(imgH * ratio));
cv::Mat resize_img;
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f, cv::INTER_LINEAR);
if (resize_w < imgW) {
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
}
return resize_img;
}
const std::vector<int> classifier_image_shape {3, 48, 192};

cv::Mat classifier_resize_image(cv::Mat source_image, int resize_height, int max_resize_width, Options &options);

ClassifierPredictor::ClassifierPredictor(Options &options)
: m_options {options}, m_onnx {Onnx(options.models.classifier_model_path)} {}

ClassifierResult ClassifierPredictor::predict(const cv::Mat &source_image, const float thresh) {
ModelPerformance performance {};

Timer timer;
timer.start();
auto image = preprocess(source_image);
timer.end();
performance.preprocess_time = timer.get_average_ms();

// Run predictor
std::vector<int64_t> input_shape {1, image.channels, image.height, image.width};
timer.start();
// TODO: hangs on run
auto model_output = m_onnx.run(image.data, input_shape);
timer.end();
performance.predict_time = timer.get_average_ms();

// Process Output
timer.start();
std::cout << "[DEBUG] 0" << std::endl;
auto result_image = postprocess(model_output, source_image, thresh);
timer.end();
performance.postprocess_time = timer.get_average_ms();

ClassifierPredictor::ClassifierPredictor(const std::string &modelDir, const int cpu_thread_num,
const std::string &cpu_power_mode) {}
performance.total_time = performance.preprocess_time + performance.predict_time + performance.postprocess_time;

void ClassifierPredictor::preprocess(const cv::Mat &img) {
return ClassifierResult {.data = result_image, .performance = performance};
}

ImageRaw ClassifierPredictor::preprocess(const cv::Mat &source_image) {
std::vector<float> mean = {0.5f, 0.5f, 0.5f};
std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
cv::Mat crop_img;
img.copyTo(crop_img);
cv::Mat resize_img;

int index = 0;
float wh_ratio = static_cast<float>(crop_img.cols) / static_cast<float>(crop_img.rows);
cv::Mat resized_image =
classifier_resize_image(source_image, classifier_image_shape[1], classifier_image_shape[2], m_options);
resized_image.convertTo(resized_image, CV_32FC3, 1 / 255.f);

resize_img = ClsResizeImg(crop_img);
resize_img.convertTo(resize_img, CV_32FC3, 1 / 255.f);
const float *destination_image = reinterpret_cast<const float *>(resized_image.data);
std::vector<float> data(resized_image.rows * resized_image.cols * 3);
NHWC3ToNC3HW(destination_image, data.data(), resized_image.rows * resized_image.cols, mean, scale);

const float *dimg = reinterpret_cast<const float *>(resize_img.data);
ImageRaw image_raw {.data = data, .width = resized_image.cols, .height = resized_image.rows, .channels = 3};

return image_raw;
}

cv::Mat ClassifierPredictor::postprocess(const cv::Mat &srcimg, const float thresh) {
// Get output and run postprocess
// std::unique_ptr<const Tensor> softmax_out(
// std::move(predictor_->GetOutput(0)));
// auto *softmax_scores = softmax_out->mutable_data<float>();
// auto softmax_out_shape = softmax_out->shape();
// float score = 0;
// int label = 0;
// for (int i = 0; i < softmax_out_shape[1]; i++)
// {
// if (softmax_scores[i] > score)
// {
// score = softmax_scores[i];
// label = i;
// }
// }
// if (label % 2 == 1 && score > thresh)
// {
// cv::rotate(srcimg, srcimg, 1);
// }
// return srcimg;
cv::Mat a {};
return a;
ClassifierResultData ClassifierPredictor::postprocess(ModelOutput &model_output, const cv::Mat &source_image,
const float thresh) {
std::cout << "[DEBUG] 1" << std::endl;
auto softmax_scores = model_output.data;
auto softmax_out_shape = model_output.shape;
float score = 0;
int label = 0;
for (int i = 0; i < softmax_out_shape[1]; i++) {
if (softmax_scores[i] > score) {
score = softmax_scores[i];
label = i;
}
}
if (label % 2 == 1 && score > thresh) {
std::cout << "[DEBUG] 2" << std::endl;
cv::rotate(source_image, source_image, 1);
std::cout << "[DEBUG] 3" << std::endl;
}
return source_image;
}

cv::Mat ClassifierPredictor::predict(const cv::Mat &img, double *preprocess_time, double *predictTime,
double *postprocessTime, const float thresh) {
cv::Mat src_img;
img.copyTo(src_img);
// Timer tic;
// tic.start();
preprocess(img);
// tic.end();
// *preprocess_time = tic.get_average_ms();
// std::cout << "cls predictor preprocess costs" << *preprocess_time;

// tic.start();
// predictor_->Run();
// tic.end();
// *predictTime = tic.get_average_ms();
// std::cout << "cls predictor predict costs" << *predictTime;

// tic.start();
cv::Mat srcimg = postprocess(src_img, thresh);
// tic.end();
// *postprocessTime = tic.get_average_ms();
// std::cout << "cls predictor predict costs" << *postprocessTime;
return srcimg;
cv::Mat classifier_resize_image(cv::Mat source_image, int resize_height, int max_resize_width, Options &options) {
auto source_width = source_image.cols;
auto source_height = source_image.rows;
float wh_ratio = static_cast<float>(source_width) / static_cast<float>(source_height);
auto resize_width = int(ceilf(resize_height * wh_ratio));
if (resize_width > max_resize_width) {
resize_width = max_resize_width;
}
if (options.is_debug) {
std::cout << "[DEBUG] Classifier resize image from " << source_width << "x" << source_height << " to "
<< resize_width << "x" << resize_height << std::endl;
}
cv::Mat resized_image;
cv::resize(source_image, resized_image, cv::Size(resize_width, resize_height), 0.f, 0.f, cv::INTER_LINEAR);
if (resize_width < max_resize_width) {
cv::copyMakeBorder(resized_image, resized_image, 0, 0, 0, max_resize_width - resize_width, cv::BORDER_CONSTANT,
cv::Scalar(0, 0, 0));
}
return resized_image;
}
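
A short worked example of the resize arithmetic above (not part of the commit; the 48x192 target shape comes from classifier_image_shape, while the crop sizes are assumed for illustration):

// Self-contained sketch of the classifier_resize_image padding rule.
// Assumptions: the 3x48x192 classifier input shape declared above; the
// example crop dimensions below are invented for illustration only.
#include <cmath>
#include <cstdio>

int main() {
  const int resize_height = 48, max_resize_width = 192;
  const int crops[][2] = {{200, 50}, {100, 50}};  // assumed {width, height} text crops
  for (const auto &crop : crops) {
    float wh_ratio = static_cast<float>(crop[0]) / static_cast<float>(crop[1]);
    int resize_width = static_cast<int>(std::ceil(resize_height * wh_ratio));
    if (resize_width > max_resize_width) resize_width = max_resize_width;
    int right_padding = max_resize_width - resize_width;  // filled with black via copyMakeBorder
    std::printf("%dx%d -> %dx%d, right padding %d px\n", crop[0], crop[1], resize_width, resize_height,
                right_padding);
  }
  return 0;
}
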
26 changes: 20 additions & 6 deletions packages/react-native/cpp/classifier_process.h
@@ -13,19 +13,33 @@
// limitations under the License.

#pragma once

#include "onnx.h"
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "options.h"
#include "shared.h"
#include "utils.h"

using ClassifierResultData = cv::Mat;

struct ClassifierResult {
ClassifierResultData data {};
ModelPerformance performance {};
};

class ClassifierPredictor {
public:
explicit ClassifierPredictor(const std::string &modelDir, const int cpu_thread_num,
const std::string &cpu_power_mode);
explicit ClassifierPredictor(Options &options);

cv::Mat predict(const cv::Mat &rgb_image, double *preprocess_time, double *predictTime, double *postprocessTime,
const float thresh);
ClassifierResult predict(const cv::Mat &image, const float thresh);

private:
void preprocess(const cv::Mat &rgba_image);
cv::Mat postprocess(const cv::Mat &img, const float thresh);
Options m_options {};

Onnx m_onnx;

ImageRaw preprocess(const cv::Mat &image);

ClassifierResultData postprocess(ModelOutput &model_output, const cv::Mat &source_image, const float thresh);
};
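
For context, a hypothetical caller sketch (not part of this commit) showing how the ClassifierPredictor interface declared above might be used. The model path, image path, and 0.9 threshold are assumptions, and Options is taken to be default-constructible as suggested by the Options m_options {} member:

// Hypothetical usage of the new ClassifierPredictor API; file paths and the
// 0.9 threshold are assumptions, not values taken from this commit.
#include <opencv2/imgcodecs.hpp>
#include "classifier_process.h"

int main() {
  Options options {};  // assumed default-constructible
  options.models.classifier_model_path = "assets/ch_ppocr_mobile_v2.0_cls_infer.onnx";  // assumed path
  ClassifierPredictor classifier {options};

  cv::Mat image = cv::imread("assets/direction.jpg");  // sample image added in this commit
  ClassifierResult result = classifier.predict(image, 0.9f);

  // result.data holds the (possibly rotated) crop; result.performance holds timing info.
  cv::imwrite("direction_out.jpg", result.data);
  return 0;
}
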
13 changes: 5 additions & 8 deletions packages/react-native/cpp/detection_process.cpp
@@ -14,21 +14,18 @@

#include "detection_process.h"
#include <cmath>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "db_post_process.h"
#include "timer.h"

void resize_image(const cv::Mat image, cv::Mat &resized_image, Options &options);

DetectionPredictor::DetectionPredictor(Options &options, const int cpu_thread_num, const std::string &cpu_power_mode)
DetectionPredictor::DetectionPredictor(Options &options)
: m_options {options}, m_onnx {Onnx(options.models.detection_model_path)} {}

DetectionResult DetectionPredictor::predict(cv::Mat &image) {
ModelPerformance performance;
ModelPerformance performance {};

cv::Mat source_image;
image.copyTo(source_image);
@@ -38,12 +35,12 @@ DetectionResult DetectionPredictor::predict(cv::Mat &image) {
auto preprocess_result = preprocess(image);
timer.end();
performance.preprocess_time = timer.get_average_ms();

auto &model_input = preprocess_result.model_input;
auto &resized_image = preprocess_result.resized_image;

// Run predictor
std::vector<int64_t> input_shape = {1, model_input.channels, model_input.height, model_input.width};

timer.start();
auto model_output = m_onnx.run(model_input.data, input_shape);
timer.end();
Expand All @@ -60,8 +57,8 @@ DetectionResult DetectionPredictor::predict(cv::Mat &image) {
return DetectionResult {.data = filter_boxes, .performance = performance};
}

PreprocessResult DetectionPredictor::preprocess(const cv::Mat &source_image) {
PreprocessResult result {};
DetectionPreprocessResult DetectionPredictor::preprocess(const cv::Mat &source_image) {
DetectionPreprocessResult result {};
resize_image(source_image, result.resized_image, m_options);

cv::Mat model_data;
9 changes: 6 additions & 3 deletions packages/react-native/cpp/detection_process.h
@@ -15,14 +15,16 @@
#pragma once

#include <map>
#include <string>
#include <vector>
#include "onnx.h"
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "options.h"
#include "shared.h"
#include "utils.h"

struct PreprocessResult {
struct DetectionPreprocessResult {
ImageRaw model_input {};
cv::Mat resized_image {};
};
@@ -36,15 +38,16 @@

class DetectionPredictor {
public:
explicit DetectionPredictor(Options &options, const int cpu_thread_num, const std::string &cpu_power_mode);
explicit DetectionPredictor(Options &options);

DetectionResult predict(cv::Mat &rgb_image);

private:
Options m_options {};

Onnx m_onnx;

PreprocessResult preprocess(const cv::Mat &image);
DetectionPreprocessResult preprocess(const cv::Mat &image);

DetectionResultData postprocess(ModelOutput &model_output, const cv::Mat &source_image, const cv::Mat &resized_image,
Options &options);
