From 05734c1e3ae0a3070217e6fccdaffd75852c7dc2 Mon Sep 17 00:00:00 2001 From: Guten Ye Date: Tue, 28 May 2024 21:39:30 +0800 Subject: [PATCH] feat(cpp): detect returns vector --- README.md | 9 ++++++++- .../react-native/cpp/example/cpp-example.cpp | 6 +++--- packages/react-native/cpp/native-ocr.cpp | 19 ++++++++----------- packages/react-native/cpp/native-ocr.h | 2 +- .../react-native/cpp/recognition_process.cpp | 15 +++++++++------ .../react-native/cpp/recognition_process.h | 18 ++++++++++++++---- 6 files changed, 43 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 0bf01c9..7db974f 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,14 @@ Ocr.create({ ocr.detect(imagePath, { onnxOptions?: {} // Node only. Pass to ONNX Runtime -}): Promise +}): Promise + +TextLine { + text: string + score: number + frame: { top, left, width, height } +} + ``` ## Development diff --git a/packages/react-native/cpp/example/cpp-example.cpp b/packages/react-native/cpp/example/cpp-example.cpp index d7606ef..4af3f3f 100644 --- a/packages/react-native/cpp/example/cpp-example.cpp +++ b/packages/react-native/cpp/example/cpp-example.cpp @@ -26,10 +26,10 @@ int main(int argc, char* argv[]) { fs::remove_all(debug_output_dir); fs::create_directory(debug_output_dir); NativeOcr* ocr = new NativeOcr(rawOptions, asset_dir, debug_output_dir); - auto lines = ocr->detect(image_path); + auto text_lines = ocr->detect(image_path); - // for (auto line : lines) { - // std::cout << line << std::endl; + // for (auto text_line : text_lines) { + // std::cout << text_line.score << " " << text_line.text << std::endl; // } return 0; } catch (const std::exception& e) { diff --git a/packages/react-native/cpp/native-ocr.cpp b/packages/react-native/cpp/native-ocr.cpp index 4a8d3be..fe074ad 100644 --- a/packages/react-native/cpp/native-ocr.cpp +++ b/packages/react-native/cpp/native-ocr.cpp @@ -42,7 +42,7 @@ NativeOcr::NativeOcr(std::unordered_map rawOptions, const m_dictionary.push_back(" "); } -std::vector NativeOcr::detect(std::string &image_path) { +std::vector NativeOcr::detect(std::string &image_path) { Timer timer; timer.start(); @@ -70,8 +70,7 @@ std::vector NativeOcr::detect(std::string &image_path) { cv::Mat image_copy; image.copyTo(image_copy); - std::vector recognition_text; - std::vector recognition_text_score; + std::vector text_lines; std::vector classifier_results; std::vector recognition_results; for (int i = detection_result.data.size() - 1; i >= 0; i--) { @@ -98,8 +97,7 @@ std::vector NativeOcr::detect(std::string &image_path) { cv::Mat resized_image; auto recognition_result = m_recognition_predictor->predict(crop_image, m_dictionary, resized_image); recognition_results.push_back(recognition_result); - recognition_text.push_back(recognition_result.data.first); - recognition_text_score.push_back(recognition_result.data.second); + text_lines.push_back(recognition_result.data); // if (m_options.is_debug) { // auto output_path = @@ -157,15 +155,14 @@ std::vector NativeOcr::detect(std::string &image_path) { } // print recognized text - std::vector lines(recognition_text.size()); - for (int i = 0; i < lines.size(); i++) { - if (m_options.is_debug) { - std::cout << "[DEBUG] " << i << "\t" << recognition_text_score[i] << "\t" << recognition_text[i] << std::endl; + if (m_options.is_debug) { + for (size_t index = 0; index < text_lines.size(); index++) { + auto text_line = text_lines[index]; + std::cout << "[DEBUG] " << index << "\t" << text_line.score << "\t" << text_line.text << std::endl; } - lines[i] = recognition_text[i]; } - return lines; + return text_lines; } cv::Mat get_rotate_crop_image(cv::Mat source_image, std::vector> box) { diff --git a/packages/react-native/cpp/native-ocr.h b/packages/react-native/cpp/native-ocr.h index 7789cb4..c99cc59 100644 --- a/packages/react-native/cpp/native-ocr.h +++ b/packages/react-native/cpp/native-ocr.h @@ -31,7 +31,7 @@ class NativeOcr { NativeOcr(std::unordered_map rawOptions, const std::string &assetDir, const std::string &debugOutputDir); - std::vector detect(std::string &image_path); + std::vector detect(std::string &image_path); private: Options m_options; diff --git a/packages/react-native/cpp/recognition_process.cpp b/packages/react-native/cpp/recognition_process.cpp index b161c0e..d39d4f4 100644 --- a/packages/react-native/cpp/recognition_process.cpp +++ b/packages/react-native/cpp/recognition_process.cpp @@ -47,14 +47,13 @@ RecognitionResult RecognitionPredictor::predict(const cv::Mat &source_image, std performance.predict_time = timer.get_average_ms(); timer.start(); - auto res = postprocess(model_output, source_image, charactor_dict); + auto text_line = postprocess(model_output, source_image, charactor_dict); timer.end(); - auto postprocessTime = timer.get_average_ms(); performance.postprocess_time = timer.get_average_ms(); performance.total_time = performance.preprocess_time + performance.predict_time + performance.postprocess_time; - return RecognitionResult {.data = res, .performance = performance}; + return RecognitionResult {.data = text_line, .performance = performance}; } ImageRaw RecognitionPredictor::preprocess(const cv::Mat &source_image, cv::Mat &resized_image) { @@ -76,8 +75,8 @@ ImageRaw RecognitionPredictor::preprocess(const cv::Mat &source_image, cv::Mat & return image_raw; } -std::pair RecognitionPredictor::postprocess(ModelOutput &model_output, const cv::Mat &source_image, - std::vector charactor_dict) { +TextLine RecognitionPredictor::postprocess(ModelOutput &model_output, const cv::Mat &source_image, + std::vector charactor_dict) { auto predict_batch = model_output.data; auto predict_shape = model_output.shape; @@ -101,7 +100,11 @@ std::pair RecognitionPredictor::postprocess(ModelOutput &mod last_index = argmax_idx; } score /= count; - return std::make_pair(text, score); + + return TextLine { + .text = text, + .score = score, + }; } cv::Mat recognition_resize_image(cv::Mat source_image, int resize_height, int resize_max_width, Options &options) { diff --git a/packages/react-native/cpp/recognition_process.h b/packages/react-native/cpp/recognition_process.h index 2738b08..535d6d7 100644 --- a/packages/react-native/cpp/recognition_process.h +++ b/packages/react-native/cpp/recognition_process.h @@ -21,10 +21,21 @@ #include "shared.h" #include "utils.h" -using RecognitionResultData = std::pair; +struct Frame { + double width {}; + double height {}; + double top {}; + double left {}; +}; + +struct TextLine { + std::string text {}; + float score {}; + Frame frame {}; +}; struct RecognitionResult { - RecognitionResultData data {}; + TextLine data {}; ModelPerformance performance {}; }; @@ -40,6 +51,5 @@ class RecognitionPredictor { ImageRaw preprocess(const cv::Mat &rgba_image, cv::Mat &resized_image); - RecognitionResultData postprocess(ModelOutput &model_output, const cv::Mat &rgba_image, - std::vector charactor_dict); + TextLine postprocess(ModelOutput &model_output, const cv::Mat &rgba_image, std::vector charactor_dict); };