Skip to content

Commit

Permalink
feat(cpp): detect returns vector<TextLine>
Browse files Browse the repository at this point in the history
  • Loading branch information
gutenye committed May 28, 2024
1 parent ef0a1b6 commit 05734c1
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 26 deletions.
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,14 @@ Ocr.create({
ocr.detect(imagePath, {
onnxOptions?: {} // Node only. Pass to ONNX Runtime
}): Promise<Result>
}): Promise<TextLine[]>
TextLine {
text: string
score: number
frame: { top, left, width, height }
}
```

## Development
Expand Down
6 changes: 3 additions & 3 deletions packages/react-native/cpp/example/cpp-example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ int main(int argc, char* argv[]) {
fs::remove_all(debug_output_dir);
fs::create_directory(debug_output_dir);
NativeOcr* ocr = new NativeOcr(rawOptions, asset_dir, debug_output_dir);
auto lines = ocr->detect(image_path);
auto text_lines = ocr->detect(image_path);

// for (auto line : lines) {
// std::cout << line << std::endl;
// for (auto text_line : text_lines) {
// std::cout << text_line.score << " " << text_line.text << std::endl;
// }
return 0;
} catch (const std::exception& e) {
Expand Down
19 changes: 8 additions & 11 deletions packages/react-native/cpp/native-ocr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ NativeOcr::NativeOcr(std::unordered_map<std::string, std::any> rawOptions, const
m_dictionary.push_back(" ");
}

std::vector<std::string> NativeOcr::detect(std::string &image_path) {
std::vector<TextLine> NativeOcr::detect(std::string &image_path) {
Timer timer;
timer.start();

Expand Down Expand Up @@ -70,8 +70,7 @@ std::vector<std::string> NativeOcr::detect(std::string &image_path) {
cv::Mat image_copy;
image.copyTo(image_copy);

std::vector<std::string> recognition_text;
std::vector<float> recognition_text_score;
std::vector<TextLine> text_lines;
std::vector<ClassifierResult> classifier_results;
std::vector<RecognitionResult> recognition_results;
for (int i = detection_result.data.size() - 1; i >= 0; i--) {
Expand All @@ -98,8 +97,7 @@ std::vector<std::string> NativeOcr::detect(std::string &image_path) {
cv::Mat resized_image;
auto recognition_result = m_recognition_predictor->predict(crop_image, m_dictionary, resized_image);
recognition_results.push_back(recognition_result);
recognition_text.push_back(recognition_result.data.first);
recognition_text_score.push_back(recognition_result.data.second);
text_lines.push_back(recognition_result.data);

// if (m_options.is_debug) {
// auto output_path =
Expand Down Expand Up @@ -157,15 +155,14 @@ std::vector<std::string> NativeOcr::detect(std::string &image_path) {
}

// print recognized text
std::vector<std::string> lines(recognition_text.size());
for (int i = 0; i < lines.size(); i++) {
if (m_options.is_debug) {
std::cout << "[DEBUG] " << i << "\t" << recognition_text_score[i] << "\t" << recognition_text[i] << std::endl;
if (m_options.is_debug) {
for (size_t index = 0; index < text_lines.size(); index++) {
auto text_line = text_lines[index];
std::cout << "[DEBUG] " << index << "\t" << text_line.score << "\t" << text_line.text << std::endl;
}
lines[i] = recognition_text[i];
}

return lines;
return text_lines;
}

cv::Mat get_rotate_crop_image(cv::Mat source_image, std::vector<std::vector<int>> box) {
Expand Down
2 changes: 1 addition & 1 deletion packages/react-native/cpp/native-ocr.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class NativeOcr {
NativeOcr(std::unordered_map<std::string, std::any> rawOptions, const std::string &assetDir,
const std::string &debugOutputDir);

std::vector<std::string> detect(std::string &image_path);
std::vector<TextLine> detect(std::string &image_path);

private:
Options m_options;
Expand Down
15 changes: 9 additions & 6 deletions packages/react-native/cpp/recognition_process.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,13 @@ RecognitionResult RecognitionPredictor::predict(const cv::Mat &source_image, std
performance.predict_time = timer.get_average_ms();

timer.start();
auto res = postprocess(model_output, source_image, charactor_dict);
auto text_line = postprocess(model_output, source_image, charactor_dict);
timer.end();
auto postprocessTime = timer.get_average_ms();
performance.postprocess_time = timer.get_average_ms();

performance.total_time = performance.preprocess_time + performance.predict_time + performance.postprocess_time;

return RecognitionResult {.data = res, .performance = performance};
return RecognitionResult {.data = text_line, .performance = performance};
}

ImageRaw RecognitionPredictor::preprocess(const cv::Mat &source_image, cv::Mat &resized_image) {
Expand All @@ -76,8 +75,8 @@ ImageRaw RecognitionPredictor::preprocess(const cv::Mat &source_image, cv::Mat &
return image_raw;
}

std::pair<std::string, float> RecognitionPredictor::postprocess(ModelOutput &model_output, const cv::Mat &source_image,
std::vector<std::string> charactor_dict) {
TextLine RecognitionPredictor::postprocess(ModelOutput &model_output, const cv::Mat &source_image,
std::vector<std::string> charactor_dict) {
auto predict_batch = model_output.data;
auto predict_shape = model_output.shape;

Expand All @@ -101,7 +100,11 @@ std::pair<std::string, float> RecognitionPredictor::postprocess(ModelOutput &mod
last_index = argmax_idx;
}
score /= count;
return std::make_pair(text, score);

return TextLine {
.text = text,
.score = score,
};
}

cv::Mat recognition_resize_image(cv::Mat source_image, int resize_height, int resize_max_width, Options &options) {
Expand Down
18 changes: 14 additions & 4 deletions packages/react-native/cpp/recognition_process.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,21 @@
#include "shared.h"
#include "utils.h"

using RecognitionResultData = std::pair<std::string, float>;
// Rectangle attached to a TextLine. Every field is value-initialized, so a
// default-constructed Frame is all zeros.
// NOTE(review): units and coordinate origin are not established in this
// header — presumably pixels relative to the input image; confirm against the
// code that fills it in.
struct Frame {
// Extent of the rectangle.
double width {};
double height {};
// Presumably the offsets of the rectangle's top and left edges — TODO confirm.
double top {};
double left {};
};

// One recognized line of text: the value type produced by
// RecognitionPredictor::postprocess() and returned in bulk by
// NativeOcr::detect().
// Member order matters: callers build this with designated initializers
// (.text, .score), which must follow declaration order.
struct TextLine {
// Recognized text; empty by default.
std::string text {};
// Score reported alongside the text; 0 by default.
float score {};
// Location of the line; stays at its all-zero default unless explicitly assigned.
Frame frame {};
};

struct RecognitionResult {
RecognitionResultData data {};
TextLine data {};
ModelPerformance performance {};
};

Expand All @@ -40,6 +51,5 @@ class RecognitionPredictor {

ImageRaw preprocess(const cv::Mat &rgba_image, cv::Mat &resized_image);

RecognitionResultData postprocess(ModelOutput &model_output, const cv::Mat &rgba_image,
std::vector<std::string> charactor_dict);
TextLine postprocess(ModelOutput &model_output, const cv::Mat &rgba_image, std::vector<std::string> charactor_dict);
};

0 comments on commit 05734c1

Please sign in to comment.