ropensci · JanMarvin · Apr 4, 2020 · Apr 5, 2020 · Apr 5, 2020 · Apr 5, 2020
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -16,5 +16,5 @@ LinkingTo: Rcpp
 Imports: Rcpp, magrittr
 LazyData: true
 Encoding: UTF-8
-RoxygenNote: 6.1.1
+RoxygenNote: 7.1.0
 Roxygen: list(markdown = TRUE)
diff --git a/NAMESPACE b/NAMESPACE
@@ -20,6 +20,7 @@ export(ocv_read)
 export(ocv_resize)
 export(ocv_sketch)
 export(ocv_stylize)
+export(ocv_text)
 export(ocv_video)
 export(ocv_write)
 importFrom(Rcpp,sourceCpp)

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -105,3 +105,7 @@ cvmat_markers <- function(ptr) {
     .Call('_opencv_cvmat_markers', PACKAGE = 'opencv', ptr)
 }
 
+# Thin R-level binding: forwards its seven arguments, unchanged and in order,
+# to the compiled routine '_opencv_text_detection' registered by 'opencv'.
+# NOTE(review): this file looks like Rcpp::compileAttributes() output; if so,
+# hand-written comments here are lost on regeneration -- confirm before keeping.
+text_detection <- function(input, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw) {
+    .Call('_opencv_text_detection', PACKAGE = 'opencv', input, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw)
+}
+
diff --git a/R/text.R b/R/text.R
@@ -0,0 +1,47 @@
+#' Text detection in images using EAST
+#'
+#' @description OpenCV sample using EAST (Efficient and Accurate Scene Text
+#'   Detector) to detect text in images. Requires the EAST pb-model, which is
+#'   not included in the package.
+#'
+#' @param image opencv image to be processed.
+#' @param width Preprocess input image by resizing to a specific width. It
+#'   should be a multiple of 32.
+#' @param height Preprocess input image by resizing to a specific height. It
+#'   should be a multiple of 32.
+#' @param thrs Confidence threshold between 0 and 1.
+#' @param nms Non-maximum suppression threshold.
+#' @param model Path to a binary .pb file containing the trained network.
+#' @param draw Draws visual output to the image.
+#'
+#' @return An opencv image: a copy of `image`, with the detected boxes and the
+#'   inference time drawn on it when `draw = TRUE`. The corner coordinates of
+#'   the detected boxes are attached as attribute `indices`, a data frame with
+#'   integer columns `x1`, `y1`, `x2`, `y2`, `x3`, `y3`, `x4`, `y4` (one row
+#'   per box, in pixels of the original image).
+#'
+#' @examples
+#' \dontrun{
+#' url <- paste0('https://upload.wikimedia.org/wikipedia/commons/6/6f/',
+#'               'Keep-calm-and-carry-on-scan.jpg')
+#' fl <- ocv_read(url)
+#'
+#' res <- ocv_text(image = fl, thrs = 0.7, nms = 0.3,
+#'                 model = "frozen_east_text_detection.pb")
+#' attr(res, "indices")
+#' }
+#'
+#' @references
+#' https://github.com/opencv/opencv/blob/master/samples/dnn/text_detection.cpp
+#'
+#' @export
+ocv_text <- function(image, thrs = 0.5, nms = 20, width = 320, height = 320,
+                     model, draw = FALSE) {
+
+  # NOTE(review): `nms` defaults to 20 while the example above passes 0.3;
+  # confirm the intended default. It is left unchanged here so existing
+  # callers keep their current behaviour.
+
+  if (missing(model))
+    stop("requires a model pb-file")
+
+  # Reject anything that is not exactly one usable path before it reaches the
+  # compiled code, where the failure would be much harder to read.
+  if (!is.character(model) || length(model) != 1L || is.na(model))
+    stop("'model' must be a single path to a pb-file", call. = FALSE)
+
+  # Expand a leading "~" here because the compiled layer receives the path
+  # as a plain string.
+  model <- path.expand(model)
+
+  if (!file.exists(model))
+    stop("model pb-file not found: ", model, call. = FALSE)
+
+  text_detection(input = image,
+                 confThreshold = thrs,
+                 nmsThreshold = nms,
+                 inpWidth = width,
+                 inpHeight = height,
+                 model = model,
+                 draw = draw)
+}
+
diff --git a/man/ocv_text.Rd b/man/ocv_text.Rd
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -296,6 +296,23 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// text_detection
+// Forward declaration of the implementation invoked by the wrapper below.
+XPtrMat text_detection(XPtrMat input, float confThreshold, float nmsThreshold, int inpWidth, int inpHeight, std::string model, bool draw);
+// .Call entry point: converts each SEXP argument to the C++ type expected by
+// text_detection(), calls it, and wraps the result back into a SEXP.
+// NOTE(review): this looks like Rcpp::compileAttributes() output; if so,
+// comments added here are lost on regeneration -- confirm before keeping.
+RcppExport SEXP _opencv_text_detection(SEXP inputSEXP, SEXP confThresholdSEXP, SEXP nmsThresholdSEXP, SEXP inpWidthSEXP, SEXP inpHeightSEXP, SEXP modelSEXP, SEXP drawSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< XPtrMat >::type input(inputSEXP);
+    Rcpp::traits::input_parameter< float >::type confThreshold(confThresholdSEXP);
+    Rcpp::traits::input_parameter< float >::type nmsThreshold(nmsThresholdSEXP);
+    Rcpp::traits::input_parameter< int >::type inpWidth(inpWidthSEXP);
+    Rcpp::traits::input_parameter< int >::type inpHeight(inpHeightSEXP);
+    Rcpp::traits::input_parameter< std::string >::type model(modelSEXP);
+    Rcpp::traits::input_parameter< bool >::type draw(drawSEXP);
+    rcpp_result_gen = Rcpp::wrap(text_detection(input, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw));
+    return rcpp_result_gen;
+END_RCPP
+}
 
 static const R_CallMethodDef CallEntries[] = {
     {"_opencv_cvmat_destroy", (DL_FUNC) &_opencv_cvmat_destroy, 1},
@@ -324,6 +341,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_opencv_cvmat_edges", (DL_FUNC) &_opencv_cvmat_edges, 1},
     {"_opencv_cvmat_hog", (DL_FUNC) &_opencv_cvmat_hog, 1},
     {"_opencv_cvmat_markers", (DL_FUNC) &_opencv_cvmat_markers, 1},
+    {"_opencv_text_detection", (DL_FUNC) &_opencv_text_detection, 7},
     {NULL, NULL, 0}
 };
 

diff --git a/src/text.cpp b/src/text.cpp
@@ -0,0 +1,171 @@
+#include "util.hpp"
+#include <opencv2/imgproc.hpp>
+
+// Packs the OpenCV version into one integer (major * 10000 + minor * 100 +
+// revision), so that e.g. 3.4.3 becomes 30403 and a minimum version can be
+// checked with a single preprocessor comparison.
+#define OPENCV_VERSION (CV_VERSION_MAJOR * 10000 \
++ CV_VERSION_MINOR * 100                         \
++ CV_VERSION_REVISION)
+
+#ifdef HAVE_OPENCV_DNN
+#include "opencv2/dnn.hpp"
+#endif
+
+using namespace cv;
+
+// Decodes the raw EAST network outputs into rotated boxes and their scores.
+//
+// scores      - 4-D blob, asserted to be 1 x 1 x H x W: one confidence value
+//               per feature-map cell.
+// geometry    - 4-D blob, asserted to be 1 x 5 x H x W: channels 0-3 are
+//               combined into the box height, width and position; channel 4
+//               is the rotation angle (fed to cos/sin, i.e. radians).
+// scoreThresh - cells whose score is below this value are skipped.
+// detections  - output; cleared, then one RotatedRect per accepted cell
+//               (its angle is stored in degrees).
+// confidences - output; cleared, then the score of each accepted cell, in the
+//               same order as `detections`.
+//
+// Throws std::runtime_error when compiled without the OpenCV DNN module or
+// against an OpenCV older than 3.4.3.
+void decode(const Mat& scores, const Mat& geometry, float scoreThresh,
+            std::vector<RotatedRect>& detections,
+            std::vector<float>& confidences)
+{
+#if !defined(HAVE_OPENCV_DNN) || OPENCV_VERSION < 30403
+  throw std::runtime_error("ocv_text req. OpenCV 3.4.3 or newer with DNN");
+#else
+  // Both outputs are parallel arrays, so reset them together; clearing only
+  // one would leave them misaligned if the caller passed in stale contents.
+  detections.clear();
+  confidences.clear();
+
+  CV_Assert(scores.dims == 4); CV_Assert(geometry.dims == 4);
+  CV_Assert(scores.size[0] == 1); CV_Assert(geometry.size[0] == 1);
+  CV_Assert(scores.size[1] == 1); CV_Assert(geometry.size[1] == 5);
+  CV_Assert(scores.size[2] == geometry.size[2]);
+  CV_Assert(scores.size[3] == geometry.size[3]);
+
+  const int height = scores.size[2];
+  const int width = scores.size[3];
+  for (int y = 0; y < height; ++y)
+  {
+    // Row pointers into the score map and each of the five geometry channels.
+    const float* scoresData = scores.ptr<float>(0, 0, y);
+    const float* x0_data = geometry.ptr<float>(0, 0, y);
+    const float* x1_data = geometry.ptr<float>(0, 1, y);
+    const float* x2_data = geometry.ptr<float>(0, 2, y);
+    const float* x3_data = geometry.ptr<float>(0, 3, y);
+    const float* anglesData = geometry.ptr<float>(0, 4, y);
+    for (int x = 0; x < width; ++x)
+    {
+      float score = scoresData[x];
+      if (score < scoreThresh)
+        continue;
+
+      // Decode a prediction.
+      // Multiply by 4 because the feature maps are 4 times smaller than the
+      // input image.
+      float offsetX = x * 4.0f, offsetY = y * 4.0f;
+      float angle = anglesData[x];
+      float cosA = std::cos(angle);
+      float sinA = std::sin(angle);
+      float h = x0_data[x] + x2_data[x];
+      float w = x1_data[x] + x3_data[x];
+
+      Point2f offset(offsetX + cosA * x1_data[x] + sinA * x2_data[x],
+                     offsetY - sinA * x1_data[x] + cosA * x2_data[x]);
+      Point2f p1 = Point2f(-sinA * h, -cosA * h) + offset;
+      Point2f p3 = Point2f(-cosA * w, sinA * w) + offset;
+      // The box centre is the midpoint of p1 and p3; RotatedRect takes its
+      // angle in degrees, hence the conversion.
+      RotatedRect r(0.5f * (p1 + p3), Size2f(w, h),
+                    -angle * 180.0f / (float)CV_PI);
+      detections.push_back(r);
+      confidences.push_back(score);
+    }
+  }
+#endif
+}
+
+
+// Runs the EAST text detector on an image and returns an annotated copy.
+//
+// input         - image to analyse; it is cloned, the original is not drawn on.
+// confThreshold - minimum score, used both when decoding and in NMSBoxes.
+// nmsThreshold  - threshold handed to cv::dnn::NMSBoxes.
+// inpWidth      - width the image is resized to for the network.
+// inpHeight     - height the image is resized to for the network.
+// model         - path handed to cv::dnn::readNet.
+// draw          - when true, box outlines and an inference-time label are
+//                 drawn on the returned copy.
+//
+// Returns the copied image with an "indices" attribute: a data frame with
+// integer columns x1, y1, ..., x4, y4 holding the four corners of every kept
+// box, rescaled from network-input size to the size of the input image.
+//
+// Throws std::runtime_error when compiled without the OpenCV DNN module or
+// against an OpenCV older than 3.4.3.
+// [[Rcpp::export]]
+XPtrMat text_detection(XPtrMat input, float confThreshold, float nmsThreshold,
+                       int inpWidth, int inpHeight, std::string model,
+                       bool draw)
+{
+#if !defined(HAVE_OPENCV_DNN) || OPENCV_VERSION < 30403
+  throw std::runtime_error("ocv_text req. OpenCV 3.4.3 or newer with DNN");
+#else
+  // Deep copy: all drawing below happens on this canvas only.
+  Mat canvas = get_mat(input).clone();
+
+  // The two output layers requested from the network: scores, then geometry.
+  std::vector<String> layerNames;
+  layerNames.push_back("feature_fusion/Conv_7/Sigmoid");
+  layerNames.push_back("feature_fusion/concat_3");
+
+  // Load network.
+  cv::dnn::Net net = cv::dnn::readNet(model);
+
+  // Resize to the requested input size, subtract the per-channel mean and
+  // swap the red and blue channels; no cropping.
+  Mat blob;
+  const Size netInputSize(inpWidth, inpHeight);
+  const Scalar channelMean(123.68, 116.78, 103.94);
+  cv::dnn::blobFromImage(canvas, blob, 1.0, netInputSize, channelMean,
+                         true, false);
+  net.setInput(blob);
+
+  std::vector<Mat> netOutputs;
+  net.forward(netOutputs, layerNames);
+
+  // Decode predicted bounding boxes (output 0: scores, output 1: geometry).
+  std::vector<RotatedRect> candidates;
+  std::vector<float> candidateScores;
+  decode(netOutputs[0], netOutputs[1], confThreshold,
+         candidates, candidateScores);
+
+  // Apply non-maximum suppression; `kept` receives indices into `candidates`.
+  std::vector<int> kept;
+  cv::dnn::NMSBoxes(candidates, candidateScores, confThreshold, nmsThreshold,
+                    kept);
+
+  // One integer column per corner coordinate, one row per kept box.
+  const size_t nKept = kept.size();
+  Rcpp::IntegerVector x1vec(nKept);
+  Rcpp::IntegerVector y1vec(nKept);
+  Rcpp::IntegerVector x2vec(nKept);
+  Rcpp::IntegerVector y2vec(nKept);
+  Rcpp::IntegerVector x3vec(nKept);
+  Rcpp::IntegerVector y3vec(nKept);
+  Rcpp::IntegerVector x4vec(nKept);
+  Rcpp::IntegerVector y4vec(nKept);
+
+  // Factors mapping network-input coordinates back onto the original image.
+  const float scaleX = (float)canvas.cols / inpWidth;
+  const float scaleY = (float)canvas.rows / inpHeight;
+  const Scalar green(0, 255, 0);
+
+  for (size_t i = 0; i < nKept; ++i)
+  {
+    Point2f corner[4];
+    candidates[kept[i]].points(corner);
+    for (int c = 0; c < 4; ++c)
+    {
+      corner[c].x *= scaleX;
+      corner[c].y *= scaleY;
+    }
+
+    // Float coordinates are stored into integer columns (implicit conversion).
+    x1vec.at(i) = corner[0].x;
+    y1vec.at(i) = corner[0].y;
+    x2vec.at(i) = corner[1].x;
+    y2vec.at(i) = corner[1].y;
+    x3vec.at(i) = corner[2].x;
+    y3vec.at(i) = corner[2].y;
+    x4vec.at(i) = corner[3].x;
+    y4vec.at(i) = corner[3].y;
+
+    // Outline the box by joining each corner to the next one, wrapping round.
+    if (draw)
+    {
+      for (int c = 0; c < 4; ++c)
+        line(canvas, corner[c], corner[(c + 1) % 4], green, 1);
+    }
+  }
+
+  if (draw)
+  {
+    // Print the inference time, converted from ticks to milliseconds.
+    std::vector<double> layerTimings;
+    const double ticksPerMs = getTickFrequency() / 1000;
+    const double elapsedMs = net.getPerfProfile(layerTimings) / ticksPerMs;
+    std::string label = format("Inference time: %.2f ms", elapsedMs);
+    putText(canvas, label, Point(0, 15),
+            FONT_HERSHEY_SIMPLEX, 0.5, green);
+  }
+
+  // Wrap the canvas and attach the corner coordinates as a data frame.
+  XPtrMat result = cvmat_xptr(canvas);
+  result.attr("indices") = Rcpp::DataFrame::create(
+    Rcpp::_["x1"] = x1vec,
+    Rcpp::_["y1"] = y1vec,
+    Rcpp::_["x2"] = x2vec,
+    Rcpp::_["y2"] = y2vec,
+    Rcpp::_["x3"] = x3vec,
+    Rcpp::_["y3"] = y3vec,
+    Rcpp::_["x4"] = x4vec,
+    Rcpp::_["y4"] = y4vec
+  );
+
+  return result;
+#endif
+}