Implement YuNet as an alternative, parallel presence detector alongsi…

…de Darknet. (#2285) * WIP break presence detection down to modules * Split YuNet face detection to separate module * WIP integrate both Yunet and Darknet with config tool * Fully integrate presence detection * Cleanup * Fix timeout reset and cleanup * Decouple FPS setting between darknet and yunet * Reintroduce missing yunet metrics * Improve traces and fix reported yunet presence values * Update configs * Fix metrics name * Add missing sliders * Add changelog entry * Improve FFI boundary with YuNet * Gate processor-specific definitions behind feature flags, default to x86 * Add C++ std flag * Lift out-of-order frame detection out of specific detectors * Remove overly specific minor version from image crate dependency * Clean up cargo.toml * Use target_feature conditional compilation instead of cargo features * Add an FFI bridge wrapper to reduce impact on the upstream YuNet code * Add missing initializer * Make face conversion function fallible and discard invalid faces * Address review comments * Add debug log when discarding a bad face * Address review comments * Clear presence boxes after disabling presence detectors * Cleanup * Address review comments * Add support for face landmarks * rename config defaults * Improve grafana metrics with detector labels * Prettify face landmarks with specific landmark definitions * Render landmark boxes in debug mode * Fix landmark scaling * Add face confidence metric * Address review comments * Address review comments * Add config label to help prevent misconfiguration of presence detectors * Clarify rectangle reflection * Improve warning label * Reset metrics before disabling yunet/darknet detectors * Reset also confidence in yunet reset_and_report(( --------- Co-authored-by: Matěj Laitl <[email protected]>
tonarino · Mar 15, 2023 · 7474ff2 · 7474ff2
commit 7474ff2
Show file tree

Hide file tree

Showing 12 changed files with 1,993 additions and 0 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "rusty-yunet"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+cxx = "1.0"
+image = "0.23"
+log = "0.4"
+serde = { version = "1", features = ["derive"] }
+thiserror = "1.0"
+tonari-math = { path = "../tonari-math" }
+
+[build-dependencies]
+cxx-build = "1.0"
+
+[features]
+default = []
diff --git a/README.md b/README.md
@@ -0,0 +1,12 @@
+## Rusty YuNet
+
+A thin rusty wrapper over the YuNet face detection network (aka `libfacedetect`).
+
+Currently frozen to commit 
+[40926655865c233b33d3de94302174efb6b5ac55](https://github.com/ShiqiYu/libfacedetection/tree/40926655865c233b33d3de94302174efb6b5ac55).
+
+### Update strategy
+
+At the moment, `Rusty Yunet` doesn't track the upstream YuNet repository, so any upstream patches must
+be implemented manually. For the time being, these will be limited to significant bugfixes and security/safety
+patches.
diff --git a/build.rs b/build.rs
@@ -0,0 +1,31 @@
+fn main() {
+    let mut build = cxx_build::bridge("src/lib.rs");
+    let build = build
+        .include("src/libfacedetection")
+        .file("src/libfacedetection/facedetectcnn-data.cpp")
+        .file("src/libfacedetection/facedetectcnn-model.cpp")
+        .file("src/libfacedetection/facedetectcnn.cpp")
+        .file("src/bridge_wrapper.cpp")
+        .flag_if_supported("-std=c++11")
+        .flag("-O3");
+
+    // AVX (advanced vector extensions) support
+    #[cfg(target_feature = "avx2")]
+    let build = build.flag("-mavx2").define("_ENABLE_AVX2", None);
+
+    // Fused multiply-add instruction support.
+    #[cfg(target_feature = "fma")]
+    let build = build.flag("-mfma");
+
+    #[cfg(target_feature = "neon")]
+    let build = build.define("_ENABLE_NEON", None);
+
+    build.compile("rusty-yunet");
+
+    println!("cargo:rerun-if-changed=src/libfacedetection/facedetectcnn-data.cpp");
+    println!("cargo:rerun-if-changed=src/libfacedetection/facedetectcnn-model.cpp");
+    println!("cargo:rerun-if-changed=src/libfacedetection/facedetectcnn.cpp");
+    println!("cargo:rerun-if-changed=src/libfacedetection/facedetectcnn.h");
+    println!("cargo:rerun-if-changed=src/bridge_wrapper.h");
+    println!("cargo:rerun-if-changed=src/bridge_wrapper.cpp");
+}
diff --git a/sample.jpg b/sample.jpg
diff --git a/src/bridge_wrapper.cpp b/src/bridge_wrapper.cpp
@@ -0,0 +1,22 @@
+#include "bridge_wrapper.h"
+
+rust::Vec<BridgeFace> wrapper_detect_faces(const unsigned char* rgbImageData, int width, int height, int step) {
+    rust::Vec<BridgeFace> rust_faces;
+    std::vector<FaceRect> faces = objectdetect_cnn(rgbImageData, width, height, step); 
+
+    for (FaceRect f: faces) {
+        BridgeFace bridge_face = BridgeFace {
+            .score = f.score,
+            .x = f.x,
+            .y = f.y,
+            .w = f.w,
+            .h = f.h,
+            .lm = {}
+        };
+
+        std::copy(std::begin(f.lm), std::end(f.lm), bridge_face.lm.begin());
+        rust_faces.push_back(bridge_face);
+    }
+
+    return rust_faces;
+}
diff --git a/src/bridge_wrapper.h b/src/bridge_wrapper.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include "rusty-yunet/src/libfacedetection/facedetectcnn.h"
+#include "rusty-yunet/src/lib.rs.h"
+#include "rust/cxx.h"
+
+#include <vector>
+
+rust::Vec<BridgeFace> wrapper_detect_faces(const unsigned char* rgbImageData, int width, int height, int step);
diff --git a/src/lib.rs b/src/lib.rs
@@ -0,0 +1,197 @@
+#![warn(clippy::all, clippy::clone_on_ref_ptr, clippy::mod_module_files)]
+
+use image::{buffer::ConvertBuffer, open, Bgr, ImageBuffer};
+use log::debug;
+use serde::Serialize;
+use std::{num::TryFromIntError, path::Path};
+use thiserror::Error;
+use tonari_math::Rect;
+
+#[cxx::bridge]
+mod ffi {
+    // Shared type visible from both C++ and Rust
+    #[derive(Debug)]
+    struct BridgeFace {
+        score: f32,
+        x: i32,
+        y: i32,
+        w: i32,
+        h: i32,
+        lm: [i32; 10],
+    }
+
+    unsafe extern "C++" {
+        include!("rusty-yunet/src/bridge_wrapper.h");
+
+        unsafe fn wrapper_detect_faces(
+            rgb_image_data: *const u8,
+            width: i32,
+            height: i32,
+            step: i32,
+        ) -> Vec<BridgeFace>;
+    }
+}
+
+#[derive(Error, Debug)]
+pub enum YuNetError {
+    #[error("Invalid input file")]
+    InvalidFile,
+    #[error("Image error")]
+    ImageError(#[from] image::ImageError),
+    #[error("Face detection failed")]
+    FaceDetectionFailed,
+}
+
+/// NOTE: "right" and "left" are defined in the natural face sense;
+/// a person's right eye is seen on the left side of the screen.
+#[derive(Debug, Clone, Serialize)]
+pub struct FaceLandmarks<T> {
+    pub right_eye: (T, T),
+    pub left_eye: (T, T),
+    pub nose: (T, T),
+    pub mouth_right: (T, T),
+    pub mouth_left: (T, T),
+}
+
+impl FaceLandmarks<u16> {
+    fn from_yunet_landmark_array(landmarks: &[i32; 10]) -> Result<Self, TryFromIntError> {
+        Ok(Self {
+            right_eye: (landmarks[0].try_into()?, landmarks[1].try_into()?),
+            left_eye: (landmarks[2].try_into()?, landmarks[3].try_into()?),
+            nose: (landmarks[4].try_into()?, landmarks[5].try_into()?),
+            mouth_right: (landmarks[6].try_into()?, landmarks[7].try_into()?),
+            mouth_left: (landmarks[8].try_into()?, landmarks[9].try_into()?),
+        })
+    }
+}
+
+#[derive(Debug, Clone, Serialize)]
+pub struct Face {
+    /// How confident (0..1) YuNet is that the rectangle represents a valid face.
+    confidence: f32,
+    /// Location of the face on absolute pixel coordinates.
+    rectangle: Rect<u16>,
+    /// The resolution of the image in which this face was detected (width, height).
+    detection_dimensions: (u16, u16),
+    /// Coordinates of five face landmarks.
+    landmarks: FaceLandmarks<u16>,
+}
+
+impl Face {
+    /// Conversion is fallible, as YuNet has been known to report faces with
+    /// negative dimensions, rarely.
+    fn from_yunet_bridge_face(
+        face_rect: &ffi::BridgeFace,
+        detection_dimensions: (u16, u16),
+    ) -> Result<Self, TryFromIntError> {
+        Ok(Self {
+            confidence: face_rect.score,
+            rectangle: Rect::with_size(
+                face_rect.x.try_into()?,
+                face_rect.y.try_into()?,
+                face_rect.w.try_into()?,
+                face_rect.h.try_into()?,
+            ),
+            landmarks: FaceLandmarks::from_yunet_landmark_array(&face_rect.lm)?,
+            detection_dimensions,
+        })
+    }
+
+    /// How confident (0..1) YuNet is that the rectangle is a face.
+    pub fn confidence(&self) -> f32 {
+        self.confidence
+    }
+
+    /// Face rectangle in absolute pixel coordinates.
+    pub fn rectangle(&self) -> Rect<u16> {
+        self.rectangle
+    }
+
+    /// Face rectangle in normalized 0..1 coordinates.
+    pub fn normalized_rectangle(&self) -> Rect<f32> {
+        Rect::with_size(
+            self.rectangle.left() as f32 / self.detection_dimensions.0 as f32,
+            self.rectangle.top() as f32 / self.detection_dimensions.1 as f32,
+            self.rectangle.width() as f32 / self.detection_dimensions.0 as f32,
+            self.rectangle.height() as f32 / self.detection_dimensions.1 as f32,
+        )
+    }
+
+    /// Coordinates of five face landmarks.
+    pub fn landmarks(&self) -> &FaceLandmarks<u16> {
+        &self.landmarks
+    }
+
+    /// Coordinates of five face landmarks in normalized 0..1 coordinates.
+    pub fn normalized_landmarks(&self) -> FaceLandmarks<f32> {
+        FaceLandmarks {
+            right_eye: (
+                self.landmarks.right_eye.0 as f32 / self.detection_dimensions.0 as f32,
+                self.landmarks.right_eye.1 as f32 / self.detection_dimensions.1 as f32,
+            ),
+            left_eye: (
+                self.landmarks.left_eye.0 as f32 / self.detection_dimensions.0 as f32,
+                self.landmarks.left_eye.1 as f32 / self.detection_dimensions.1 as f32,
+            ),
+            nose: (
+                self.landmarks.nose.0 as f32 / self.detection_dimensions.0 as f32,
+                self.landmarks.nose.1 as f32 / self.detection_dimensions.1 as f32,
+            ),
+            mouth_right: (
+                self.landmarks.mouth_right.0 as f32 / self.detection_dimensions.0 as f32,
+                self.landmarks.mouth_right.1 as f32 / self.detection_dimensions.1 as f32,
+            ),
+            mouth_left: (
+                self.landmarks.mouth_left.0 as f32 / self.detection_dimensions.0 as f32,
+                self.landmarks.mouth_left.1 as f32 / self.detection_dimensions.1 as f32,
+            ),
+        }
+    }
+}
+
+pub fn detect_faces<T: ConvertBuffer<ImageBuffer<Bgr<u8>, Vec<u8>>>>(
+    image_buffer: &T,
+) -> Result<Vec<Face>, YuNetError> {
+    let image_buffer = image_buffer.convert();
+    let (width, height) = (image_buffer.width() as u16, image_buffer.height() as u16);
+
+    let faces = unsafe {
+        crate::ffi::wrapper_detect_faces(
+            image_buffer.as_ptr(),
+            width as i32,
+            height as i32,
+            3 * width as i32,
+        )
+    };
+    Ok(faces
+        .into_iter()
+        .filter_map(|f| match Face::from_yunet_bridge_face(&f, (width, height)) {
+            Ok(face) => Some(face),
+            Err(e) => {
+                debug!("Yunet detected an invalid face: {f:?}: {e}. Discarding it.");
+                None
+            },
+        })
+        .collect())
+}
+
+pub fn detect_faces_from_file(filename: impl AsRef<Path>) -> Result<Vec<Face>, YuNetError> {
+    let image_buffer = open(&filename)?.into_bgr8();
+    detect_faces(&image_buffer)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn detect_sample_faces() {
+        // Loads a sample with three faces clearly staggered in distance. Detecting the biggest
+        // face with high confidence should be completely expected. Detecting the mid-sized face
+        // is good, as it probably stretches what we consider "presence" in front of a normal
+        // installation. Detecting the smallest face is very unrealistic and unnecessary.
+        //
+        // Detecting two faces with this test at this resolution can be considered a good result.
+        assert_eq!(2, detect_faces_from_file("sample.jpg").unwrap().len());
+    }
+}