Create video object detection demo (#607)

huggingface · Feb 27, 2024 · 271c6f1 · 271c6f1
1 parent 7772d1d
commit 271c6f1
Show file tree

Hide file tree

Showing 7 changed files with 1,824 additions and 0 deletions.
diff --git a/examples/video-object-detection/.gitignore b/examples/video-object-detection/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/examples/video-object-detection/index.html b/examples/video-object-detection/index.html
@@ -0,0 +1,43 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>Transformers.js | Real-time object detection</title>
+</head>
+
+<body>
+  <h1>
+    Real-time object detection w/
+    <a href="http://github.com/xenova/transformers.js" target="_blank">🤗 Transformers.js</a>
+  </h1>
+  <h4>
+    Runs locally in your browser, powered by
+    <a href="https://github.com/WongKinYiu/yolov9" target="_blank">YOLOv9</a>
+  </h4>
+  <div id="container">
+    <video id="video" autoplay muted playsinline></video>
+    <canvas id="canvas" width="360" height="240"></canvas>
+    <div id="overlay"></div>
+  </div>
+  <div id="controls">
+    <div>
+      <label>Image size</label>
+      (<label id="size-value">128</label>)
+      <br>
+      <input id="size" type="range" min="64" max="256" step="32" value="128" disabled>
+    </div>
+    <div>
+      <label>Threshold</label>
+      (<label id="threshold-value">0.25</label>)
+      <br>
+      <input id="threshold" type="range" min="0.01" max="1" step="0.01" value="0.25" disabled>
+    </div>
+  </div>
+  <label id="status"></label>
+
+  <script type="module" src="/main.js"></script>
+</body>
+
+</html>
diff --git a/examples/video-object-detection/main.js b/examples/video-object-detection/main.js
@@ -0,0 +1,146 @@
+import './style.css';
+
+import { env, AutoModel, AutoProcessor, RawImage } from '@xenova/transformers';
+
+// Since we will download the model from the Hugging Face Hub, we can skip the local model check
+env.allowLocalModels = false;
+
+// Proxy the WASM backend to prevent the UI from freezing
+env.backends.onnx.wasm.proxy = true;
+
+// Reference the elements that we will need
+const status = document.getElementById('status');
+const container = document.getElementById('container');
+const overlay = document.getElementById('overlay');
+const canvas = document.getElementById('canvas');
+const video = document.getElementById('video');
+const thresholdSlider = document.getElementById('threshold');
+const thresholdLabel = document.getElementById('threshold-value');
+const sizeSlider = document.getElementById('size');
+const sizeLabel = document.getElementById('size-value');
+
+status.textContent = 'Loading model...';
+
+// Load model and processor
+const model_id = 'Xenova/gelan-c_all';
+const model = await AutoModel.from_pretrained(model_id);
+const processor = await AutoProcessor.from_pretrained(model_id);
+
+// Set up controls
+let threshold = 0.25;
+thresholdSlider.addEventListener('input', () => {
+    threshold = Number(thresholdSlider.value);
+    thresholdLabel.textContent = threshold.toFixed(2);
+});
+thresholdSlider.disabled = false;
+
+let size = 128;
+processor.feature_extractor.size = { shortest_edge: size };
+sizeSlider.addEventListener('input', () => {
+    size = Number(sizeSlider.value);
+    processor.feature_extractor.size = { shortest_edge: size };
+    sizeLabel.textContent = size;
+});
+sizeSlider.disabled = false;
+
+status.textContent = 'Ready';
+
+const COLOURS = [
+    "#EF4444", "#4299E1", "#059669",
+    "#FBBF24", "#4B52B1", "#7B3AC2",
+    "#ED507A", "#1DD1A1", "#F3873A",
+    "#4B5563", "#DC2626", "#1852B4",
+    "#18A35D", "#F59E0B", "#4059BE",
+    "#6027A5", "#D63D60", "#00AC9B",
+    "#E64A19", "#272A34"
+]
+
+// Render a bounding box and label on the image
+function renderBox([xmin, ymin, xmax, ymax, score, id], [w, h]) {
+    if (score < threshold) return; // Skip boxes with low confidence
+
+    // Generate a random color for the box
+    const color = COLOURS[id % COLOURS.length];
+
+    // Draw the box
+    const boxElement = document.createElement('div');
+    boxElement.className = 'bounding-box';
+    Object.assign(boxElement.style, {
+        borderColor: color,
+        left: 100 * xmin / w + '%',
+        top: 100 * ymin / h + '%',
+        width: 100 * (xmax - xmin) / w + '%',
+        height: 100 * (ymax - ymin) / h + '%',
+    })
+
+    // Draw label
+    const labelElement = document.createElement('span');
+    labelElement.textContent = `${model.config.id2label[id]} (${(100 * score).toFixed(2)}%)`;
+    labelElement.className = 'bounding-box-label';
+    labelElement.style.backgroundColor = color;
+
+    boxElement.appendChild(labelElement);
+    overlay.appendChild(boxElement);
+}
+
+let isProcessing = false;
+let previousTime;
+const context = canvas.getContext('2d', { willReadFrequently: true });
+function updateCanvas() {
+    const { width, height } = canvas;
+    context.drawImage(video, 0, 0, width, height);
+
+    if (!isProcessing) {
+        isProcessing = true;
+        (async function () {
+            // Read the current frame from the video
+            const pixelData = context.getImageData(0, 0, width, height).data;
+            const image = new RawImage(pixelData, width, height, 4);
+
+            // Process the image and run the model
+            const inputs = await processor(image);
+            const { outputs } = await model(inputs);
+
+            // Update UI
+            overlay.innerHTML = '';
+
+            const sizes = inputs.reshaped_input_sizes[0].reverse();
+            outputs.tolist().forEach(x => renderBox(x, sizes));
+
+            if (previousTime !== undefined) {
+                const fps = 1000 / (performance.now() - previousTime);
+                status.textContent = `FPS: ${fps.toFixed(2)}`;
+            }
+            previousTime = performance.now();
+            isProcessing = false;
+        })();
+    }
+
+    window.requestAnimationFrame(updateCanvas);
+}
+
+// Start the video stream
+navigator.mediaDevices.getUserMedia(
+    { video: true }, // Ask for video
+).then((stream) => {
+    // Set up the video and canvas elements.
+    video.srcObject = stream;
+    video.play();
+
+    const videoTrack = stream.getVideoTracks()[0];
+    const { width, height } = videoTrack.getSettings();
+
+    canvas.width = width;
+    canvas.height = height;
+
+    // Set container width and height depending on the image aspect ratio
+    const ar = width / height;
+    const [cw, ch] = (ar > 720 / 405) ? [720, 720 / ar] : [405 * ar, 405];
+    container.style.width = `${cw}px`;
+    container.style.height = `${ch}px`;
+
+    // Start the animation loop
+    window.requestAnimationFrame(updateCanvas);
+}).catch((error) => {
+    alert(error);
+});