Create video background removal demo

huggingface · Mar 13, 2024 · 91c570c · 91c570c
1 parent d24f764
commit 91c570c
Show file tree

Hide file tree

Showing 6 changed files with 305 additions and 0 deletions.
diff --git a/examples/webgpu-video-background-removal/.gitignore b/examples/webgpu-video-background-removal/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/examples/webgpu-video-background-removal/index.html b/examples/webgpu-video-background-removal/index.html
@@ -0,0 +1,43 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>Transformers.js | Real-time background removal</title>
+</head>
+
+<body>
+  <h1>
+    Real-time background removal w/
+    <a href="https://github.com/xenova/transformers.js" target="_blank">🤗 Transformers.js</a>
+  </h1>
+  <h4>
+    Runs locally in your browser, powered by
+    <a href="https://huggingface.co/Xenova/modnet" target="_blank">MODNet</a>
+  </h4>
+  <div id="container">
+    <video id="video" autoplay muted playsinline></video>
+    <canvas id="canvas" width="360" height="240"></canvas>
+    <canvas id="output-canvas" width="360" height="240"></canvas>
+  </div>
+  <div id="controls">
+    <div title="Read frames from your webcam and process them at a lower size (lower = faster)">
+      <label>Stream scale</label>
+      (<label id="scale-value">0.5</label>)
+      <br>
+      <input id="scale" type="range" min="0.1" max="1" step="0.1" value="0.5" disabled>
+    </div>
+    <div title="The length of the shortest edge of the image (lower = faster)">
+      <label>Image size</label>
+      (<label id="size-value">256</label>)
+      <br>
+      <input id="size" type="range" min="64" max="512" step="32" value="256" disabled>
+    </div>
+  </div>
+  <label id="status"></label>
+
+  <script type="module" src="/main.js"></script>
+</body>
+
+</html>
diff --git a/examples/webgpu-video-background-removal/main.js b/examples/webgpu-video-background-removal/main.js
@@ -0,0 +1,128 @@
+import './style.css';
+
+import { env, AutoModel, AutoProcessor, RawImage } from '@xenova/transformers';
+
+env.backends.onnx.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.17.1/dist/';
+env.backends.onnx.wasm.numThreads = 1;
+
+// Reference the elements that we will need
+const status = document.getElementById('status');
+const container = document.getElementById('container');
+const canvas = document.getElementById('canvas');
+const outputCanvas = document.getElementById('output-canvas');
+const video = document.getElementById('video');
+const sizeSlider = document.getElementById('size');
+const sizeLabel = document.getElementById('size-value');
+const scaleSlider = document.getElementById('scale');
+const scaleLabel = document.getElementById('scale-value');
+
+function setStreamSize(width, height) {
+    video.width = outputCanvas.width = canvas.width = Math.round(width);
+    video.height = outputCanvas.height = canvas.height = Math.round(height);
+}
+
+status.textContent = 'Loading model...';
+
+// Load model and processor
+const model_id = 'Xenova/modnet';
+let model;
+try {
+    model = await AutoModel.from_pretrained(model_id, {
+        device: 'webgpu',
+        dtype: 'fp32', // TODO: add fp16 support
+    });
+} catch (err) {
+    status.textContent = err.message;
+    alert(err.message)
+    throw err;
+}
+
+const processor = await AutoProcessor.from_pretrained(model_id);
+
+// Set up controls
+let size = 256;
+processor.feature_extractor.size = { shortest_edge: size };
+sizeSlider.addEventListener('input', () => {
+    size = Number(sizeSlider.value);
+    processor.feature_extractor.size = { shortest_edge: size };
+    sizeLabel.textContent = size;
+});
+sizeSlider.disabled = false;
+
+let scale = 0.5;
+scaleSlider.addEventListener('input', () => {
+    scale = Number(scaleSlider.value);
+    setStreamSize(video.videoWidth * scale, video.videoHeight * scale);
+    scaleLabel.textContent = scale;
+});
+scaleSlider.disabled = false;
+
+status.textContent = 'Ready';
+
+let isProcessing = false;
+let previousTime;
+const context = canvas.getContext('2d', { willReadFrequently: true });
+const outputContext = outputCanvas.getContext('2d', { willReadFrequently: true });
+function updateCanvas() {
+    const { width, height } = canvas;
+
+    if (!isProcessing) {
+        isProcessing = true;
+        (async function () {
+            // Read the current frame from the video
+            context.drawImage(video, 0, 0, width, height);
+            const currentFrame = context.getImageData(0, 0, width, height);
+            const image = new RawImage(currentFrame.data, width, height, 4);
+
+            // Pre-process image
+            const inputs = await processor(image);
+
+            // Predict alpha matte
+            const { output } = await model({ input: inputs.pixel_values });
+
+            const mask = await RawImage.fromTensor(output[0].mul(255).to('uint8')).resize(width, height);
+
+            // Update alpha channel
+            const outPixelData = currentFrame;
+            for (let i = 0; i < mask.data.length; ++i) {
+                outPixelData.data[4 * i + 3] = mask.data[i];
+            }
+            outputContext.putImageData(outPixelData, 0, 0);
+
+            if (previousTime !== undefined) {
+                const fps = 1000 / (performance.now() - previousTime);
+                status.textContent = `FPS: ${fps.toFixed(2)}`;
+            }
+            previousTime = performance.now();
+
+            isProcessing = false;
+        })();
+    }
+
+    window.requestAnimationFrame(updateCanvas);
+}
+
+// Start the video stream
+navigator.mediaDevices.getUserMedia(
+    { video: true }, // Ask for video
+).then((stream) => {
+    // Set up the video and canvas elements.
+    video.srcObject = stream;
+    video.play();
+
+    const videoTrack = stream.getVideoTracks()[0];
+    const { width, height } = videoTrack.getSettings();
+
+    setStreamSize(width * scale, height * scale);
+
+    // Set container width and height depending on the image aspect ratio
+    const ar = width / height;
+    const [cw, ch] = (ar > 720 / 405) ? [720, 720 / ar] : [405 * ar, 405];
+    container.style.width = `${cw}px`;
+    container.style.height = `${ch}px`;
+
+    // Start the animation loop
+    setTimeout(updateCanvas, 50);
+}).catch((error) => {
+    alert(error);
+});
diff --git a/examples/webgpu-video-background-removal/package.json b/examples/webgpu-video-background-removal/package.json
@@ -0,0 +1,17 @@
+{
+  "name": "webgpu-video-background-removal",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "preview": "vite preview"
+  },
+  "devDependencies": {
+    "vite": "^5.0.12"
+  },
+  "dependencies": {
+    "@xenova/transformers": "^3.0.0"
+  }
+}
diff --git a/examples/webgpu-video-background-removal/style.css b/examples/webgpu-video-background-removal/style.css
@@ -0,0 +1,87 @@
+* {
+  box-sizing: border-box;
+  padding: 0;
+  margin: 0;
+  font-family: sans-serif;
+}
+
+html,
+body {
+  height: 100%;
+}
+
+body {
+  padding: 16px 32px;
+}
+
+body,
+#container {
+  display: flex;
+  flex-direction: column;
+  justify-content: center;
+  align-items: center;
+}
+
+#controls {
+  display: flex;
+  padding: 1rem;
+  gap: 1rem;
+}
+
+#controls>div {
+  text-align: center;
+}
+
+h1,
+h4 {
+  text-align: center;
+}
+
+h4 {
+  margin-top: 0.5rem;
+}
+
+#container {
+  position: relative;
+  width: 720px;
+  height: 405px;
+  max-width: 100%;
+  max-height: 100%;
+  border: 2px dashed #D1D5DB;
+  border-radius: 0.75rem;
+  overflow: hidden;
+  margin-top: 1rem;
+  background-size: 100% 100%;
+  background-position: center;
+  background-repeat: no-repeat;
+}
+
+#overlay,
+canvas {
+  position: absolute;
+  width: 100%;
+  height: 100%;
+}
+
+#status {
+  min-height: 16px;
+  margin: 8px 0;
+}
+
+.bounding-box {
+  position: absolute;
+  box-sizing: border-box;
+  border: solid 2px;
+}
+
+.bounding-box-label {
+  color: white;
+  position: absolute;
+  font-size: 12px;
+  margin: -16px 0 0 -2px;
+  padding: 1px;
+}
+
+#video, #canvas {
+  display: none;
+}
diff --git a/examples/webgpu-video-background-removal/vite.config.js b/examples/webgpu-video-background-removal/vite.config.js
@@ -0,0 +1,6 @@
+import { defineConfig } from 'vite';
+export default defineConfig({
+  build: {
+    target: 'esnext'
+  }
+});