Skip to content

Commit

Permalink
Create video background removal demo
Browse files Browse the repository at this point in the history
  • Loading branch information
xenova committed Mar 13, 2024
1 parent d24f764 commit 91c570c
Show file tree
Hide file tree
Showing 6 changed files with 305 additions and 0 deletions.
24 changes: 24 additions & 0 deletions examples/webgpu-video-background-removal/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*

node_modules
dist
dist-ssr
*.local

# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
43 changes: 43 additions & 0 deletions examples/webgpu-video-background-removal/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Transformers.js | Real-time background removal</title>
</head>

<body>
  <h1>
    Real-time background removal w/
    <!-- Fixed: link was plain http://; use https to avoid an insecure redirect. -->
    <a href="https://github.com/xenova/transformers.js" target="_blank">🤗 Transformers.js</a>
  </h1>
  <h4>
    Runs locally in your browser, powered by
    <a href="https://huggingface.co/Xenova/modnet" target="_blank">MODNet</a>
  </h4>
  <!-- #video and #canvas are hidden working surfaces (see style.css);
       only #output-canvas is visible. All three are resized by main.js. -->
  <div id="container">
    <video id="video" autoplay muted playsinline></video>
    <canvas id="canvas" width="360" height="240"></canvas>
    <canvas id="output-canvas" width="360" height="240"></canvas>
  </div>
  <!-- Sliders start disabled; main.js enables them once the model has loaded. -->
  <div id="controls">
    <div title="Read frames from your webcam and process them at a lower size (lower = faster)">
      <label>Stream scale</label>
      (<label id="scale-value">0.5</label>)
      <br>
      <input id="scale" type="range" min="0.1" max="1" step="0.1" value="0.5" disabled>
    </div>
    <div title="The length of the shortest edge of the image (lower = faster)">
      <label>Image size</label>
      (<label id="size-value">256</label>)
      <br>
      <input id="size" type="range" min="64" max="512" step="32" value="256" disabled>
    </div>
  </div>
  <label id="status"></label>

  <script type="module" src="/main.js"></script>
</body>

</html>
128 changes: 128 additions & 0 deletions examples/webgpu-video-background-removal/main.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import './style.css';

import { env, AutoModel, AutoProcessor, RawImage } from '@xenova/transformers';

// Serve the ONNX Runtime WASM binaries from a pinned CDN build instead of
// bundling them, and run single-threaded.
env.backends.onnx.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/[email protected]/dist/';
env.backends.onnx.wasm.numThreads = 1;

// Reference the elements that we will need
const status = document.getElementById('status');
const container = document.getElementById('container');
const canvas = document.getElementById('canvas'); // hidden working canvas (frame capture)
const outputCanvas = document.getElementById('output-canvas'); // visible result canvas
const video = document.getElementById('video');
const sizeSlider = document.getElementById('size');
const sizeLabel = document.getElementById('size-value');
const scaleSlider = document.getElementById('scale');
const scaleLabel = document.getElementById('scale-value');

/**
 * Resize the video element and both canvases to the same rounded dimensions.
 * @param {number} width - Target width in CSS pixels (rounded to an integer).
 * @param {number} height - Target height in CSS pixels (rounded to an integer).
 */
function setStreamSize(width, height) {
  const w = Math.round(width);
  const h = Math.round(height);
  for (const el of [video, canvas, outputCanvas]) {
    el.width = w;
    el.height = h;
  }
}

status.textContent = 'Loading model...';

// Load model and processor
const model_id = 'Xenova/modnet';
let model;
try {
  // Top-level await — needs an ESM build target that supports it (see vite.config.js).
  model = await AutoModel.from_pretrained(model_id, {
    device: 'webgpu',
    dtype: 'fp32', // TODO: add fp16 support
  });
} catch (err) {
  // Surface the failure (e.g. browser without WebGPU) and abort the module.
  status.textContent = err.message;
  alert(err.message)
  throw err;
}

const processor = await AutoProcessor.from_pretrained(model_id);

// Set up controls
// "Image size": shortest edge the processor resizes each frame to before inference.
let size = 256;
processor.feature_extractor.size = { shortest_edge: size };
sizeSlider.addEventListener('input', () => {
  size = Number(sizeSlider.value);
  processor.feature_extractor.size = { shortest_edge: size };
  sizeLabel.textContent = size;
});
sizeSlider.disabled = false; // enable only once the model is ready

// "Stream scale": fraction of the native webcam resolution used for capture.
let scale = 0.5;
scaleSlider.addEventListener('input', () => {
  scale = Number(scaleSlider.value);
  setStreamSize(video.videoWidth * scale, video.videoHeight * scale);
  scaleLabel.textContent = scale;
});
scaleSlider.disabled = false; // enable only once the model is ready

status.textContent = 'Ready';

let isProcessing = false;
let previousTime;
const context = canvas.getContext('2d', { willReadFrequently: true });
const outputContext = outputCanvas.getContext('2d', { willReadFrequently: true });

/**
 * Animation-loop callback: grabs the current video frame, predicts an alpha
 * matte with the model, and draws the frame with the updated alpha channel to
 * the output canvas. Re-schedules itself every frame via requestAnimationFrame;
 * frames that arrive while inference is still running are skipped (guarded by
 * `isProcessing`).
 */
function updateCanvas() {
  const { width, height } = canvas;

  if (!isProcessing) {
    isProcessing = true;
    (async function () {
      try {
        // Read the current frame from the video
        context.drawImage(video, 0, 0, width, height);
        const currentFrame = context.getImageData(0, 0, width, height);
        const image = new RawImage(currentFrame.data, width, height, 4);

        // Pre-process image
        const inputs = await processor(image);

        // Predict alpha matte
        const { output } = await model({ input: inputs.pixel_values });

        // Scale the predicted matte back up to the stream size.
        const mask = await RawImage.fromTensor(output[0].mul(255).to('uint8')).resize(width, height);

        // Update alpha channel (every 4th byte of the RGBA pixel data).
        const outPixelData = currentFrame;
        for (let i = 0; i < mask.data.length; ++i) {
          outPixelData.data[4 * i + 3] = mask.data[i];
        }
        outputContext.putImageData(outPixelData, 0, 0);

        // FPS is measured between frame completions.
        if (previousTime !== undefined) {
          const fps = 1000 / (performance.now() - previousTime);
          status.textContent = `FPS: ${fps.toFixed(2)}`;
        }
        previousTime = performance.now();
      } catch (err) {
        // Surface per-frame failures instead of leaving an unhandled rejection.
        status.textContent = err.message;
        console.error(err);
      } finally {
        // Bug fix: always clear the flag, even when inference throws —
        // otherwise `isProcessing` stayed true forever and the loop stalled
        // silently after a single error.
        isProcessing = false;
      }
    })();
  }

  window.requestAnimationFrame(updateCanvas);
}

// Start the video stream
navigator.mediaDevices.getUserMedia(
  { video: true }, // Ask for video
).then((stream) => {
  // Set up the video and canvas elements.
  video.srcObject = stream;
  video.play();

  // Size everything from the actual webcam resolution, scaled down.
  const videoTrack = stream.getVideoTracks()[0];
  const { width, height } = videoTrack.getSettings();

  setStreamSize(width * scale, height * scale);

  // Set container width and height depending on the image aspect ratio:
  // fit the stream inside a 720x405 box (cap width for wide streams,
  // cap height for tall ones).
  const ar = width / height;
  const [cw, ch] = (ar > 720 / 405) ? [720, 720 / ar] : [405 * ar, 405];
  container.style.width = `${cw}px`;
  container.style.height = `${ch}px`;

  // Start the animation loop
  // NOTE(review): the 50ms delay presumably gives the video element time to
  // deliver its first frame — confirm before changing.
  setTimeout(updateCanvas, 50);
}).catch((error) => {
  // Permission denied or no camera available.
  alert(error);
});
17 changes: 17 additions & 0 deletions examples/webgpu-video-background-removal/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"name": "webgpu-video-background-removal",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "vite build",
"preview": "vite preview"
},
"devDependencies": {
"vite": "^5.0.12"
},
"dependencies": {
"@xenova/transformers": "^3.0.0"
}
}
87 changes: 87 additions & 0 deletions examples/webgpu-video-background-removal/style.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/* Global reset: border-box sizing and zeroed spacing everywhere. */
* {
  box-sizing: border-box;
  padding: 0;
  margin: 0;
  font-family: sans-serif;
}

html,
body {
  height: 100%;
}

body {
  padding: 16px 32px;
}

/* Center page content and the video container as vertical flex columns. */
body,
#container {
  display: flex;
  flex-direction: column;
  justify-content: center;
  align-items: center;
}

#controls {
  display: flex;
  padding: 1rem;
  gap: 1rem;
}

#controls>div {
  text-align: center;
}

h1,
h4 {
  text-align: center;
}

h4 {
  margin-top: 0.5rem;
}

/* Fixed-aspect stage the canvases are absolutely positioned within;
   main.js overrides width/height to match the stream's aspect ratio. */
#container {
  position: relative;
  width: 720px;
  height: 405px;
  max-width: 100%;
  max-height: 100%;
  border: 2px dashed #D1D5DB;
  border-radius: 0.75rem;
  overflow: hidden;
  margin-top: 1rem;
  background-size: 100% 100%;
  background-position: center;
  background-repeat: no-repeat;
}

/* Stack the canvases on top of each other, filling the container.
   NOTE(review): no #overlay element exists in index.html — possibly left
   over from another demo; confirm before removing. */
#overlay,
canvas {
  position: absolute;
  width: 100%;
  height: 100%;
}

#status {
  min-height: 16px;
  margin: 8px 0;
}

/* NOTE(review): .bounding-box / .bounding-box-label match no markup in
   index.html (they look copied from an object-detection demo); verify
   whether they are used before deleting. */
.bounding-box {
  position: absolute;
  box-sizing: border-box;
  border: solid 2px;
}

.bounding-box-label {
  color: white;
  position: absolute;
  font-size: 12px;
  margin: -16px 0 0 -2px;
  padding: 1px;
}

/* The raw video and the capture canvas are working surfaces only;
   just the processed #output-canvas is shown. */
#video, #canvas {
  display: none;
}
6 changes: 6 additions & 0 deletions examples/webgpu-video-background-removal/vite.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import { defineConfig } from 'vite';
// Build for the latest JS target: main.js uses top-level await, which older
// build targets reject.
export default defineConfig({
    build: {
        target: 'esnext'
    }
});

0 comments on commit 91c570c

Please sign in to comment.