diff --git a/examples/webgpu-video-background-removal/.gitignore b/examples/webgpu-video-background-removal/.gitignore
new file mode 100644
index 000000000..a547bf36d
--- /dev/null
+++ b/examples/webgpu-video-background-removal/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/examples/webgpu-video-background-removal/index.html b/examples/webgpu-video-background-removal/index.html
new file mode 100644
index 000000000..59e0a8428
--- /dev/null
+++ b/examples/webgpu-video-background-removal/index.html
@@ -0,0 +1,43 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Transformers.js | Real-time background removal</title>
+</head>
+
+<body>
+    <h1>
+        Real-time background removal w/
+        <a href="https://github.com/xenova/transformers.js" target="_blank">🤗 Transformers.js</a>
+    </h1>
+    <h4>
+        Runs locally in your browser, powered by
+        <a href="https://huggingface.co/Xenova/modnet" target="_blank">MODNet</a>
+    </h4>
+    <div id="container">
+        <video id="video" autoplay muted playsinline></video>
+        <canvas id="canvas"></canvas>
+        <canvas id="output-canvas"></canvas>
+    </div>
+    <div id="controls">
+        <div>
+            <label>Image size</label>
+            (<label id="size-value">256</label>)
+            <br>
+            <input id="size" type="range" min="64" max="512" step="32" value="256" disabled>
+        </div>
+        <div>
+            <label>Stream scale</label>
+            (<label id="scale-value">0.5</label>)
+            <br>
+            <input id="scale" type="range" min="0.1" max="1" step="0.1" value="0.5" disabled>
+        </div>
+    </div>
+    <label id="status"></label>
+
+    <script type="module" src="/main.js"></script>
+</body>
+
+</html>
\ No newline at end of file
diff --git a/examples/webgpu-video-background-removal/main.js b/examples/webgpu-video-background-removal/main.js
new file mode 100644
index 000000000..620f21afb
--- /dev/null
+++ b/examples/webgpu-video-background-removal/main.js
@@ -0,0 +1,128 @@
+import './style.css';
+
+import { env, AutoModel, AutoProcessor, RawImage } from '@xenova/transformers';
+
+env.backends.onnx.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.17.1/dist/';
+env.backends.onnx.wasm.numThreads = 1;
+
+// Reference the elements that we will need
+const status = document.getElementById('status');
+const container = document.getElementById('container');
+const canvas = document.getElementById('canvas');
+const outputCanvas = document.getElementById('output-canvas');
+const video = document.getElementById('video');
+const sizeSlider = document.getElementById('size');
+const sizeLabel = document.getElementById('size-value');
+const scaleSlider = document.getElementById('scale');
+const scaleLabel = document.getElementById('scale-value');
+
+function setStreamSize(width, height) {
+    video.width = outputCanvas.width = canvas.width = Math.round(width);
+    video.height = outputCanvas.height = canvas.height = Math.round(height);
+}
+
+status.textContent = 'Loading model...';
+
+// Load model and processor
+const model_id = 'Xenova/modnet';
+let model;
+try {
+    model = await AutoModel.from_pretrained(model_id, {
+        device: 'webgpu',
+        dtype: 'fp32', // TODO: add fp16 support
+    });
+} catch (err) {
+    status.textContent = err.message;
+    alert(err.message)
+    throw err;
+}
+
+const processor = await AutoProcessor.from_pretrained(model_id);
+
+// Set up controls
+let size = 256;
+processor.feature_extractor.size = { shortest_edge: size };
+sizeSlider.addEventListener('input', () => {
+    size = Number(sizeSlider.value);
+    processor.feature_extractor.size = { shortest_edge: size };
+    sizeLabel.textContent = size;
+});
+sizeSlider.disabled = false;
+
+let scale = 0.5;
+scaleSlider.addEventListener('input', () => {
+    scale = Number(scaleSlider.value);
+    setStreamSize(video.videoWidth * scale, video.videoHeight * scale);
+    scaleLabel.textContent = scale;
+});
+scaleSlider.disabled = false;
+
+status.textContent = 'Ready';
+
+let isProcessing = false;
+let previousTime;
+const context = canvas.getContext('2d', { willReadFrequently: true });
+const outputContext = outputCanvas.getContext('2d', { willReadFrequently: true });
+function updateCanvas() {
+    const { width, height } = canvas;
+
+    if (!isProcessing) {
+        isProcessing = true;
+        (async function () {
+            // Read the current frame from the video
+            context.drawImage(video, 0, 0, width, height);
+            const currentFrame = context.getImageData(0, 0, width, height);
+            const image = new RawImage(currentFrame.data, width, height, 4);
+
+            // Pre-process image
+            const inputs = await processor(image);
+
+            // Predict alpha matte
+            const { output } = await model({ input: inputs.pixel_values });
+
+            const mask = await RawImage.fromTensor(output[0].mul(255).to('uint8')).resize(width, height);
+
+            // Update alpha channel
+            const outPixelData = currentFrame;
+            for (let i = 0; i < mask.data.length; ++i) {
+                outPixelData.data[4 * i + 3] = mask.data[i];
+            }
+            outputContext.putImageData(outPixelData, 0, 0);
+
+            if (previousTime !== undefined) {
+                const fps = 1000 / (performance.now() - previousTime);
+                status.textContent = `FPS: ${fps.toFixed(2)}`;
+            }
+            previousTime = performance.now();
+
+            isProcessing = false;
+        })();
+    }
+
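+    // Always request the next animation frame; the isProcessing flag above
+    // prevents a new inference from starting while the previous one is still running.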
+    window.requestAnimationFrame(updateCanvas);
+}
+
+// Start the video stream
+navigator.mediaDevices.getUserMedia(
+    { video: true }, // Ask for video
+).then((stream) => {
+    // Set up the video and canvas elements.
+    video.srcObject = stream;
+    video.play();
+
+    const videoTrack = stream.getVideoTracks()[0];
+    const { width, height } = videoTrack.getSettings();
+
+    setStreamSize(width * scale, height * scale);
+
+    // Set container width and height depending on the image aspect ratio
+    const ar = width / height;
+    const [cw, ch] = (ar > 720 / 405) ? [720, 720 / ar] : [405 * ar, 405];
+    container.style.width = `${cw}px`;
+    container.style.height = `${ch}px`;
+
+    // Start the animation loop
+    setTimeout(updateCanvas, 50);
+}).catch((error) => {
+    alert(error);
+});
diff --git a/examples/webgpu-video-background-removal/package.json b/examples/webgpu-video-background-removal/package.json
new file mode 100644
index 000000000..9ebe47afe
--- /dev/null
+++ b/examples/webgpu-video-background-removal/package.json
@@ -0,0 +1,17 @@
+{
+  "name": "webgpu-video-background-removal",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "preview": "vite preview"
+  },
+  "devDependencies": {
+    "vite": "^5.0.12"
+  },
+  "dependencies": {
+    "@xenova/transformers": "^3.0.0"
+  }
+}
diff --git a/examples/webgpu-video-background-removal/style.css b/examples/webgpu-video-background-removal/style.css
new file mode 100644
index 000000000..a86729e1c
--- /dev/null
+++ b/examples/webgpu-video-background-removal/style.css
@@ -0,0 +1,87 @@
+* {
+    box-sizing: border-box;
+    padding: 0;
+    margin: 0;
+    font-family: sans-serif;
+}
+
+html,
+body {
+    height: 100%;
+}
+
+body {
+    padding: 16px 32px;
+}
+
+body,
+#container {
+    display: flex;
+    flex-direction: column;
+    justify-content: center;
+    align-items: center;
+}
+
+#controls {
+    display: flex;
+    padding: 1rem;
+    gap: 1rem;
+}
+
+#controls>div {
+    text-align: center;
+}
+
+h1,
+h4 {
+    text-align: center;
+}
+
+h4 {
+    margin-top: 0.5rem;
+}
+
+#container {
+    position: relative;
+    width: 720px;
+    height: 405px;
+    max-width: 100%;
+    max-height: 100%;
+    border: 2px dashed #D1D5DB;
+    border-radius: 0.75rem;
+    overflow: hidden;
+    margin-top: 1rem;
+    background-size: 100% 100%;
+    background-position: center;
+    background-repeat: no-repeat;
+}
+
+#overlay,
+canvas {
+    position: absolute;
+    width: 100%;
+    height: 100%;
+}
+
+#status {
+    min-height: 16px;
+    margin: 8px 0;
+}
+
+.bounding-box {
+    position: absolute;
+    box-sizing: border-box;
+    border: solid 2px;
+}
+
+.bounding-box-label {
+    color: white;
+    position: absolute;
+    font-size: 12px;
+    margin: -16px 0 0 -2px;
+    padding: 1px;
+}
+
+#video, #canvas {
+    display: none;
+}
diff --git a/examples/webgpu-video-background-removal/vite.config.js b/examples/webgpu-video-background-removal/vite.config.js
new file mode 100644
index 000000000..6c32f52df
--- /dev/null
+++ b/examples/webgpu-video-background-removal/vite.config.js
@@ -0,0 +1,6 @@
+import { defineConfig } from 'vite';
+export default defineConfig({
+    build: {
+        target: 'esnext'
+    }
+});