-
Notifications
You must be signed in to change notification settings - Fork 797
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create video background removal demo
- Loading branch information
Showing
6 changed files
with
305 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Logs | ||
logs | ||
*.log | ||
npm-debug.log* | ||
yarn-debug.log* | ||
yarn-error.log* | ||
pnpm-debug.log* | ||
lerna-debug.log* | ||
|
||
node_modules | ||
dist | ||
dist-ssr | ||
*.local | ||
|
||
# Editor directories and files | ||
.vscode/* | ||
!.vscode/extensions.json | ||
.idea | ||
.DS_Store | ||
*.suo | ||
*.ntvs* | ||
*.njsproj | ||
*.sln | ||
*.sw? |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
|
||
<head> | ||
<meta charset="UTF-8" /> | ||
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> | ||
<title>Transformers.js | Real-time background removal</title> | ||
</head> | ||
|
||
<body> | ||
<h1> | ||
Real-time background removal w/ | ||
<a href="http://github.com/xenova/transformers.js" target="_blank">🤗 Transformers.js</a> | ||
</h1> | ||
<h4> | ||
Runs locally in your browser, powered by | ||
<a href="https://huggingface.co/Xenova/modnet" target="_blank">MODNet</a> | ||
</h4> | ||
<div id="container"> | ||
<video id="video" autoplay muted playsinline></video> | ||
<canvas id="canvas" width="360" height="240"></canvas> | ||
<canvas id="output-canvas" width="360" height="240"></canvas> | ||
</div> | ||
<div id="controls"> | ||
<div title="Read frames from your webcam and process them at a lower size (lower = faster)"> | ||
<label>Stream scale</label> | ||
(<label id="scale-value">0.5</label>) | ||
<br> | ||
<input id="scale" type="range" min="0.1" max="1" step="0.1" value="0.5" disabled> | ||
</div> | ||
<div title="The length of the shortest edge of the image (lower = faster)"> | ||
<label>Image size</label> | ||
(<label id="size-value">256</label>) | ||
<br> | ||
<input id="size" type="range" min="64" max="512" step="32" value="256" disabled> | ||
</div> | ||
</div> | ||
<label id="status"></label> | ||
|
||
<script type="module" src="/main.js"></script> | ||
</body> | ||
|
||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
import './style.css'; | ||
|
||
import { env, AutoModel, AutoProcessor, RawImage } from '@xenova/transformers'; | ||
|
||
// Fetch the ONNX Runtime WebAssembly binaries from the jsDelivr CDN.
// NOTE(review): the `<package>@<version>` path segment was mangled by e-mail
// obfuscation in this copy of the file ("[email protected]"), which yields a URL
// that cannot resolve. `onnxruntime-web@1.17.1` is assumed here -- confirm it
// against the onnxruntime-web version the installed transformers.js build expects.
env.backends.onnx.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.17.1/dist/';
// Run the WASM backend with a single thread.
env.backends.onnx.wasm.numThreads = 1;
|
||
// Look up, once, every DOM node the demo reads from or writes to
const byId = (id) => document.getElementById(id);

// Video source and the two canvases (frame grabber and visible output)
const video = byId('video');
const canvas = byId('canvas');
const outputCanvas = byId('output-canvas');
const container = byId('container');

// Controls and their value labels
const scaleSlider = byId('scale');
const scaleLabel = byId('scale-value');
const sizeSlider = byId('size');
const sizeLabel = byId('size-value');

// Status line shown below the controls
const status = byId('status');
|
||
/**
 * Gives the video element, the frame-grabbing canvas and the output canvas the
 * same pixel dimensions.
 *
 * Requests that are not finite, positive numbers (for example `NaN` from a
 * missing track setting, or `0` while the video has no dimensions yet) are
 * ignored, leaving the current size untouched. Valid sizes are rounded to whole
 * pixels and never drop below 1, because a zero-sized canvas cannot be read back.
 *
 * @param {number} width - Requested width in pixels.
 * @param {number} height - Requested height in pixels.
 * @returns {void}
 */
function setStreamSize(width, height) {
    const isUsable = (value) => Number.isFinite(value) && value > 0;
    if (!isUsable(width) || !isUsable(height)) {
        return;
    }

    const pixelWidth = Math.max(1, Math.round(width));
    const pixelHeight = Math.max(1, Math.round(height));

    video.width = outputCanvas.width = canvas.width = pixelWidth;
    video.height = outputCanvas.height = canvas.height = pixelHeight;
}
|
||
status.textContent = 'Loading model...';

// Load model and processor.
// The two loads do not depend on each other, so they are started together. Both
// sit inside the same try block so that a failure of either one is shown to the
// user instead of leaving the status stuck on "Loading model...".
const model_id = 'Xenova/modnet';
let model;
let processor;
try {
    [model, processor] = await Promise.all([
        AutoModel.from_pretrained(model_id, {
            device: 'webgpu',
            dtype: 'fp32', // TODO: add fp16 support
        }),
        AutoProcessor.from_pretrained(model_id),
    ]);
} catch (err) {
    status.textContent = err.message;
    alert(err.message);
    throw err;
}

// Set up controls.
// Initial values are read from the sliders themselves (rather than repeating the
// defaults from the markup) so the state, the labels and the slider positions
// agree even if the browser restored a non-default slider position.

// Image size: length of the shortest edge the processor resizes frames to
let size = Number(sizeSlider.value);
processor.feature_extractor.size = { shortest_edge: size };
sizeLabel.textContent = size;
sizeSlider.addEventListener('input', () => {
    size = Number(sizeSlider.value);
    processor.feature_extractor.size = { shortest_edge: size };
    sizeLabel.textContent = size;
});
sizeSlider.disabled = false;

// Stream scale: fraction of the camera resolution at which frames are read
let scale = Number(scaleSlider.value);
scaleLabel.textContent = scale;
scaleSlider.addEventListener('input', () => {
    scale = Number(scaleSlider.value);
    setStreamSize(video.videoWidth * scale, video.videoHeight * scale);
    scaleLabel.textContent = scale;
});
scaleSlider.disabled = false;

status.textContent = 'Ready';
|
||
// 2D contexts for the frame-grabbing canvas and for the visible output canvas
const contextOptions = { willReadFrequently: true };
const context = canvas.getContext('2d', contextOptions);
const outputContext = outputCanvas.getContext('2d', contextOptions);

// Frame-loop state: whether a frame is currently being processed, and when the
// previous frame finished (used for the FPS read-out; undefined until then)
let isProcessing = false;
let previousTime;
/**
 * Processes a single video frame: reads it from the video element, predicts the
 * alpha matte, writes the matte into the frame's alpha channel and paints the
 * result on the output canvas. On success the FPS read-out is refreshed.
 *
 * This function never rejects. A failure is logged and shown in the status
 * label, and `isProcessing` is always cleared afterwards so that the animation
 * loop can try again on a later frame instead of stalling forever.
 *
 * @param {number} width - Width, in pixels, at which the frame is read and painted.
 * @param {number} height - Height, in pixels, at which the frame is read and painted.
 * @returns {Promise<void>}
 */
async function processFrame(width, height) {
    try {
        // Read the current frame from the video
        context.drawImage(video, 0, 0, width, height);
        const currentFrame = context.getImageData(0, 0, width, height);
        const image = new RawImage(currentFrame.data, width, height, 4);

        // Pre-process image
        const inputs = await processor(image);

        // Predict alpha matte
        const { output } = await model({ input: inputs.pixel_values });

        // Scale the matte to 0-255 and bring it back to the frame's size
        const mask = await RawImage.fromTensor(output[0].mul(255).to('uint8')).resize(width, height);

        // Update alpha channel (every 4th byte of the RGBA frame data)
        const framePixels = currentFrame.data;
        const alphaValues = mask.data;
        for (let i = 0; i < alphaValues.length; ++i) {
            framePixels[4 * i + 3] = alphaValues[i];
        }
        outputContext.putImageData(currentFrame, 0, 0);

        // Sample the clock once so the FPS value and the stored timestamp agree
        const now = performance.now();
        if (previousTime !== undefined) {
            const fps = 1000 / (now - previousTime);
            status.textContent = `FPS: ${fps.toFixed(2)}`;
        }
        previousTime = now;
    } catch (err) {
        console.error(err);
        status.textContent = `Error: ${err?.message ?? err}`;
        // Do not let the next FPS reading span the failed frame
        previousTime = undefined;
    } finally {
        isProcessing = false;
    }
}

/**
 * Animation-loop callback. Starts processing a new frame unless one is still in
 * flight, then schedules itself for the next animation frame.
 *
 * @returns {void}
 */
function updateCanvas() {
    const { width, height } = canvas;

    // Skip zero-sized canvases: reading image data from an empty area throws.
    if (!isProcessing && width > 0 && height > 0) {
        isProcessing = true;
        // Deliberately not awaited: processFrame handles its own errors.
        void processFrame(width, height);
    }

    window.requestAnimationFrame(updateCanvas);
}
|
||
// Largest box (in CSS pixels) that the preview container may occupy
const MAX_CONTAINER_WIDTH = 720;
const MAX_CONTAINER_HEIGHT = 405;

/**
 * Requests the webcam, wires the stream into the video element, sizes the
 * canvases and the container to match the camera's aspect ratio, and starts the
 * animation loop.
 *
 * Every failure is reported through the status label and an alert. This includes
 * a synchronous one, such as `navigator.mediaDevices` being unavailable.
 *
 * @returns {Promise<void>}
 */
async function startVideoStream() {
    try {
        // Ask for video
        const stream = await navigator.mediaDevices.getUserMedia({ video: true });

        // Set up the video and canvas elements.
        video.srcObject = stream;
        // The element also has the `autoplay` attribute, so a rejected play()
        // is logged rather than treated as fatal.
        video.play()?.catch((err) => console.warn('video.play() was rejected:', err));

        const [videoTrack] = stream.getVideoTracks();
        const { width, height } = videoTrack.getSettings();
        if (!(width > 0 && height > 0)) {
            throw new Error('Unable to determine the resolution of the video stream');
        }

        setStreamSize(width * scale, height * scale);

        // Set container width and height depending on the image aspect ratio
        const ar = width / height;
        const [cw, ch] = (ar > MAX_CONTAINER_WIDTH / MAX_CONTAINER_HEIGHT)
            ? [MAX_CONTAINER_WIDTH, MAX_CONTAINER_WIDTH / ar]
            : [MAX_CONTAINER_HEIGHT * ar, MAX_CONTAINER_HEIGHT];
        container.style.width = `${cw}px`;
        container.style.height = `${ch}px`;

        // Start the animation loop
        setTimeout(updateCanvas, 50);
    } catch (error) {
        status.textContent = String(error?.message ?? error);
        alert(error);
    }
}

// Start the video stream
startVideoStream();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
{ | ||
"name": "webgpu-video-background-removal", | ||
"private": true, | ||
"version": "0.0.0", | ||
"type": "module", | ||
"scripts": { | ||
"dev": "vite", | ||
"build": "vite build", | ||
"preview": "vite preview" | ||
}, | ||
"devDependencies": { | ||
"vite": "^5.0.12" | ||
}, | ||
"dependencies": { | ||
"@xenova/transformers": "^3.0.0" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
* { | ||
box-sizing: border-box; | ||
padding: 0; | ||
margin: 0; | ||
font-family: sans-serif; | ||
} | ||
|
||
html, | ||
body { | ||
height: 100%; | ||
} | ||
|
||
body { | ||
padding: 16px 32px; | ||
} | ||
|
||
body, | ||
#container { | ||
display: flex; | ||
flex-direction: column; | ||
justify-content: center; | ||
align-items: center; | ||
} | ||
|
||
#controls { | ||
display: flex; | ||
padding: 1rem; | ||
gap: 1rem; | ||
} | ||
|
||
#controls>div { | ||
text-align: center; | ||
} | ||
|
||
h1, | ||
h4 { | ||
text-align: center; | ||
} | ||
|
||
h4 { | ||
margin-top: 0.5rem; | ||
} | ||
|
||
#container { | ||
position: relative; | ||
width: 720px; | ||
height: 405px; | ||
max-width: 100%; | ||
max-height: 100%; | ||
border: 2px dashed #D1D5DB; | ||
border-radius: 0.75rem; | ||
overflow: hidden; | ||
margin-top: 1rem; | ||
background-size: 100% 100%; | ||
background-position: center; | ||
background-repeat: no-repeat; | ||
} | ||
|
||
#overlay, | ||
canvas { | ||
position: absolute; | ||
width: 100%; | ||
height: 100%; | ||
} | ||
|
||
#status { | ||
min-height: 16px; | ||
margin: 8px 0; | ||
} | ||
|
||
.bounding-box { | ||
position: absolute; | ||
box-sizing: border-box; | ||
border: solid 2px; | ||
} | ||
|
||
.bounding-box-label { | ||
color: white; | ||
position: absolute; | ||
font-size: 12px; | ||
margin: -16px 0 0 -2px; | ||
padding: 1px; | ||
} | ||
|
||
#video, #canvas { | ||
display: none; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
import { defineConfig } from 'vite'; | ||
// Build for the newest ECMAScript target.
// NOTE(review): presumably needed because main.js uses top-level `await` -- confirm.
const build = { target: 'esnext' };

export default defineConfig({ build });