Create video object detection demo (#607)
xenova authored Feb 27, 2024
1 parent 7772d1d commit 271c6f1
Showing 7 changed files with 1,824 additions and 0 deletions.
24 changes: 24 additions & 0 deletions examples/video-object-detection/.gitignore
@@ -0,0 +1,24 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*

node_modules
dist
dist-ssr
*.local

# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
43 changes: 43 additions & 0 deletions examples/video-object-detection/index.html
@@ -0,0 +1,43 @@
<!DOCTYPE html>
<html lang="en">

<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Transformers.js | Real-time object detection</title>
</head>

<body>
<h1>
Real-time object detection w/
<a href="http://github.com/xenova/transformers.js" target="_blank">🤗 Transformers.js</a>
</h1>
<h4>
Runs locally in your browser, powered by
<a href="https://github.com/WongKinYiu/yolov9" target="_blank">YOLOv9</a>
</h4>
<div id="container">
<video id="video" autoplay muted playsinline></video>
<canvas id="canvas" width="360" height="240"></canvas>
<div id="overlay"></div>
</div>
<div id="controls">
<div>
<label>Image size</label>
(<label id="size-value">128</label>)
<br>
<input id="size" type="range" min="64" max="256" step="32" value="128" disabled>
</div>
<div>
<label>Threshold</label>
(<label id="threshold-value">0.25</label>)
<br>
<input id="threshold" type="range" min="0.01" max="1" step="0.01" value="0.25" disabled>
</div>
</div>
<label id="status"></label>

<script type="module" src="/main.js"></script>
</body>

</html>
146 changes: 146 additions & 0 deletions examples/video-object-detection/main.js
@@ -0,0 +1,146 @@
import './style.css';

import { env, AutoModel, AutoProcessor, RawImage } from '@xenova/transformers';

// Since we will download the model from the Hugging Face Hub, we can skip the local model check
env.allowLocalModels = false;

// Proxy the WASM backend to prevent the UI from freezing
env.backends.onnx.wasm.proxy = true;

// Reference the elements that we will need
const status = document.getElementById('status');
const container = document.getElementById('container');
const overlay = document.getElementById('overlay');
const canvas = document.getElementById('canvas');
const video = document.getElementById('video');
const thresholdSlider = document.getElementById('threshold');
const thresholdLabel = document.getElementById('threshold-value');
const sizeSlider = document.getElementById('size');
const sizeLabel = document.getElementById('size-value');

status.textContent = 'Loading model...';

// Load model and processor
const model_id = 'Xenova/gelan-c_all';
const model = await AutoModel.from_pretrained(model_id);
const processor = await AutoProcessor.from_pretrained(model_id);

// Set up controls (the sliders start disabled in the markup and are enabled once the model has loaded)
let threshold = 0.25;
thresholdSlider.addEventListener('input', () => {
threshold = Number(thresholdSlider.value);
thresholdLabel.textContent = threshold.toFixed(2);
});
thresholdSlider.disabled = false;

let size = 128;
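// The processor resizes each frame so that its shortest edge matches the selected size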
processor.feature_extractor.size = { shortest_edge: size };
sizeSlider.addEventListener('input', () => {
size = Number(sizeSlider.value);
processor.feature_extractor.size = { shortest_edge: size };
sizeLabel.textContent = size;
});
sizeSlider.disabled = false;

status.textContent = 'Ready';

const COLOURS = [
"#EF4444", "#4299E1", "#059669",
"#FBBF24", "#4B52B1", "#7B3AC2",
"#ED507A", "#1DD1A1", "#F3873A",
"#4B5563", "#DC2626", "#1852B4",
"#18A35D", "#F59E0B", "#4059BE",
"#6027A5", "#D63D60", "#00AC9B",
"#E64A19", "#272A34"
];

// Render a bounding box and label on the image
function renderBox([xmin, ymin, xmax, ymax, score, id], [w, h]) {
if (score < threshold) return; // Skip boxes with low confidence

// Pick a colour for the box based on the predicted class id
const color = COLOURS[id % COLOURS.length];

// Draw the box
const boxElement = document.createElement('div');
boxElement.className = 'bounding-box';
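// Position and size the box with percentage offsets so it scales with the displayed video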
Object.assign(boxElement.style, {
borderColor: color,
left: 100 * xmin / w + '%',
top: 100 * ymin / h + '%',
width: 100 * (xmax - xmin) / w + '%',
height: 100 * (ymax - ymin) / h + '%',
})

// Draw label
const labelElement = document.createElement('span');
labelElement.textContent = `${model.config.id2label[id]} (${(100 * score).toFixed(2)}%)`;
labelElement.className = 'bounding-box-label';
labelElement.style.backgroundColor = color;

boxElement.appendChild(labelElement);
overlay.appendChild(boxElement);
}

let isProcessing = false;
let previousTime;
const context = canvas.getContext('2d', { willReadFrequently: true });
function updateCanvas() {
const { width, height } = canvas;
context.drawImage(video, 0, 0, width, height);

if (!isProcessing) {
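// Run at most one inference at a time; the canvas keeps redrawing every frame while the model is busy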
isProcessing = true;
(async function () {
// Read the current frame from the video
const pixelData = context.getImageData(0, 0, width, height).data;
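// getImageData returns RGBA pixels, hence the 4 channels passed to RawImage below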
const image = new RawImage(pixelData, width, height, 4);

// Process the image and run the model
const inputs = await processor(image);
const { outputs } = await model(inputs);

// Update UI
overlay.innerHTML = '';

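// reshaped_input_sizes[0] is [height, width]; reversing it gives the [width, height] expected by renderBox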
const sizes = inputs.reshaped_input_sizes[0].reverse();
outputs.tolist().forEach(x => renderBox(x, sizes));

if (previousTime !== undefined) {
const fps = 1000 / (performance.now() - previousTime);
status.textContent = `FPS: ${fps.toFixed(2)}`;
}
previousTime = performance.now();
isProcessing = false;
})();
}

window.requestAnimationFrame(updateCanvas);
}

// Start the video stream
navigator.mediaDevices.getUserMedia(
{ video: true }, // Ask for video
).then((stream) => {
// Set up the video and canvas elements.
video.srcObject = stream;
video.play();

const videoTrack = stream.getVideoTracks()[0];
const { width, height } = videoTrack.getSettings();

canvas.width = width;
canvas.height = height;

// Size the container to fit within a 720×405 box while preserving the video's aspect ratio
const ar = width / height;
const [cw, ch] = (ar > 720 / 405) ? [720, 720 / ar] : [405 * ar, 405];
container.style.width = `${cw}px`;
container.style.height = `${ch}px`;

// Start the animation loop
window.requestAnimationFrame(updateCanvas);
}).catch((error) => {
alert(error);
});
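
For reference, the same detection pipeline works outside the animation loop. The sketch below (not part of the commit) strips it down to a single static image using the same @xenova/transformers calls as main.js above; the image URL is a placeholder, and the 0.25 threshold mirrors the demo's default.

import { AutoModel, AutoProcessor, RawImage } from '@xenova/transformers';

// Load the same model and processor used by the demo
const model = await AutoModel.from_pretrained('Xenova/gelan-c_all');
const processor = await AutoProcessor.from_pretrained('Xenova/gelan-c_all');

// Read a single image (placeholder URL) instead of a video frame
const image = await RawImage.read('https://example.com/image.jpg');

// Preprocess and run the model, exactly as in updateCanvas() above
const inputs = await processor(image);
const { outputs } = await model(inputs);

// Each detection is [xmin, ymin, xmax, ymax, score, class_id], with coordinates
// in the resized-image space reported by inputs.reshaped_input_sizes
for (const [xmin, ymin, xmax, ymax, score, id] of outputs.tolist()) {
    if (score < 0.25) continue;
    console.log(`${model.config.id2label[id]}: ${(100 * score).toFixed(1)}%`);
}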