-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
102 lines (80 loc) · 4 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from flask import Flask, render_template, Response
import cv2
import dlib
import numpy as np
import torch
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
app = Flask(__name__)
# Initialize dlib's face detector (HOG-based)
detector = dlib.get_frontal_face_detector()
# Initialize background subtractor (KNN) with parameters
backSub = cv2.createBackgroundSubtractorKNN(history=500, dist2Threshold=400, detectShadows=False)
# Load the pre-trained SegFormer model
model_name = "nvidia/segformer-b0-finetuned-ade-512-512"
processor = SegformerImageProcessor.from_pretrained(model_name)
model = SegformerForSemanticSegmentation.from_pretrained(model_name)
@app.route('/')
def index():
return render_template('index.html')
def generate_frames():
cap = cv2.VideoCapture(0) # Ensure correct camera index
if not cap.isOpened():
print("Error: Could not open video.")
exit()
# Get original dimensions of the frame
ret, frame = cap.read()
height, width = frame.shape[:2]
aspect_ratio = width / height
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
# Maintain original aspect ratio
target_width = 640
target_height = int(target_width / aspect_ratio)
frame = cv2.resize(frame, (target_width, target_height))
# Increase brightness
frame = cv2.convertScaleAbs(frame, alpha=1.2, beta=30) # Adjust alpha (contrast) and beta (brightness)
# Convert frame to grayscale
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# Detect faces in the frame
faces = detector(gray)
# Apply background subtraction
fgMask = backSub.apply(frame)
for face in faces:
x, y, w, h = (face.left(), face.top(), face.width(), face.height())
face_roi = frame[y:y+h, x:x+w]
mask_roi = fgMask[y:y+h, x:x+w]
# Ensure mask_roi is not empty before processing
if mask_roi.size > 0:
# Apply Gaussian Blur to smooth the mask
blurred_mask = cv2.GaussianBlur(mask_roi, (15, 15), 0)
_, clean_mask = cv2.threshold(blurred_mask, 128, 255, cv2.THRESH_BINARY)
# Morphological operations to remove noise and fill holes
kernel = np.ones((5, 5), np.uint8)
clean_mask = cv2.morphologyEx(clean_mask, cv2.MORPH_CLOSE, kernel)
clean_mask = cv2.morphologyEx(clean_mask, cv2.MORPH_OPEN, kernel)
# Use inpainting to fill black patches
clean_mask = cv2.inpaint(clean_mask, clean_mask, 3, cv2.INPAINT_TELEA)
# Use SegFormer for semantic segmentation
input_image = cv2.resize(frame, (512, 512)) # Reduce input size for faster processing
inputs = processor(images=input_image, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
logits = outputs.logits
semantic_mask = logits.argmax(dim=1).squeeze().cpu().numpy().astype(np.uint8)
semantic_mask = cv2.resize(semantic_mask, (frame.shape[1], frame.shape[0]))
# Align the mask properly with face region
face_mask = semantic_mask[y:y+h, x:x+w]
face_with_colors = cv2.bitwise_and(face_roi, face_roi, mask=face_mask)
# Prepare segmented face as frame
ret, buffer = cv2.imencode('.jpg', face_with_colors)
frame = buffer.tobytes()
yield (b'--frame\r\n'
b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')
cap.release()
@app.route('/video_feed')
def video_feed():
return Response(generate_frames(), mimetype='multipart/x-mixed-replace; boundary=frame')
if __name__ == '__main__':
app.run(debug=True, host='0.0.0.0', port=5000)