Face Detection and Recognition
Overview¶
Face detection and recognition is one of the most practical applications of computer vision. We will learn various face processing techniques using Haar Cascade, dlib, and the face_recognition library.
Difficulty: ****
Prerequisites: Object detection basics, feature detection, image transformations
Table of Contents¶
- Haar Cascade Face/Eye Detection
- dlib Face Detector (HOG-based)
- dlib Face Landmarks (68 Points)
- LBPH Face Recognition
- face_recognition Library
- Real-time Face Detection
- Practice Problems
1. Haar Cascade Face/Eye Detection¶
Face Detection Principle¶
Haar Cascade Face Detection Process:
1. Compute integral image
+-----------------+
| Original image | -> Integral image (fast feature computation)
+-----------------+
2. Scan with windows of various sizes
+-----------------------------+
| +--+ |
| | | -> Small window |
| +--+ |
| +-----+ |
| | | -> Medium window|
| +-----+ |
| +--------+ |
| | | -> Large |
| +--------+ |
+-----------------------------+
3. Apply Cascade classifier at each window
Window -> Stage 1 -> Stage 2 -> ... -> Stage N
(face?) (face?) (face!)
4. Group detection results (remove duplicates)
Basic Face Detection¶
import cv2
import numpy as np
def detect_faces_haar(img, scale_factor=1.1, min_neighbors=5,
                      min_size=(30, 30)):
    """Detect faces with OpenCV's Haar cascade.

    Args:
        img: BGR color image or single-channel grayscale image.
        scale_factor: image-pyramid scale step for detectMultiScale.
        min_neighbors: neighbor rectangles required to keep a detection.
        min_size: smallest face size considered, in pixels.

    Returns:
        Sequence of (x, y, w, h) face rectangles.
    """
    cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    face_cascade = cv2.CascadeClassifier(cascade_path)

    # Work on a grayscale copy; equalize to reduce lighting variation.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img.copy()
    gray = cv2.equalizeHist(gray)

    return face_cascade.detectMultiScale(
        gray,
        scaleFactor=scale_factor,
        minNeighbors=min_neighbors,
        minSize=min_size,
        flags=cv2.CASCADE_SCALE_IMAGE,
    )
# Usage example: detect faces and draw them on the image.
img = cv2.imread('photo.jpg')
faces = detect_faces_haar(img)

# Draw results: a rectangle plus a dot at each face center.
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
    # Face center point
    center = (x + w//2, y + h//2)
    cv2.circle(img, center, 3, (0, 0, 255), -1)

print(f"Faces detected: {len(faces)}")
cv2.imshow('Face Detection', img)
cv2.waitKey(0)
Face and Eye Detection Together¶
import cv2
class HaarFaceEyeDetector:
    """Haar Cascade-based face/eye detector.

    Detects faces, then searches the upper half of each face for eyes,
    falling back to the eyeglasses-tolerant cascade when fewer than two
    eyes are found.
    """

    def __init__(self):
        # Frontal-face cascade shipped with OpenCV.
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
        # Plain-eye cascade.
        self.eye_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_eye.xml'
        )
        # Fallback cascade trained to tolerate eyeglasses.
        self.eye_glasses_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_eye_tree_eyeglasses.xml'
        )

    def detect(self, img, detect_eyes=True):
        """Detect face and eyes.

        Returns a list of dicts, one per face, each with 'face_rect'
        (x, y, w, h) and 'eyes', a list of eye rects — both in full-image
        coordinates.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray = cv2.equalizeHist(gray)  # lighting normalization
        results = []
        # Face detection
        faces = self.face_cascade.detectMultiScale(
            gray, 1.1, 5, minSize=(60, 60)
        )
        for (x, y, w, h) in faces:
            face_data = {
                'face_rect': (x, y, w, h),
                'eyes': []
            }
            if detect_eyes:
                # Detect eyes in top 50% of face only (limits false hits).
                roi_gray = gray[y:y+h//2, x:x+w]
                # Try regular eye detection
                eyes = self.eye_cascade.detectMultiScale(
                    roi_gray, 1.1, 3, minSize=(20, 20)
                )
                # Try glasses detector if regular detection fails
                if len(eyes) < 2:
                    eyes = self.eye_glasses_cascade.detectMultiScale(
                        roi_gray, 1.1, 3, minSize=(20, 20)
                    )
                # Select best two eyes
                eyes = self._select_best_eyes(eyes, w)
                for (ex, ey, ew, eh) in eyes:
                    # ROI coordinates -> full-image coordinates.
                    face_data['eyes'].append((x + ex, y + ey, ew, eh))
            results.append(face_data)
        return results

    def _select_best_eyes(self, eyes, face_width):
        """Select best two eyes: at most one left and one right of the
        face midline, preferring the topmost candidates."""
        if len(eyes) <= 2:
            return eyes
        # Filter by eye size and y-coordinate
        eyes = sorted(eyes, key=lambda e: e[1])  # Sort by y-coordinate
        # Select from top 4 candidates
        candidates = eyes[:4]
        # Separate left/right by x-coordinate of each eye rect's center.
        mid_x = face_width // 2
        left_eyes = [e for e in candidates if e[0] + e[2]//2 < mid_x]
        right_eyes = [e for e in candidates if e[0] + e[2]//2 >= mid_x]
        result = []
        if left_eyes:
            result.append(left_eyes[0])
        if right_eyes:
            result.append(right_eyes[0])
        return result

    def draw_results(self, img, results):
        """Visualize results on a copy of img (original left untouched)."""
        output = img.copy()
        for face_data in results:
            x, y, w, h = face_data['face_rect']
            # Face rectangle
            cv2.rectangle(output, (x, y), (x+w, y+h), (0, 255, 0), 2)
            # Eyes drawn as circles centered on each eye rect.
            for (ex, ey, ew, eh) in face_data['eyes']:
                center = (ex + ew//2, ey + eh//2)
                radius = (ew + eh) // 4
                cv2.circle(output, center, radius, (255, 0, 0), 2)
        return output
# Usage example
detector = HaarFaceEyeDetector()
img = cv2.imread('portrait.jpg')
results = detector.detect(img)
output = detector.draw_results(img, results)
# NOTE: add cv2.waitKey(0) after this to keep the window open in a script.
cv2.imshow('Detection', output)
2. dlib Face Detector (HOG-based)¶
dlib Installation¶
# dlib installation (requires C++ compiler)
pip install dlib
# Or use conda (easier)
conda install -c conda-forge dlib
HOG-based Detector¶
import cv2
import dlib
import numpy as np

# HOG-based face detector (built into dlib; no model file needed).
detector = dlib.get_frontal_face_detector()

# Load image
img = cv2.imread('photo.jpg')
# dlib expects RGB; OpenCV loads BGR.
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Face detection
# Second argument: upsampling count (0=original, 1=2x, 2=4x).
# More upsampling finds smaller faces but is slower.
faces = detector(rgb, 1)

print(f"Faces detected: {len(faces)}")

# Visualize results: dlib returns rectangle objects, not tuples.
for face in faces:
    x1, y1 = face.left(), face.top()
    x2, y2 = face.right(), face.bottom()
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

# Confidence score (dlib detector provides scores by default)
# Use detector.run() to get scores
cv2.imshow('dlib HOG Detection', img)
cv2.waitKey(0)
CNN-based Detector (More Accurate)¶
import cv2
import dlib

# CNN face detector (requires model file)
# Download: http://dlib.net/files/mmod_human_face_detector.dat.bz2
cnn_detector = dlib.cnn_face_detection_model_v1(
    'mmod_human_face_detector.dat'
)

img = cv2.imread('photo.jpg')
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # dlib expects RGB input

# CNN detection (second arg: upsampling count; slow without a GPU).
detections = cnn_detector(rgb, 1)

# CNN detections wrap the rectangle in .rect and add a .confidence score.
for d in detections:
    x1 = d.rect.left()
    y1 = d.rect.top()
    x2 = d.rect.right()
    y2 = d.rect.bottom()
    confidence = d.confidence
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(img, f'{confidence:.2f}', (x1, y1-10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
Haar vs dlib Comparison¶
+----------------+------------------+------------------+
| Item | Haar Cascade | dlib HOG |
+----------------+------------------+------------------+
| Speed | Fast | Medium |
| Accuracy | Medium | High |
| False Positives| Many | Few |
| Profile Face | Separate model | Not supported |
| Small Faces | Detects well | Harder to detect |
| Installation | Included w/OpenCV| Separate install |
| Memory Usage | Low | Medium |
+----------------+------------------+------------------+
dlib CNN Detector:
- Most accurate but slow without GPU
- Detects profile faces well
- Excellent small face detection
3. dlib Face Landmarks (68 Points)¶
68-Point Landmark Structure¶
Face Landmarks 68 Points:
17-21 22-26
____ ____
0 / \ / \ 16
| | 36-41| |42-47 | |
| \____/ \____/ |
| 48-67 |
| / \ |
| / \ |
8 \________/
Point Groups:
- 0-16: Jawline
- 17-21: Left eyebrow
- 22-26: Right eyebrow
- 27-35: Nose
- 36-41: Left eye
- 42-47: Right eye
- 48-67: Mouth
- 48-59: Outer lip
- 60-67: Inner lip
Landmark Detection¶
import cv2
import dlib
import numpy as np
# Load detector and predictor (module-level, shared by the functions below).
detector = dlib.get_frontal_face_detector()
# Download: http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
def get_landmarks(img):
    """Detect face landmarks.

    Returns a list of dicts, one per detected face, each holding
    'face_rect' (left, top, right, bottom) and 'landmarks', a (68, 2)
    int32 array of (x, y) points.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Face detection (upsample once to catch smaller faces).
    faces = detector(rgb, 1)
    all_landmarks = []
    for face in faces:
        # Predict landmarks
        shape = predictor(rgb, face)
        # Convert dlib shape to numpy array
        landmarks = np.zeros((68, 2), dtype=np.int32)
        for i in range(68):
            landmarks[i] = (shape.part(i).x, shape.part(i).y)
        all_landmarks.append({
            'face_rect': (face.left(), face.top(),
                          face.right(), face.bottom()),
            'landmarks': landmarks
        })
    return all_landmarks
def draw_landmarks(img, landmarks_data, draw_indices=False):
    """Visualize 68-point landmarks: a dot for every point, polylines
    for each facial feature. Works on a copy of img."""
    output = img.copy()
    # (first index, last index, closed?, BGR color) for each feature.
    segments = [
        (0, 16, False, (255, 255, 0)),   # jawline
        (17, 21, False, (255, 255, 0)),  # left eyebrow
        (22, 26, False, (255, 255, 0)),  # right eyebrow
        (27, 30, False, (255, 255, 0)),  # nose bridge
        (31, 35, False, (255, 255, 0)),  # nose bottom
        (36, 41, True, (0, 255, 255)),   # left eye
        (42, 47, True, (0, 255, 255)),   # right eye
        (48, 59, True, (0, 0, 255)),     # outer lip
        (60, 67, True, (0, 0, 255)),     # inner lip
    ]
    for data in landmarks_data:
        points = data['landmarks']
        # Dots (optionally labelled with the landmark index).
        for idx, (px, py) in enumerate(points):
            cv2.circle(output, (px, py), 2, (0, 255, 0), -1)
            if draw_indices:
                cv2.putText(output, str(idx), (px - 5, py - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 0, 0), 1)
        # Connecting lines per feature; closed features also join
        # their last point back to the first.
        for first, last, closed, color in segments:
            for i in range(first, last):
                cv2.line(output, tuple(points[i]), tuple(points[i + 1]),
                         color, 1)
            if closed:
                cv2.line(output, tuple(points[last]), tuple(points[first]),
                         color, 1)
    return output
# Usage example
img = cv2.imread('face.jpg')
landmarks_data = get_landmarks(img)
result = draw_landmarks(img, landmarks_data)
cv2.imshow('Landmarks', result)
Landmark Applications¶
import cv2
import dlib
import numpy as np
from scipy.spatial import distance as dist
class FaceLandmarkAnalyzer:
    """Analyzes dlib 68-point landmarks: EAR, MAR and in-plane tilt."""

    # Landmark index groups of the 68-point model.
    JAWLINE = list(range(0, 17))
    LEFT_EYEBROW = list(range(17, 22))
    RIGHT_EYEBROW = list(range(22, 27))
    NOSE_BRIDGE = list(range(27, 31))
    NOSE_TIP = list(range(31, 36))
    LEFT_EYE = list(range(36, 42))
    RIGHT_EYE = list(range(42, 48))
    OUTER_LIP = list(range(48, 60))
    INNER_LIP = list(range(60, 68))

    def __init__(self, predictor_path):
        """predictor_path: path to shape_predictor_68_face_landmarks.dat."""
        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor(predictor_path)

    def get_landmarks(self, img):
        """Return a (68, 2) array for the first detected face, or None."""
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        faces = self.detector(rgb, 0)  # no upsampling: favour speed
        if len(faces) == 0:
            return None
        shape = self.predictor(rgb, faces[0])
        coords = [[shape.part(i).x, shape.part(i).y] for i in range(68)]
        return np.array(coords)

    def eye_aspect_ratio(self, eye_points):
        """Eye Aspect Ratio (EAR): mean vertical opening over horizontal
        width — the classic drowsiness-detection cue."""
        vertical_a = dist.euclidean(eye_points[1], eye_points[5])
        vertical_b = dist.euclidean(eye_points[2], eye_points[4])
        horizontal = dist.euclidean(eye_points[0], eye_points[3])
        return (vertical_a + vertical_b) / (2.0 * horizontal)

    def mouth_aspect_ratio(self, mouth_points):
        """Mouth Aspect Ratio (MAR) over the outer lip — yawn cue."""
        vertical_a = dist.euclidean(mouth_points[2], mouth_points[10])   # 51-59
        vertical_b = dist.euclidean(mouth_points[4], mouth_points[8])    # 53-57
        horizontal = dist.euclidean(mouth_points[0], mouth_points[6])    # 49-55
        return (vertical_a + vertical_b) / (2.0 * horizontal)

    def get_face_angle(self, landmarks):
        """In-plane tilt (degrees) of the line joining the eye centers."""
        left_center = landmarks[self.LEFT_EYE].mean(axis=0)
        right_center = landmarks[self.RIGHT_EYE].mean(axis=0)
        delta = right_center - left_center
        return np.degrees(np.arctan2(delta[1], delta[0]))

    def analyze_face(self, img):
        """Run the full analysis on the first face found in img.

        Returns a dict with landmarks, EAR, MAR, tilt angle and two
        threshold-based flags, or None when no face is detected.
        """
        landmarks = self.get_landmarks(img)
        if landmarks is None:
            return None
        # Average the per-eye ratios.
        ear_left = self.eye_aspect_ratio(landmarks[self.LEFT_EYE])
        ear_right = self.eye_aspect_ratio(landmarks[self.RIGHT_EYE])
        avg_ear = (ear_left + ear_right) / 2.0
        mar = self.mouth_aspect_ratio(landmarks[self.OUTER_LIP])
        angle = self.get_face_angle(landmarks)
        return {
            'landmarks': landmarks,
            'eye_aspect_ratio': avg_ear,
            'mouth_aspect_ratio': mar,
            'face_angle': angle,
            'eyes_closed': avg_ear < 0.2,   # empirical threshold
            'mouth_open': mar > 0.5
        }
# Usage example
analyzer = FaceLandmarkAnalyzer('shape_predictor_68_face_landmarks.dat')
img = cv2.imread('face.jpg')
analysis = analyzer.analyze_face(img)

# analyze_face returns None when no face is detected.
if analysis:
    print(f"Eye Aspect Ratio (EAR): {analysis['eye_aspect_ratio']:.3f}")
    print(f"Mouth Aspect Ratio (MAR): {analysis['mouth_aspect_ratio']:.3f}")
    print(f"Face tilt: {analysis['face_angle']:.1f} degrees")
    print(f"Eyes closed: {analysis['eyes_closed']}")
    print(f"Mouth open: {analysis['mouth_open']}")
4. LBPH Face Recognition¶
Understanding LBP (Local Binary Patterns)¶
LBP: Represents pattern around each pixel as binary code
Surrounding pixels Comparison (> center?) Binary code
+---+---+---+ +---+---+---+
| 6 | 5 | 2 | | 1 | 1 | 0 | 11000011
+---+---+---+ +---+---+---+ = 195
| 7 |[4]| 1 | | 1 | | 0 |
+---+---+---+ +---+---+---+
| 8 | 3 | 2 | | 1 | 0 | 0 |
+---+---+---+ +---+---+---+
Compare center pixel (4) with neighbors:
6>4=1, 5>4=1, 2<4=0, 1<4=0, 2<4=0, 3<4=0, 8>4=1, 7>4=1
LBPH (LBP Histogram):
- Divide image into multiple cells
- Compute LBP histogram per cell
- Concatenate all histograms for feature vector
LBPH Face Recognizer¶
import cv2
import numpy as np
import os
class LBPHFaceRecognizer:
    """LBPH-based face recognizer.

    Detection uses a Haar cascade; recognition uses LBP histograms over
    a normalized 100x100 grayscale face crop. Requires the
    opencv-contrib-python package (the cv2.face module).
    """

    def __init__(self):
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
        self.recognizer = cv2.face.LBPHFaceRecognizer_create(
            radius=1,      # LBP radius
            neighbors=8,   # Number of neighbors
            grid_x=8,      # Cells in x direction
            grid_y=8       # Cells in y direction
        )
        self.label_names = {}  # label id -> person name

    def prepare_training_data(self, data_dir):
        """Collect (face crop, label id) pairs from data_dir/<person>/<img>.

        FIX: directory listings are sorted so label ids are assigned
        deterministically — os.listdir order is filesystem-dependent,
        which would otherwise make label<->name mappings inconsistent
        between runs and machines.
        """
        faces = []
        labels = []
        for label_id, person_name in enumerate(sorted(os.listdir(data_dir))):
            person_dir = os.path.join(data_dir, person_name)
            if not os.path.isdir(person_dir):
                continue
            self.label_names[label_id] = person_name
            for img_name in sorted(os.listdir(person_dir)):
                img_path = os.path.join(person_dir, img_name)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is None:  # skip unreadable / non-image files
                    continue
                # Face detection
                detected_faces = self.face_cascade.detectMultiScale(
                    img, 1.1, 5, minSize=(50, 50)
                )
                for (x, y, w, h) in detected_faces:
                    face_roi = img[y:y+h, x:x+w]
                    # Size normalization
                    face_roi = cv2.resize(face_roi, (100, 100))
                    faces.append(face_roi)
                    labels.append(label_id)
        return faces, labels

    def train(self, faces, labels):
        """Train the LBPH model on the prepared crops."""
        self.recognizer.train(faces, np.array(labels))
        print(f"Training complete: {len(set(labels))} people, {len(faces)} images")

    def save_model(self, path):
        """Save the trained model plus the label->name mapping."""
        self.recognizer.save(path)
        # Also save label names (LBPH itself stores only numeric labels).
        np.save(path + '_labels.npy', self.label_names)

    def load_model(self, path):
        """Load a model previously written by save_model."""
        self.recognizer.read(path)
        self.label_names = np.load(path + '_labels.npy',
                                   allow_pickle=True).item()

    def predict(self, img):
        """Detect and identify every face in img.

        Returns a list of dicts with 'rect', 'name' and 'confidence'
        (LBPH confidence is a distance: lower is better).
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img
        faces = self.face_cascade.detectMultiScale(gray, 1.1, 5, minSize=(50, 50))
        results = []
        for (x, y, w, h) in faces:
            face_roi = gray[y:y+h, x:x+w]
            face_roi = cv2.resize(face_roi, (100, 100))
            label, confidence = self.recognizer.predict(face_roi)
            # Lower confidence is better for LBPH;
            # generally below 50 is a very good match.
            name = self.label_names.get(label, "Unknown")
            if confidence > 100:  # too far from any known face
                name = "Unknown"
            results.append({
                'rect': (x, y, w, h),
                'name': name,
                'confidence': confidence
            })
        return results

    def draw_results(self, img, results):
        """Visualize recognition results on a copy of img."""
        output = img.copy()
        for result in results:
            x, y, w, h = result['rect']
            name = result['name']
            conf = result['confidence']
            # Color: green for recognized, red for unknown.
            color = (0, 255, 0) if name != "Unknown" else (0, 0, 255)
            cv2.rectangle(output, (x, y), (x+w, y+h), color, 2)
            label = f"{name} ({conf:.1f})"
            cv2.putText(output, label, (x, y-10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
        return output
# Usage example
"""
Data directory structure:
faces/
    person1/
        img1.jpg
        img2.jpg
    person2/
        img1.jpg
        img2.jpg
"""
# Training
recognizer = LBPHFaceRecognizer()
faces, labels = recognizer.prepare_training_data('faces')
recognizer.train(faces, labels)
recognizer.save_model('face_model.yml')

# Recognition (load_model restores both the model and the label names)
recognizer.load_model('face_model.yml')
test_img = cv2.imread('test.jpg')
results = recognizer.predict(test_img)
output = recognizer.draw_results(test_img, results)
cv2.imshow('Recognition', output)
5. face_recognition Library¶
Installation¶
pip install face_recognition
Basic Usage¶
import face_recognition
import cv2
import numpy as np

# Load image (face_recognition loads as RGB, unlike cv2.imread's BGR).
img = face_recognition.load_image_file('photo.jpg')

# Face location detection; each location is (top, right, bottom, left).
face_locations = face_recognition.face_locations(img)
# Or use CNN model (more accurate)
# face_locations = face_recognition.face_locations(img, model='cnn')

print(f"Faces detected: {len(face_locations)}")

# Face encoding (128-dimensional feature vector)
face_encodings = face_recognition.face_encodings(img, face_locations)

# Face landmarks
face_landmarks = face_recognition.face_landmarks(img, face_locations)

# Visualize results (convert back to BGR for OpenCV display).
img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
for (top, right, bottom, left) in face_locations:
    cv2.rectangle(img_bgr, (left, top), (right, bottom), (0, 255, 0), 2)
cv2.imshow('Detection', img_bgr)
Face Comparison and Recognition¶
import face_recognition
import cv2
import numpy as np
import os
class FaceRecognitionSystem:
    """face_recognition-based face recognition system.

    Holds one 128-d encoding per registered face and identifies new
    faces by Euclidean distance to the known encodings.
    """

    def __init__(self):
        self.known_encodings = []  # list of 128-d numpy vectors
        self.known_names = []      # parallel list of person names

    def add_face(self, img_path, name):
        """Register the first face found in img_path under `name`.

        Returns True on success, False if no face was found.
        """
        img = face_recognition.load_image_file(img_path)
        encodings = face_recognition.face_encodings(img)
        if len(encodings) > 0:
            self.known_encodings.append(encodings[0])
            self.known_names.append(name)
            print(f"'{name}' face registered")
            return True
        else:
            print(f"No face found in '{img_path}'")
            return False

    def load_faces_from_directory(self, data_dir):
        """Register faces from data_dir/<person_name>/<image files>.

        FIX: listings are sorted so registration order is deterministic;
        os.listdir order is filesystem-dependent and would otherwise
        vary between runs and machines.
        """
        for person_name in sorted(os.listdir(data_dir)):
            person_dir = os.path.join(data_dir, person_name)
            if not os.path.isdir(person_dir):
                continue
            for img_name in sorted(os.listdir(person_dir)):
                img_path = os.path.join(person_dir, img_name)
                self.add_face(img_path, person_name)
        print(f"Total {len(self.known_encodings)} faces loaded")

    def recognize(self, img, tolerance=0.6):
        """Detect and identify faces in img.

        img may be a file path, an RGB array, or a BGR array (3-channel
        arrays are assumed BGR and converted). Returns a list of dicts
        with 'location' (top, right, bottom, left), 'name' and
        'confidence' (1 - distance; 0.0 for unknown faces).
        """
        # RGB conversion
        if isinstance(img, str):
            img = face_recognition.load_image_file(img)
        elif len(img.shape) == 3 and img.shape[2] == 3:
            # BGR to RGB
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Face detection and encoding
        face_locations = face_recognition.face_locations(img)
        face_encodings = face_recognition.face_encodings(img, face_locations)
        results = []
        for (top, right, bottom, left), encoding in zip(face_locations,
                                                        face_encodings):
            # Boolean match flags against every known face.
            matches = face_recognition.compare_faces(
                self.known_encodings, encoding, tolerance=tolerance
            )
            # Distances (lower = more similar).
            distances = face_recognition.face_distance(
                self.known_encodings, encoding
            )
            name = "Unknown"
            confidence = 0.0
            if True in matches:
                # Take the closest known face, provided it matched.
                best_match_idx = np.argmin(distances)
                if matches[best_match_idx]:
                    name = self.known_names[best_match_idx]
                    confidence = 1 - distances[best_match_idx]
            results.append({
                'location': (top, right, bottom, left),
                'name': name,
                'confidence': confidence
            })
        return results

    def save_encodings(self, path):
        """Save encodings + names with numpy.

        NOTE: pass a path ending in '.npy' — np.save appends '.npy' when
        the suffix is missing but np.load does not, so other suffixes
        would make save and load refer to different files.
        """
        data = {
            'encodings': self.known_encodings,
            'names': self.known_names
        }
        np.save(path, data)

    def load_encodings(self, path):
        """Load encodings previously written by save_encodings."""
        data = np.load(path, allow_pickle=True).item()
        self.known_encodings = data['encodings']
        self.known_names = data['names']
# Usage example
system = FaceRecognitionSystem()

# Register known faces
system.add_face('known_faces/person1.jpg', 'Alice')
system.add_face('known_faces/person2.jpg', 'Bob')
# Or load from directory
# system.load_faces_from_directory('known_faces')

# Recognition
test_img = cv2.imread('test.jpg')
results = system.recognize(test_img)

# Visualization: locations are (top, right, bottom, left).
for result in results:
    top, right, bottom, left = result['location']
    name = result['name']
    conf = result['confidence']
    cv2.rectangle(test_img, (left, top), (right, bottom), (0, 255, 0), 2)
    label = f"{name} ({conf:.2%})"
    cv2.putText(test_img, label, (left, top - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

cv2.imshow('Recognition', test_img)
Face Clustering¶
import face_recognition
from sklearn.cluster import DBSCAN
import numpy as np
import os
def cluster_faces(image_dir, output_dir='clustered'):
    """Group similar faces with DBSCAN over 128-d face encodings.

    Returns (labels, image_paths, face_locations), one entry per
    detected face; label -1 marks noise (faces DBSCAN could not
    assign to any cluster).
    """
    encodings = []
    image_paths = []
    face_locations_list = []
    # Extract face encodings from all images (sorted for reproducibility).
    for img_name in sorted(os.listdir(image_dir)):
        img_path = os.path.join(image_dir, img_name)
        img = face_recognition.load_image_file(img_path)
        locations = face_recognition.face_locations(img)
        face_encodings = face_recognition.face_encodings(img, locations)
        for encoding, location in zip(face_encodings, locations):
            encodings.append(encoding)
            image_paths.append(img_path)
            face_locations_list.append(location)
    # FIX: guard the empty case — DBSCAN.fit raises on empty input.
    if not encodings:
        print("Unique people found: 0")
        return np.array([], dtype=int), image_paths, face_locations_list
    # DBSCAN clustering
    # eps: distance threshold for same cluster
    # min_samples: minimum samples to form a cluster
    clt = DBSCAN(metric='euclidean', eps=0.5, min_samples=2)
    clt.fit(encodings)
    # Organize results by cluster
    label_ids = np.unique(clt.labels_)
    num_unique = len(label_ids[label_ids > -1])  # -1 is noise
    print(f"Unique people found: {num_unique}")
    # Create one folder per cluster. NOTE: only the folders are created
    # here — copying the face crops into them is left to the caller.
    os.makedirs(output_dir, exist_ok=True)
    for label_id in label_ids:
        indices = np.where(clt.labels_ == label_id)[0]
        if label_id == -1:
            folder = os.path.join(output_dir, 'unknown')
        else:
            folder = os.path.join(output_dir, f'person_{label_id}')
        os.makedirs(folder, exist_ok=True)
        print(f"Cluster {label_id}: {len(indices)} faces")
    return clt.labels_, image_paths, face_locations_list
6. Real-time Face Detection¶
Basic Real-time Detection¶
import cv2
import time
def realtime_face_detection():
    """Real-time face detection (Haar Cascade).

    Opens the default webcam, detects faces on a half-resolution copy of
    each frame for speed, and overlays boxes plus an FPS counter.
    Press 'q' to quit.
    """
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    )
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    fps_counter = 0
    fps = 0
    start_time = time.time()
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Reduce frame size (speed up)
        small_frame = cv2.resize(frame, None, fx=0.5, fy=0.5)
        gray = cv2.cvtColor(small_frame, cv2.COLOR_BGR2GRAY)
        # Face detection
        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.2,  # Increase for faster detection
            minNeighbors=4,
            minSize=(30, 30)
        )
        # Scale coordinates back (detection ran at half resolution).
        for (x, y, w, h) in faces:
            x, y, w, h = x*2, y*2, w*2, h*2
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        # Compute FPS over ~1-second windows.
        fps_counter += 1
        elapsed = time.time() - start_time
        if elapsed >= 1.0:
            fps = fps_counter / elapsed
            fps_counter = 0
            start_time = time.time()
        # Display FPS
        cv2.putText(frame, f'FPS: {fps:.1f}', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f'Faces: {len(faces)}', (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Face Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()


realtime_face_detection()
Real-time Face Recognition¶
import cv2
import face_recognition
import numpy as np
import time
class RealtimeFaceRecognition:
    """Real-time face recognition system.

    Recognizes faces on a quarter-resolution webcam stream, processing
    only every nth frame and reusing the last results in between.
    """

    def __init__(self):
        self.known_encodings = []  # one 128-d encoding per known face
        self.known_names = []      # parallel list of names
        self.process_every_n_frames = 3  # Process every nth frame

    def add_known_face(self, img_path, name):
        """Add known face (uses the first face found in the image)."""
        img = face_recognition.load_image_file(img_path)
        encoding = face_recognition.face_encodings(img)
        if encoding:
            self.known_encodings.append(encoding[0])
            self.known_names.append(name)

    def run(self, camera_id=0):
        """Run real-time recognition. 'q' quits, 's' saves a frame."""
        cap = cv2.VideoCapture(camera_id)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
        frame_count = 0
        # Last detection results, reused on skipped frames.
        face_locations = []
        face_names = []
        fps_time = time.time()
        fps = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Reduce size to 1/4 (speed up); recognition runs on this copy.
            small_frame = cv2.resize(frame, None, fx=0.25, fy=0.25)
            rgb_small = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)
            # Process every nth frame only.
            if frame_count % self.process_every_n_frames == 0:
                # Face detection
                face_locations = face_recognition.face_locations(rgb_small)
                face_encodings = face_recognition.face_encodings(
                    rgb_small, face_locations
                )
                face_names = []
                for encoding in face_encodings:
                    name = "Unknown"
                    if self.known_encodings:
                        matches = face_recognition.compare_faces(
                            self.known_encodings, encoding, tolerance=0.6
                        )
                        distances = face_recognition.face_distance(
                            self.known_encodings, encoding
                        )
                        if len(distances) > 0:
                            # Closest known face, accepted only if it matched.
                            best_idx = np.argmin(distances)
                            if matches[best_idx]:
                                name = self.known_names[best_idx]
                    face_names.append(name)
            # Display results (scale 1/4-res coordinates back up by 4).
            for (top, right, bottom, left), name in zip(face_locations,
                                                        face_names):
                top *= 4
                right *= 4
                bottom *= 4
                left *= 4
                color = (0, 255, 0) if name != "Unknown" else (0, 0, 255)
                cv2.rectangle(frame, (left, top), (right, bottom), color, 2)
                # Name background
                cv2.rectangle(frame, (left, bottom - 25), (right, bottom),
                              color, cv2.FILLED)
                cv2.putText(frame, name, (left + 6, bottom - 6),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
            # Compute and display FPS (averaged over 30 frames).
            frame_count += 1
            if frame_count % 30 == 0:
                fps = 30 / (time.time() - fps_time)
                fps_time = time.time()
            cv2.putText(frame, f'FPS: {fps:.1f}', (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            cv2.imshow('Face Recognition', frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            elif key == ord('s'):
                # Save current frame with 's' key
                cv2.imwrite(f'capture_{frame_count}.jpg', frame)
        cap.release()
        cv2.destroyAllWindows()
# Usage example
system = RealtimeFaceRecognition()
system.add_known_face('alice.jpg', 'Alice')
system.add_known_face('bob.jpg', 'Bob')
system.run()  # press 'q' to quit, 's' to save a frame
Performance Optimization Tips¶
+-------------------------------------------------------------+
| Real-time Processing Optimization |
+-------------------------------------------------------------+
| |
| 1. Frame size reduction |
| - Reducing to 1/4 size decreases processing 16x |
| - small = cv2.resize(frame, None, fx=0.25, fy=0.25) |
| |
| 2. Frame skipping |
| - Not every frame needs processing |
| - Detect every 2-5 frames |
| - Use previous results for intermediate frames |
| |
| 3. ROI-based processing |
| - Only search around previous detection location |
| - Combine tracking with detection |
| |
| 4. Model selection |
| - Haar: Fastest, lower accuracy |
| - dlib HOG: Medium |
| - dlib CNN: Slow, GPU recommended |
| - face_recognition: dlib-based |
| |
| 5. Multi-threading |
| - Separate detection and display threads |
| - Use Queue for frame passing |
| |
| 6. GPU acceleration |
| - dlib CUDA build |
| - OpenCV DNN (CUDA backend) |
+-------------------------------------------------------------+
7. Practice Problems¶
Problem 1: Attendance Check System¶
Implement a face recognition-based attendance check system.
Requirements: - Manage registered user face DB - Real-time webcam recognition - Record attendance time (CSV or DB) - Prevent duplicate attendance (within time period)
Hint
import datetime
import csv
class AttendanceSystem:
    """Face-recognition attendance log with a per-person cooldown."""

    def __init__(self):
        self.attendance_log = {}  # {name: last_check_time}
        self.cooldown = 3600      # seconds between repeat check-ins (1 hour)
        self.csv_path = 'attendance.csv'  # where check-ins are appended

    def mark_attendance(self, name):
        """Record a check-in for `name`; returns True if newly recorded.

        BUGFIX: uses total_seconds() instead of .seconds — .seconds
        ignores the days component of a timedelta (it wraps at 86400),
        so a gap of e.g. exactly 2 days would wrongly be treated as
        still inside the cooldown.
        """
        now = datetime.datetime.now()
        last_check = self.attendance_log.get(name)
        if last_check is None or (now - last_check).total_seconds() > self.cooldown:
            self.attendance_log[name] = now
            self.save_to_csv(name, now)
            return True
        return False

    def save_to_csv(self, name, when):
        """Append one check-in row (name, ISO timestamp) to the CSV log."""
        with open(self.csv_path, 'a', newline='') as f:
            csv.writer(f).writerow([name, when.isoformat()])
Problem 2: Drowsiness Detection System¶
Implement a drowsiness detection system using Eye Aspect Ratio (EAR).
Requirements: - Real-time eye state monitoring - Alert when EAR is below threshold for certain duration - Sound or visual alert - Display current EAR value
Hint
import dlib
from scipy.spatial import distance as dist
import pygame # For alert sound
# Drowsiness-detection tuning constants.
EAR_THRESHOLD = 0.25   # eye considered closed below this EAR
CONSEC_FRAMES = 20     # consecutive closed-eye frames before alerting


def eye_aspect_ratio(eye):
    """Eye aspect ratio: vertical opening relative to eye width."""
    opening_a = dist.euclidean(eye[1], eye[5])
    opening_b = dist.euclidean(eye[2], eye[4])
    width = dist.euclidean(eye[0], eye[3])
    return (opening_a + opening_b) / (2.0 * width)


counter = 0  # consecutive frames with EAR below the threshold
# Alert when EAR < THRESHOLD persists for CONSEC_FRAMES or more
Problem 3: Face Mosaic Processing¶
Write a program that applies mosaic to specific person's face.
Requirements: - Mosaic all faces except designated person - Or mosaic only designated person - Adjustable mosaic intensity
Hint
def mosaic_face(img, rect, scale=0.1):
    """Pixelate the (x, y, w, h) region of img in place and return img.

    scale controls intensity: smaller -> blockier. The region is shrunk
    and then re-enlarged with nearest-neighbour interpolation.

    FIX: the downscaled size is clamped to at least 1x1 pixel so that a
    tiny face or a very small scale cannot produce a zero-sized resize,
    which would make cv2.resize raise.
    """
    x, y, w, h = rect
    roi = img[y:y+h, x:x+w]
    small_w = max(1, int(w * scale))
    small_h = max(1, int(h * scale))
    # Reduce then enlarge (mosaic effect).
    small = cv2.resize(roi, (small_w, small_h))
    mosaic = cv2.resize(small, (w, h), interpolation=cv2.INTER_NEAREST)
    img[y:y+h, x:x+w] = mosaic
    return img
Problem 4: Face Alignment¶
Implement a program that aligns faces based on eye positions.
Requirements: - Detect both eye positions - Rotate to make eyes horizontal - Crop face region and normalize size
Hint
import numpy as np
def align_face(img, left_eye, right_eye, desired_size=(256, 256)):
    """Rotate img so the two given eye points lie on a horizontal line.

    NOTE(review): desired_size is currently unused — cropping and
    resizing to that size is left as part of the exercise.
    """
    # Angle of the inter-eye line relative to horizontal.
    dy = right_eye[1] - left_eye[1]
    dx = right_eye[0] - left_eye[0]
    tilt = np.degrees(np.arctan2(dy, dx))
    # Rotate about the midpoint between the eyes.
    eyes_mid = ((left_eye[0] + right_eye[0]) // 2,
                (left_eye[1] + right_eye[1]) // 2)
    rotation = cv2.getRotationMatrix2D(eyes_mid, tilt, 1.0)
    return cv2.warpAffine(img, rotation, (img.shape[1], img.shape[0]))
Problem 5: Emotion Analysis System¶
Implement a simple emotion analysis system using face landmarks.
Requirements: - Analyze eye, eyebrow, and mouth shapes - Classify basic emotions (happy, sad, surprised, neutral) - Display emotion in real-time
Hint
# Example emotion judgment criteria:
# - Happy: Mouth corners raised (mouth ends y < mouth center y)
# - Surprised: Eyes and mouth wide open (high EAR, high MAR)
# - Sad: Eyebrows drooped, mouth corners down
# - Neutral: Little change
def analyze_emotion(landmarks):
    """Rule-based emotion sketch over 68-point landmarks.

    Incomplete hint code: `threshold` and eye_aspect_ratio() are assumed
    to be defined elsewhere — TODO supply them when completing the
    exercise.
    """
    # Mouth analysis (indices 48-67 of the 68-point model)
    mouth = landmarks[48:68]
    mouth_height = mouth[14][1] - mouth[10][1]  # Mouth height
    # Eye analysis (left eye: indices 36-41)
    left_eye = landmarks[36:42]
    ear = eye_aspect_ratio(left_eye)
    # Rule-based judgment
    if mouth_height > threshold and ear > threshold:
        return "Surprised"
    # ...
Next Steps¶
- 17_Video_Processing.md - VideoCapture, background subtraction, optical flow
References¶
- dlib Documentation
- face_recognition GitHub
- OpenCV Face Recognition
- 68 Face Landmarks
- Kazemi, V., & Sullivan, J. (2014). "One Millisecond Face Alignment with an Ensemble of Regression Trees"