-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
147 lines (122 loc) · 5.45 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import argparse
import cv2
import os
from face_detect import get_face_embeddings, is_same_person
def scan_video(args):
    """Scan a video, detect faces frame by frame, and save one crop per unique face.

    Pipeline:
      1. Sample every ``args.skip_frame``-th frame and collect face embeddings
         above ``args.confidence_threshold``.
      2. Deduplicate the embeddings by pairwise identity comparison.
      3. Seek back to each unique face's frame and save the cropped face image.

    Args:
        args: Parsed CLI namespace with ``video_path``, ``skip_frame``,
            ``face_output_dir``, ``frame_output_dir`` and
            ``confidence_threshold`` attributes (see ``arg_parser``).
    """
    video_capture = cv2.VideoCapture(args.video_path)

    face_encodings = []

    # Create the output directories up front.
    face_output_dir = args.face_output_dir
    os.makedirs(face_output_dir, exist_ok=True)
    frame_output_dir = args.frame_output_dir
    if frame_output_dir is not None:
        os.makedirs(frame_output_dir, exist_ok=True)

    frame_count = 0
    processed_frame_count = 0
    skip_frames = args.skip_frame  # Process every skip_frames-th frame (1 = every frame).
    conf_threshold = args.confidence_threshold

    while True:
        # grab() is cheap; we only pay for decoding (retrieve) on sampled frames.
        ret = video_capture.grab()
        if not ret:
            break
        frame_count += 1
        if frame_count % skip_frames == 0:  # sampled frame
            status, frame = video_capture.retrieve()  # decode the grabbed frame
            if not status or frame is None:
                # FIX: the retrieve status was previously ignored, which would
                # crash cvtColor on a failed decode. Skip undecodable frames.
                continue
            processed_frame_count += 1
            # OpenCV decodes BGR; DeepFace expects RGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Detect and encode faces. Each entry carries 'face_confidence',
            # 'facial_area' (x/y/w/h + eye landmarks) and 'embedding'
            # (structure produced by face_detect.get_face_embeddings).
            embeddings = get_face_embeddings(rgb_frame)
            if embeddings is not None:
                face_found = 0
                for emb in embeddings:
                    if emb["face_confidence"] >= conf_threshold:
                        # Skip detections with a missing eye landmark so we
                        # only keep roughly frontal faces.
                        if emb['facial_area']['left_eye'] is None or emb['facial_area']['right_eye'] is None:
                            continue
                        # Remember where this face was seen so the crop can be
                        # re-read in the final pass.
                        emb['frame_num'] = frame_count
                        face_encodings.append(emb)
                        face_found += 1
                print(
                    f"Found {face_found} faces in frame {frame_count}")
                # Draw bounding boxes with the confidence score on the frame
                # (all detections, not only those above the threshold — as
                # before; the redundant second None-check has been merged).
                for face_data in embeddings:
                    x = face_data['facial_area']['x']
                    y = face_data['facial_area']['y']
                    w = face_data['facial_area']['w']
                    h = face_data['facial_area']['h']
                    conf = face_data['face_confidence']
                    cv2.rectangle(frame, (x, y), (x + w, y + h),
                                  (0, 0, 255), 2)
                    cv2.putText(frame, f"{conf:.2f}", (x, y - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)
            # Save the annotated frame when requested.
            if frame_output_dir:
                cv2.imwrite(
                    f'{frame_output_dir}/frame_{frame_count}.jpg', frame)

    print("Extracting unique faces...")
    # Deduplicate embeddings: O(n^2) pairwise comparison, acceptable for the
    # modest number of faces found in a single video.
    unique_face_encodings = []
    for encoding in face_encodings:
        is_unique = True
        for unique_encoding in unique_face_encodings:
            # is_same_person accepts the precomputed embedding vectors.
            face1 = encoding['embedding']
            face2 = unique_encoding['embedding']
            if is_same_person(face1, face2):
                is_unique = False
                break
        if is_unique:
            unique_face_encodings.append(encoding)
    print(f"Found {len(unique_face_encodings)} unique faces in the video.")

    # Re-read each unique face's frame and save just the face crop.
    face_id = 0
    for encoding in unique_face_encodings:
        print("looping through unique_face_encodings")
        frame_num = encoding['frame_num']
        # FIX: CAP_PROP_POS_FRAMES is the 0-based index of the frame the next
        # read() will return, while frame_num is 1-based (incremented after the
        # first grab). Seek to frame_num - 1 so the crop comes from the exact
        # frame the face was detected in; the original was off by one frame.
        video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame_num - 1)
        ret, frame = video_capture.read()
        if not ret:
            print("Error reading video frame while doing the final capture. Skipping...")
            continue
        x = encoding['facial_area']['x']
        y = encoding['facial_area']['y']
        w = encoding['facial_area']['w']
        h = encoding['facial_area']['h']
        # Clamp detections that extend past the top/left edge of the frame;
        # numpy slicing already clamps the bottom/right edge.
        w = w + x if x < 0 else w
        h = h + y if y < 0 else h
        x = max(x, 0)
        y = max(y, 0)
        face = frame[y:y+h, x:x+w]
        print(f"Saving face_{face_id}.jpg")
        cv2.imwrite(f'{face_output_dir}/face_{face_id}.jpg', face)
        face_id += 1

    # Release the capture handle.
    video_capture.release()
def arg_parser():
    """Parse command-line arguments for the face-extraction script.

    Returns:
        argparse.Namespace with ``video_path``, ``skip_frame``,
        ``face_output_dir``, ``frame_output_dir`` and ``confidence_threshold``.
    """
    parser = argparse.ArgumentParser(
        description='Extract unique faces from a video')
    parser.add_argument('video_path', type=str, help='Path to the video file')
    parser.add_argument('--skip_frame', type=int, default=1,
                        help='Sample every Nth frame (1 processes every frame)')
    parser.add_argument('--face_output_dir', type=str,
                        default='output_faces', help='Path to save the faces')
    # FIX: help text typo "doesn not" -> "does not".
    parser.add_argument('--frame_output_dir', type=str, default=None,
                        help='Path to save the frames. Default is None which does not save frames')
    parser.add_argument('--confidence_threshold', type=float, default=0.9,
                        help='Confidence threshold for face detection')
    return parser.parse_args()
if __name__ == '__main__':
    args = arg_parser()
    # Measure wall-clock time of the whole scan.
    import time  # local import: timing is only needed when run as a script
    # perf_counter is monotonic and meant for elapsed-time measurement;
    # time.time() can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    scan_video(args)
    end = time.perf_counter()
    print(f"Time taken: {end - start} seconds")