-
Notifications
You must be signed in to change notification settings - Fork 2
/
main_facerec.py
194 lines (177 loc) · 8.26 KB
/
main_facerec.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
'''
To execute simply run:
main_facerec.py
To input new user:
main_facerec.py --mode "input"
'''
import cv2
from align_custom import AlignCustom
from face_feature import FaceFeature
from mtcnn_detect import MTCNNDetect
from database import *
from notif import *
from tf_graph import FaceRecGraph
import argparse
import sys
import json
import numpy as np
import imutils
import datetime
from notify_run import Notify
# Per-person sighting counter shared with camera_recog(); a database row is
# only written after the same name is recognized several frames in a row.
nama_detected_count = {}


def main(args):
    """Dispatch to the requested run mode.

    :param args: parsed CLI namespace; ``args.mode`` selects the behavior
                 ("camera" runs recognition, "input" enrolls a new user)
    :raises ValueError: for any other mode string
    """
    requested = args.mode
    if requested == "input":
        create_manual_data()
    elif requested == "camera":
        camera_recog()
    else:
        raise ValueError("Unimplemented mode")
'''
Description:
Images from Video Capture -> detect faces' regions -> crop those faces and align them
-> each cropped face is categorized in 3 types: Center, Left, Right
-> Extract 128D vectors( face features)
-> Search for matching subjects in the dataset based on the types of face positions.
The preexisting face 128D vector with the shortest distance to the 128D vector of the face on screen is most likely a match
(Distance threshold is 0.6, percentage threshold is 70%)
'''
def camera_recog():
    """Recognize faces from an RTSP camera stream and log attendance.

    Frames are read in a loop; each detected face is aligned, embedded into a
    128-D feature vector and matched against the stored dataset via
    findPeople(). A person recognized with >= 90% confidence in 5 frames is
    written to the attendance database with a status derived from the current
    wall-clock time. Press 'q' in the preview window to quit.
    """
    notify = Notify()  # notify-run channel; instantiation kept for its side effects
    print("[INFO] camera sensor warming up...")
    # NOTE(review): credentials are hard-coded in the stream URL — move to config.
    vs = cv2.VideoCapture("rtsp://admin:gspe12345@192.168.0.39:554/PSIA/streaming/channels/801")
    kamera = "kamera 1"  # camera label recorded with every database row
    while True:
        _, frame = vs.read()
        rects, landmarks = face_detect.detect_face(frame, 30)  # min face size 30px
        aligns = []
        positions = []
        for i, rect in enumerate(rects):
            aligned_face, face_pos = aligner.align(160, frame, landmarks[i])
            if len(aligned_face) == 160 and len(aligned_face[0]) == 160:
                aligns.append(aligned_face)
                positions.append(face_pos)
            else:
                print("Align face failed")  # log
        if aligns:
            features_arr = extract_feature.get_features(aligns)
            recog_data = findPeople(features_arr, positions)
            for i, rect in enumerate(rects):
                # BUG FIX: the original called time.time() but never imported
                # `time`; datetime alone yields the HH:MM:SS wall-clock string.
                timestamp = datetime.datetime.now().strftime('%H:%M:%S')
                x, y, w, h = rect[0], rect[1], rect[2], rect[3]
                cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0))  # face bounding box
                name, score = recog_data[i][0], recog_data[i][1]
                cv2.putText(frame, name, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 1, cv2.LINE_AA)
                if name != 'Unknown' and score >= 90:
                    nama_detected_count.setdefault(name, []).append(name)
                    print(nama_detected_count)
                    # Require 5 confident sightings before touching the database.
                    if len(nama_detected_count[name]) >= 5:
                        nama_detected_count.clear()
                        # Fixed-width HH:MM:SS strings compare correctly in
                        # lexicographic order. BUG FIX: use >=/< so boundary
                        # times (e.g. exactly 08:45:00) are not dropped.
                        if '06:00:00' <= timestamp < '08:45:00':
                            data(name, kamera, frame)
                            datang(name, kamera, "Tepat Waktu", frame)  # on time
                        elif '08:45:00' <= timestamp < '17:30:00':
                            data(name, kamera, frame)
                            datang(name, kamera, "Terlambat", frame)  # late
                        elif '17:30:00' <= timestamp < '23:59:00':
                            data(name, kamera, frame)
                            balik(name, kamera, frame)  # leaving for the day
                        else:
                            data(name, kamera, frame)  # outside all windows
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(5) & 0xFF
        if key == ord("q"):
            break
    vs.release()  # cleanup the camera
    cv2.destroyAllWindows()  # BUG FIX: close the preview window on exit
'''
facerec_128D.txt Data Structure:
{
"Person ID": {
"Center": [[128D vector]],
"Left": [[128D vector]],
"Right": [[128D Vector]]
}
}
This function basically does a simple linear search for
the 128D vector with the min distance to the 128D vector of the face on screen
'''
def findPeople(features_arr, positions, thres=0.6, percent_thres=70):
    """Match on-screen face features against the stored dataset.

    Linear search: for each face, find the stored 128-D vector (for the same
    pose type) with the minimum Euclidean distance.

    :param features_arr: a list of 128d features of all faces on screen
    :param positions: a list of face position types ("Left"/"Right"/"Center")
                      of all faces on screen
    :param thres: distance threshold used to derive the match percentage
    :param percent_thres: matches at or below this percentage become "Unknown"
    :return: list of (person name, percentage) tuples, one per input face
    """
    # BUG FIX: use a context manager so the dataset file is always closed.
    with open('./facerec_128D.txt', 'r') as f:
        data_set = json.loads(f.read())
    returnRes = []
    for i, features_128D in enumerate(features_arr):
        result = "Unknown"
        smallest = sys.maxsize
        for person in data_set.keys():
            person_data = data_set[person][positions[i]]
            for data in person_data:
                # BUG FIX: this line was commented out in the original, so
                # `distance` was undefined and the comparison below raised
                # NameError. np.asarray handles the JSON-decoded list.
                distance = np.sqrt(np.sum(np.square(np.asarray(data) - features_128D)))
                if distance < smallest:
                    smallest = distance
                    result = person
        # BUG FIX: guard an exact match (smallest == 0) against division by zero.
        percentage = 100 if smallest == 0 else min(100, 100 * thres / smallest)
        if percentage <= percent_thres:
            result = "Unknown"
        returnRes.append((result, percentage))
    return returnRes
'''
Description:
User input his/her name or ID -> Images from Video Capture -> detect the face -> crop the face and align it
-> face is then categorized in 3 types: Center, Left, Right
-> Extract 128D vectors( face features)
-> Append each newly extracted face 128D vector to its corresponding position type (Center, Left, Right)
-> Press Q to stop capturing
-> Find the center ( the mean) of those 128D vectors in each category. ( np.mean(...) )
-> Save
'''
def create_manual_data():
    """Capture aligned face images for a new user and save their features.

    The operator types the new user's name/ID, then slowly turns their head in
    front of the camera so Left/Right/Center poses are collected. Press 'q' to
    stop capturing; the mean 128-D feature vector per pose is then appended to
    facerec_128D.txt under the new name.
    """
    # NOTE(review): credentials are hard-coded in the stream URL — move to config.
    vs = cv2.VideoCapture('rtsp://admin:gspe12345@10.20.1.86:554/PSIA/streaming/channels/101')
    print("Please input new user ID:")
    new_name = input()
    # Load the existing dataset so the new entry is appended, not overwritten.
    # BUG FIX: context managers guarantee the file handles are closed.
    with open('./facerec_128D.txt', 'r') as f:
        data_set = json.loads(f.read())
    person_imgs = {"Left": [], "Right": [], "Center": []}
    person_features = {"Left": [], "Right": [], "Center": []}
    print("Please start turning slowly. Press 'q' to save and add this new user to the dataset")
    try:
        while True:
            _, frame = vs.read()
            rects, landmarks = face_detect.detect_face(frame, 30)  # min face size 30px
            print(rects)
            for i, rect in enumerate(rects):
                aligned_frame, pos = aligner.align(160, frame, landmarks[i])
                if len(aligned_frame) == 160 and len(aligned_frame[0]) == 160:
                    person_imgs[pos].append(aligned_frame)
                    cv2.imshow("Captured face", aligned_frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break
    finally:
        vs.release()  # BUG FIX: the original never released the capture
        cv2.destroyAllWindows()
    for pos in person_imgs:
        # BUG FIX: skip poses with no captures — np.mean of an empty list
        # would produce NaN and corrupt the stored dataset.
        if person_imgs[pos]:
            person_features[pos] = [np.mean(extract_feature.get_features(person_imgs[pos]), axis=0).tolist()]
    data_set[new_name] = person_features
    with open('./facerec_128D.txt', 'w') as f:
        f.write(json.dumps(data_set))
if __name__ == '__main__':
    # CLI: --mode "camera" (default) runs recognition, --mode "input" enrolls.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--mode", type=str, help="Run camera recognition", default="camera")
    args = arg_parser.parse_args(sys.argv[1:])

    # Build the shared TensorFlow graph first; the models below all attach to it.
    FRGraph = FaceRecGraph()
    aligner = AlignCustom()
    extract_feature = FaceFeature(FRGraph)
    face_detect = MTCNNDetect(FRGraph, scale_factor=2)  # scale_factor rescales frames for faster detection
    main(args)