人脸识别(连载中)
根据之前人脸识别项目,自己做了个小的,准备放在家里的树莓派上做个智能安防。
正文
人脸识别技术,其实分为三个算法,人脸检测、人脸对齐、人脸特征编码。
人脸识别系统流程:
- 首先对已知人脸进行以上三个步骤最后得到对应的人脸特征编码,每个已知人脸保存1~3个特征编码
- 摄像头实时采集对人脸图像进行以上三个算法最终得到图像中的人脸特征编码(可能存在多个人脸)
- 通过计算实时图像中人脸特征编码与已知人脸特征编码的L2距离,取距离最小者确定人脸身份
- 通过人脸检测box分辨率过滤小人脸。通过L2阈值过滤未知人脸的特征编码。
所需头文件
1 |
|
模型加载
所有模型采用 TorchScript(torch.jit)固化,因此无需网络结构定义,代码部分只有模型加载和图像预处理部分。
def load_model(ret_pth, arc_path):
    """Load the serialized detection and recognition models onto the CPU.

    Args:
        ret_pth: Path to the TorchScript face-detection model.
        arc_path: Path to the TorchScript face-recognition model.

    Returns:
        Tuple ``(detect, recog, device)``: the two loaded modules, both placed
        on ``device``, and the ``torch.device("cpu")`` they were placed on.
    """
    device = torch.device("cpu")
    # map_location remaps tensor storages while deserializing, so an archive
    # exported on a CUDA machine still loads on a CPU-only host.
    detect = torch.jit.load(ret_pth, map_location=device).to(device)
    recog = torch.jit.load(arc_path, map_location=device).to(device)
    return detect, recog, device
人脸检测
下表为在网络爬取的人脸图像数据集上多种人脸检测算法对比效果
因为爬取图像较多,所以并未统计实际包含人脸数目,对于检测算法实际检测出的人脸结果也仅仅大致检查了下,并未仔细统计,因此下表仅供参考。
算法 | 模型 | 检测出人脸数量 | 所用时间 |
---|---|---|---|
MTCNN | / | 10935张 | 6749.72s |
Retinaface | ResNet50 | 19083张 | 9153.78s |
Retinaface | MobileNet | 16026张 | 1179.37s |
Dlib | / | 12329张 | 4318.97s |
Facebox | mobileNet | 12375张 | 567.14s |
最后选择Retinaface进行封装,固化模型下载链接如下:
ResNet50
MobileNet
代码如下:
def detect(detect_net, device, img_raw):
    """Run the face detector on one image and return the confident faces.

    Args:
        detect_net: Detection model; called as ``detect_net(img)`` and expected
            to return ``(loc, conf, landms)``.
        device: torch device the input tensors are moved to.
        img_raw: Image array of shape (height, width, 3), e.g. a cv2 image.

    Returns:
        A list with one tuple per kept face:
        ``((x1, y1), (x2, y2), landmarks, score)``. The two corners are ints in
        pixel coordinates of ``img_raw``, ``landmarks`` is a list of five
        ``[x, y]`` pairs and ``score`` is the detection confidence.
    """
    # Pre-processing: float copy, per-channel offset, HWC -> NCHW.
    img = np.float32(img_raw)
    im_height, im_width, _ = img.shape
    # Offsets subtracted per channel (presumably the training-set channel
    # means -- confirm against the model's training code).
    img -= (104, 117, 123)
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(device)

    # Multipliers that turn normalized coordinates back into pixels.
    box_scale = torch.Tensor([im_width, im_height] * 2).to(device)
    landm_scale = torch.Tensor([im_width, im_height] * 5).to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        # Encoded box offsets relative to the priors, confidences, landmarks.
        loc, conf, landms = detect_net(img)

    # Decode the predictions against the priors generated for this image size.
    priors = PriorBox(cfg_re50, image_size=(im_height, im_width)).forward()
    priors = priors.to(device)
    variance = cfg_re50['variance']
    boxes = decode(loc.squeeze(0), priors, variance) * box_scale
    boxes = boxes.cpu().numpy()
    # Column 1 of the confidence output is used as the face score.
    scores = conf.squeeze(0).cpu().numpy()[:, 1]
    landms = decode_landm(landms.squeeze(0), priors, variance) * landm_scale
    landms = landms.cpu().numpy()

    # Ignore low scores.
    inds = np.where(scores > cfg_re50['confidence_threshold'])[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # Keep the top-K candidates (highest score first) before NMS.
    order = scores.argsort()[::-1][:cfg_re50['top_k']]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # Non-maximum suppression on (x1, y1, x2, y2, score) rows.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, cfg_re50['nms_threshold'])
    dets = dets[keep, :]
    landms = landms[keep]

    # Cap the number of detections kept after NMS.
    dets = dets[:cfg_re50['keep_top_k'], :]
    landms = landms[:cfg_re50['keep_top_k'], :]
    dets = np.concatenate((dets, landms), axis=1)

    # Row layout: [x1, y1, x2, y2, score, 5 x (landmark_x, landmark_y)].
    faces = []
    for det in dets:
        score = det[4]
        if score > cfg_re50['vis_thres']:
            top_left = (int(det[0]), int(det[1]))
            bottom_right = (int(det[2]), int(det[3]))
            landmarks = [[det[i], det[i + 1]] for i in range(5, 15, 2)]
            faces.append((top_left, bottom_right, landmarks, score))
    return faces
解码工具类PriorBox.py:
from itertools import product as product
from math import ceil

import numpy as np
import torch
cfg_re50 = {
    # Anchor side lengths (pixels) per feature level; entry k is paired with
    # the stride steps[k] when PriorBox builds the anchors.
    "min_sizes": [[16, 32], [64, 128], [256, 512]],
    "steps": [8, 16, 32],
    # Scaling factors used when decoding offsets: [centre, size].
    "variance": [0.1, 0.2],
    # When true, PriorBox clamps the generated anchors to [0, 1].
    "clip": False,
    # Post-processing parameters read by detect().
    "confidence_threshold": 0.02,
    "nms_threshold": 0.4,
    "top_k": 5000,
    "keep_top_k": 750,
    "vis_thres": 0.6,
}
def decode(loc, priors, variances):
    """Decode predicted box offsets into corner-form boxes.

    Args:
        loc: Tensor (num_priors, 4); columns 0-1 are centre offsets and
            columns 2-3 are log-scale size offsets.
        priors: Tensor (num_priors, 4) of anchors as (cx, cy, w, h).
        variances: Pair of scaling factors; ``variances[0]`` scales the centre
            offsets and ``variances[1]`` the size offsets.

    Returns:
        Tensor (num_priors, 4) of boxes as (x_min, y_min, x_max, y_max), in the
        same units as ``priors``.
    """
    centers = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    sizes = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
    # Centre/size form -> corner form.
    top_left = centers - sizes / 2
    return torch.cat((top_left, top_left + sizes), 1)
def decode_landm(pre, priors, variances):
    """Decode predicted landmark offsets into landmark coordinates.

    Args:
        pre: Tensor (num_priors, 2 * K) of offsets laid out as
            (x0, y0, x1, y1, ...). The detector in this file uses K = 5.
        priors: Tensor (num_priors, 4) of anchors as (cx, cy, w, h).
        variances: Scaling factors; only ``variances[0]`` is used.

    Returns:
        Tensor (num_priors, 2 * K) of decoded landmarks in the same layout as
        ``pre`` and the same units as ``priors``.
    """
    num_priors, num_coords = pre.shape[0], pre.shape[1]
    # View the flat row as K (x, y) pairs so one broadcast handles every point.
    offsets = pre.reshape(num_priors, num_coords // 2, 2)
    points = priors[:, None, :2] + offsets * variances[0] * priors[:, None, 2:]
    return points.reshape(num_priors, num_coords)
def py_cpu_nms(dets, thresh):
    """Greedy non-maximum suppression implemented with NumPy.

    Args:
        dets: Array (N, >=5) whose first five columns are
            (x1, y1, x2, y2, score).
        thresh: Overlap (IoU) threshold; a box is dropped when its overlap
            with an already kept, higher-scoring box exceeds this value.

    Returns:
        List of row indices into ``dets`` that are kept, ordered by
        descending score.
    """
    x1, y1, x2, y2, scores = (dets[:, col] for col in range(5))
    # The +1 follows the original baseline: widths/heights are computed as
    # if the corner coordinates were inclusive pixel indices.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(best)

        # Intersection of the best box with every remaining candidate.
        inter_w = np.maximum(0.0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest]) + 1)
        inter_h = np.maximum(0.0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest]) + 1)
        inter = inter_w * inter_h
        overlap = inter / (areas[best] + areas[rest] - inter)

        # Only candidates that do not overlap too much survive this round.
        order = rest[overlap <= thresh]
    return keep
class PriorBox(object):
    """Generate the anchors ("priors") for an image of a given size.

    Args:
        cfg: Mapping providing ``'min_sizes'`` (anchor side lengths per
            feature level), ``'steps'`` (stride per feature level) and
            ``'clip'`` (whether to clamp the anchors to [0, 1]).
        image_size: ``(height, width)`` of the network input.
        phase: Accepted for interface compatibility; not used.
    """

    def __init__(self, cfg, image_size=None, phase='train'):
        super(PriorBox, self).__init__()
        self.min_sizes = cfg['min_sizes']
        self.steps = cfg['steps']
        self.clip = cfg['clip']
        self.image_size = image_size
        # One [rows, cols] grid per stride, rounded up so the grid covers
        # the whole image.
        self.feature_maps = [
            [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)]
            for step in self.steps
        ]
        self.name = "s"

    def forward(self):
        """Return a (num_anchors, 4) tensor of anchors as (cx, cy, w, h).

        All values are normalized by the image width (cx, w) or height
        (cy, h). Anchors are ordered by feature level, then grid row, then
        grid column, then by entry in that level's ``min_sizes``.
        """
        img_h, img_w = self.image_size[0], self.image_size[1]
        anchors = []
        for k, grid in enumerate(self.feature_maps):
            level_sizes = self.min_sizes[k]
            step = self.steps[k]
            for i, j in product(range(grid[0]), range(grid[1])):
                # Cell centre; identical for every anchor size in the cell.
                cx = (j + 0.5) * step / img_w
                cy = (i + 0.5) * step / img_h
                for min_size in level_sizes:
                    anchors += [cx, cy, min_size / img_w, min_size / img_h]
        # back to torch land
        output = torch.Tensor(anchors).view(-1, 4)
        if self.clip:
            output.clamp_(max=1, min=0)
        return output
人脸对齐
采用最常用的仿射变换,根据上一步人脸检测得到的关键点将人脸摆正对齐。
import numpy as np
import cv2
# Target (x, y) positions of the five facial landmarks inside the aligned
# crop (presumably left eye, right eye, nose tip, left and right mouth
# corners -- confirm against the detector's landmark order).
REFERENCE_FACIAL_POINTS = [
    [30.29459953, 51.69630051],
    [65.53179932, 51.50139999],
    [48.02519989, 71.73660278],
    [33.54930115, 92.3655014],
    [62.72990036, 92.20410156],
]

# Crop size the reference points above are expressed in.
# NOTE(review): warp_and_crop_face defaults to crop_size=(96, 112) and the
# x-coordinates above look centred for a 96-wide crop; with a square
# (112, 112) size the default_square branch of get_reference_facial_points
# adds no offset. Confirm which crop size the template is meant for.
DEFAULT_CROP_SIZE = (112, 112)
class FaceWarpException(Exception):
    """Raised when the inputs to the face-alignment helpers are invalid."""

    def __str__(self):
        # super(...) must be *called* to reach Exception.__str__; the bare
        # ``super`` type resolves to object.__str__, which renders the
        # exception's repr instead of its message.
        return 'In File {}:{}'.format(
            __file__, super(FaceWarpException, self).__str__())
def get_reference_facial_points(output_size=None,
                                inner_padding_factor=0.0,
                                outer_padding=(0, 0),
                                default_square=False):
    """Return the 5-point landmark template for a given crop geometry.

    Starts from ``REFERENCE_FACIAL_POINTS`` (defined for
    ``DEFAULT_CROP_SIZE``) and applies the requested padding and scaling.

    Args:
        output_size: ``(w, h)`` of the final crop, or ``None`` to deduce it
            from the paddings.
        inner_padding_factor: Fraction in [0, 1] of the crop size added as
            padding on every side before scaling.
        outer_padding: ``(pad_x, pad_y)`` added around the scaled crop.
        default_square: When true, the default crop is first padded to a
            square by widening its shorter side.

    Returns:
        ``numpy`` array of shape (5, 2) with the reference (x, y) points.

    Raises:
        FaceWarpException: If the arguments are inconsistent with each other.
    """
    tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
    tmp_crop_size = np.array(DEFAULT_CROP_SIZE)

    if default_square:
        # Pad the shorter side and shift the points to stay centred.
        size_diff = max(tmp_crop_size) - tmp_crop_size
        tmp_5pts += size_diff / 2
        tmp_crop_size += size_diff

    # Compare element-wise through tuples so array-like sizes work too
    # (truth-testing an ndarray raises).
    if (output_size is not None and
            tuple(output_size[:2]) == tuple(tmp_crop_size[:2])):
        return tmp_5pts

    if inner_padding_factor == 0 and tuple(outer_padding) == (0, 0):
        if output_size is None:
            return tmp_5pts
        raise FaceWarpException(
            'No paddings to do, output_size must be None or {}'.format(tmp_crop_size))

    # check output size
    if not (0 <= inner_padding_factor <= 1.0):
        raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')

    if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0)
            and output_size is None):
        # Scale the whole product before casting; ``.astype`` belongs to the
        # array, not to the scalar factor.
        output_size = (tmp_crop_size * (1 + inner_padding_factor * 2)).astype(np.int32)
        output_size = output_size + np.array(outer_padding)

    if not (outer_padding[0] < output_size[0]
            and outer_padding[1] < output_size[1]):
        raise FaceWarpException('Not (outer_padding[0] < output_size[0]'
                                'and outer_padding[1] < output_size[1])')

    if inner_padding_factor > 0:
        size_diff = tmp_crop_size * inner_padding_factor * 2
        tmp_5pts += size_diff / 2
        tmp_crop_size += np.round(size_diff).astype(np.int32)

    # The area left after removing the outer padding must keep the aspect
    # ratio of the (inner-padded) crop so that one scale factor suffices.
    size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2
    if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]:
        raise FaceWarpException('Must have (output_size - outer_padding)'
                                '= some_scale * (crop_size * (1.0 + inner_padding_factor)')

    scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
    return tmp_5pts * scale_factor + np.array(outer_padding)
def get_affine_transform_matrix(src_pts, dst_pts):
    """Fit a 2x3 affine transform mapping ``src_pts`` onto ``dst_pts``.

    The transform is the least-squares solution over all point pairs.

    Args:
        src_pts: Array (K, 2) of source points.
        dst_pts: Array (K, 2) of destination points.

    Returns:
        ``float32`` array of shape (2, 3). When the point configuration has
        rank 2 the translation column is zero; for lower rank the identity
        transform is returned.
    """
    n_pts = src_pts.shape[0]
    ones = np.ones((n_pts, 1), src_pts.dtype)
    # Homogeneous coordinates: solve src_h @ A = dst_h in the least-squares sense.
    src_h = np.hstack([src_pts, ones])
    dst_h = np.hstack([dst_pts, ones])
    # rcond=None selects NumPy's documented default cut-off explicitly and
    # avoids the FutureWarning older releases emit when it is omitted.
    A, _residuals, rank, _singular = np.linalg.lstsq(src_h, dst_h, rcond=None)

    if rank == 3:
        # Row i of the transform is column i of A.
        return np.ascontiguousarray(A[:, :2].T, dtype=np.float32)
    if rank == 2:
        tfm = np.zeros((2, 3), dtype=np.float32)
        tfm[:, :2] = A[:2, :2].T
        return tfm
    return np.float32([[1, 0, 0], [0, 1, 0]])
def _as_kx2_points(pts, name):
    """Return ``pts`` as a float32 (K, 2) array, validating its shape."""
    arr = np.float32(pts)
    shape = arr.shape
    if max(shape) < 3 or min(shape) != 2:
        raise FaceWarpException(
            '{}.shape must be (K,2) or (2,K) and K>2'.format(name))
    # Accept the transposed (2, K) layout as well.
    return arr.T if shape[0] == 2 else arr


def warp_and_crop_face(src_img,
                       facial_pts,
                       reference_pts=None,
                       crop_size=(96, 112),
                       align_type='smilarity'):
    """Warp ``src_img`` so its landmarks land on the reference positions.

    Args:
        src_img: Source image passed to ``cv2.warpAffine``.
        facial_pts: Detected landmarks, shape (K, 2) or (2, K) with K > 2.
        reference_pts: Target landmark positions in the output crop, same
            layout and count as ``facial_pts``. Must be provided.
        crop_size: ``(width, height)`` of the output image.
        align_type: ``'cv2_affine'`` fits the transform with
            ``cv2.getAffineTransform`` on the first three points; any other
            value uses the least-squares fit over all points.

    Returns:
        The warped image of size ``crop_size``.

    Raises:
        FaceWarpException: If the point arrays are missing, malformed, or of
            different shapes.
    """
    if reference_pts is None:
        # Without this guard the shape check below fails with an unrelated
        # "max() arg is an empty sequence" error.
        raise FaceWarpException('reference_pts must be provided')

    ref_pts = _as_kx2_points(reference_pts, 'reference_pts')
    src_pts = _as_kx2_points(facial_pts, 'facial_pts')

    if src_pts.shape != ref_pts.shape:
        raise FaceWarpException(
            'facial_pts and reference_pts must have the same shape')

    # Compare by value; ``is`` on a string literal tests object identity.
    if align_type == 'cv2_affine':
        tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
    else:
        tfm = get_affine_transform_matrix(src_pts, ref_pts)

    return cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]))
人脸识别
采用insightface,固化模型如下:
编码函数调用如下:
def recognition(rec_net, face, device):
    """Encode one aligned face image into its feature embedding.

    Args:
        rec_net: Recognition model; called on a batch of one image.
        face: Aligned face image accepted by ``torch_transform``.
        device: torch device the input tensor is moved to.

    Returns:
        The output of ``rec_net`` for the single-image batch.
    """
    batch = torch_transform(face).to(device).unsqueeze(0)
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        emb = rec_net(batch)
    return emb
# Pre-processing applied to every face before it is fed to the recognition
# model: tensor conversion followed by per-channel normalization with
# mean 0.5 and std 0.5 (trans2 is presumably torchvision.transforms -- its
# import is not visible here).
_FACE_MEAN = [0.5, 0.5, 0.5]
_FACE_STD = [0.5, 0.5, 0.5]
torch_transform = trans2.Compose(
    [
        trans2.ToTensor(),
        trans2.Normalize(_FACE_MEAN, _FACE_STD),
    ]
)
主函数
1 |
|
本博客所有文章除特别声明外,均采用 CC BY-SA 4.0 协议 ,转载请注明出处!