人脸识别(连载中)

根据之前人脸识别项目,自己做了个小的,准备放在家里的树莓派上做个智能安防。

正文

人脸识别技术,其实分为三个算法,人脸检测、人脸对齐、人脸特征编码。
人脸识别系统流程:

  • 首先对已知人脸进行以上三个步骤最后得到对应的人脸特征编码,每个已知人脸保存1~3个特征编码
  • 摄像头实时采集图像,对图像依次执行以上三个算法,最终得到图像中的人脸特征编码(可能存在多个人脸)
  • 计算实时图像中人脸特征编码与各已知人脸特征编码的L2距离,取距离最小者确定人脸身份
  • 通过人脸检测box分辨率过滤小人脸。通过L2阈值过滤未知人脸的特征编码。

所需头文件

1
2
3
import cv2
import numpy as np
import torch
from torchvision import transforms as trans2

模型加载

所有模型采用torch.jit(TorchScript)固化,因此无需网络结构定义,代码部分只有模型加载和图像预处理。

1
2
3
4
5
6
7
8
9
10
11
def load_model(ret_pth, arc_path):
    """Load the serialized face-detection and face-recognition models on the CPU.

    Args:
        ret_pth: Path to the TorchScript face-detection model.
        arc_path: Path to the TorchScript face-recognition model.

    Returns:
        A ``(detect_net, recog_net, device)`` tuple: the two loaded modules
        (placed on ``device`` and switched to eval mode) and the CPU
        ``torch.device`` they live on.
    """
    device = torch.device("cpu")
    # map_location remaps every stored tensor onto the CPU while loading, so a
    # model that was saved from a GPU can still be loaded on a CPU-only machine.
    detect_net = torch.jit.load(ret_pth, map_location=device).eval()
    recog_net = torch.jit.load(arc_path, map_location=device).eval()
    return detect_net, recog_net, device

人脸检测

下表为在网络爬取的人脸图像数据集上多种人脸检测算法对比效果

因为爬取图像较多,所以并未统计实际包含人脸数目,对于检测算法实际检测出的人脸结果也仅仅大致检查了下,并未仔细统计,因此下表仅供参考。

算法 模型 检测出人脸数量 所用时间
MTCNN / 10935张 6749.72s
Retinaface ResNet50 19083张 9153.78s
Retinaface mobileNet 16026张 1179.37s
Dlib / 12329张 4318.97s
Facebox mobileNet 12375张 567.14s

最后选择Retinaface进行封装,固化模型下载链接如下:
ResNet50
MobileNet

代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def detect(detect_net, device, img_raw):
    """Run the face detector on one image and return the confident detections.

    Args:
        detect_net: Detection model; called with a ``(1, 3, H, W)`` float tensor
            and expected to return ``(loc, conf, landms)``.
        device: ``torch.device`` the model lives on (CPU or GPU).
        img_raw: Image array of shape ``(H, W, 3)`` as produced by OpenCV.

    Returns:
        A list with one tuple per face whose score exceeds
        ``cfg_re50['vis_thres']``:
        ``((x1, y1), (x2, y2), [[px, py] * 5], score)`` where the box corners
        are ints and the five landmark points and the score are floats, all in
        pixel coordinates of ``img_raw``.
    """
    # Work on a float32 copy so the in-place mean subtraction below can never
    # modify the caller's image.
    img = np.array(img_raw, dtype=np.float32)
    im_height, im_width, _ = img.shape
    img -= (104, 117, 123)  # per-channel mean subtraction
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(device)

    # Inference only: disabling autograd avoids building a graph and lets the
    # outputs be converted to NumPy directly.
    with torch.no_grad():
        # Encoded box offsets, class confidences and encoded landmarks.
        loc, conf, landms = detect_net(img)

        priors = PriorBox(cfg_re50, image_size=(im_height, im_width)).forward()
        priors = priors.to(device)

        # Decoded values are normalized to [0, 1]; scale back to pixels.
        box_scale = torch.Tensor([im_width, im_height] * 2).to(device)
        landm_scale = torch.Tensor([im_width, im_height] * 5).to(device)

        boxes = decode(loc.squeeze(0), priors, cfg_re50['variance']) * box_scale
        landms = decode_landm(landms.squeeze(0), priors,
                              cfg_re50['variance']) * landm_scale
        scores = conf.squeeze(0)[:, 1]

    boxes = boxes.cpu().numpy()
    landms = landms.cpu().numpy()
    scores = scores.cpu().numpy()

    # Drop low-confidence candidates.
    inds = np.where(scores > cfg_re50['confidence_threshold'])[0]
    boxes, landms, scores = boxes[inds], landms[inds], scores[inds]

    # Keep only the top-K candidates (by score) before NMS.
    order = scores.argsort()[::-1][:cfg_re50['top_k']]
    boxes, landms, scores = boxes[order], landms[order], scores[order]

    # Non-maximum suppression.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = np.asarray(py_cpu_nms(dets, cfg_re50['nms_threshold']), dtype=np.intp)
    dets = dets[keep, :]
    landms = landms[keep]

    # Cap the number of detections kept after NMS.
    dets = dets[:cfg_re50['keep_top_k'], :]
    landms = landms[:cfg_re50['keep_top_k'], :]

    results = []
    for det, pts in zip(dets, landms):
        score = det[4]
        if score > cfg_re50['vis_thres']:
            results.append((
                (int(det[0]), int(det[1])),
                (int(det[2]), int(det[3])),
                [[pts[2 * k], pts[2 * k + 1]] for k in range(5)],
                score,
            ))
    return results

解码工具类PriorBox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import torch
from itertools import product as product
from math import ceil

# Configuration shared by PriorBox, the decode helpers and detect().
cfg_re50 = {
# Anchor side lengths (in input-image pixels) for each feature-map level.
'min_sizes': [[16, 32], [64, 128], [256, 512]],
# Stride of each feature-map level; PriorBox derives the feature-map size
# as ceil(image_size / step).
'steps': [8, 16, 32],
# Scaling factors used when decoding: [0] for centre/landmark offsets,
# [1] for the box width/height term.
'variance': [0.1, 0.2],
# When true, PriorBox.forward clamps the generated anchors into [0, 1].
'clip': False,
# Runtime detection parameters
# Candidates whose score is not above this value are dropped before NMS.
'confidence_threshold':0.02,
# Overlap threshold passed to py_cpu_nms.
'nms_threshold':0.4,
# Maximum number of candidates kept before NMS.
'top_k':5000,
# Maximum number of detections kept after NMS.
'keep_top_k':750,
# Minimum score for a detection to be returned by detect().
'vis_thres':0.6
}

def decode(loc, priors, variances):
    """Turn encoded box offsets into corner-form boxes.

    Args:
        loc: Tensor of shape ``(N, 4)`` with encoded centre offsets (first two
            columns) and encoded log-size terms (last two columns).
        priors: Tensor of shape ``(N, 4)`` holding ``cx, cy, w, h`` per prior.
        variances: Two scaling factors; ``[0]`` scales the centre offsets and
            ``[1]`` scales the size term.

    Returns:
        Tensor of shape ``(N, 4)`` with ``x1, y1, x2, y2`` per box, in the same
        units as ``priors``.
    """
    prior_centers, prior_sizes = priors[:, :2], priors[:, 2:]

    centers = prior_centers + loc[:, :2] * variances[0] * prior_sizes
    sizes = prior_sizes * torch.exp(loc[:, 2:] * variances[1])

    # Convert centre/size form to corner form.
    top_left = centers - sizes / 2
    bottom_right = sizes + top_left
    return torch.cat((top_left, bottom_right), 1)

def decode_landm(pre, priors, variances):
    """Turn encoded landmark offsets into landmark coordinates.

    Args:
        pre: Tensor of shape ``(N, 10)``: five encoded ``(x, y)`` offsets per
            prior, stored consecutively.
        priors: Tensor of shape ``(N, 4)`` holding ``cx, cy, w, h`` per prior.
        variances: Scaling factors; only ``variances[0]`` is used here.

    Returns:
        Tensor of shape ``(N, 10)`` with the five decoded ``(x, y)`` points per
        prior, in the same units as ``priors``.
    """
    anchor_xy = priors[:, :2]
    anchor_wh = priors[:, 2:]
    points = [
        anchor_xy + pre[:, start:start + 2] * variances[0] * anchor_wh
        for start in range(0, 10, 2)
    ]
    return torch.cat(points, dim=1)


def py_cpu_nms(dets, thresh):
    """Greedy non-maximum suppression on the CPU.

    Args:
        dets: Array with one row per detection; columns 0-3 are
            ``x1, y1, x2, y2`` and column 4 is the score.
        thresh: A box is discarded when its overlap ratio with an already
            selected, higher-scoring box is greater than this value.

    Returns:
        List of row indices into ``dets`` that survive, ordered by descending
        score.
    """
    left, top, right, bottom, conf = (dets[:, col] for col in range(5))

    # Areas use the inclusive-pixel convention (+1 on each side length).
    box_area = (right - left + 1) * (bottom - top + 1)
    remaining = conf.argsort()[::-1]

    selected = []
    while remaining.size > 0:
        best, others = remaining[0], remaining[1:]
        selected.append(best)

        # Intersection rectangle of the best box with every other candidate.
        inter_left = np.maximum(left[best], left[others])
        inter_top = np.maximum(top[best], top[others])
        inter_right = np.minimum(right[best], right[others])
        inter_bottom = np.minimum(bottom[best], bottom[others])

        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        intersection = inter_w * inter_h
        overlap = intersection / (box_area[best] + box_area[others] - intersection)

        # Only candidates that do not overlap too much stay in the queue.
        remaining = others[overlap <= thresh]

    return selected

class PriorBox:
    """Generates the prior (anchor) boxes for a given input image size.

    Attributes:
        min_sizes: Per-level lists of anchor side lengths, from ``cfg``.
        steps: Per-level strides, from ``cfg``.
        clip: Whether ``forward`` clamps its output into ``[0, 1]``.
        image_size: ``(height, width)`` of the input image.
        feature_maps: ``[rows, cols]`` of each level, computed as
            ``ceil(image_size / step)``.
    """

    def __init__(self, cfg, image_size=None, phase='train'):
        """Store the anchor configuration and derive the feature-map sizes.

        Args:
            cfg: Mapping providing ``'min_sizes'``, ``'steps'`` and ``'clip'``.
            image_size: ``(height, width)`` of the input image; it is indexed
                immediately, so it must be supplied despite the ``None`` default.
            phase: Accepted but not used by this class.
        """
        super().__init__()
        self.min_sizes = cfg['min_sizes']
        self.steps = cfg['steps']
        self.clip = cfg['clip']
        self.image_size = image_size
        self.feature_maps = []
        for step in self.steps:
            self.feature_maps.append(
                [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)])
        self.name = "s"

    def forward(self):
        """Build all anchors.

        Returns:
            Float tensor of shape ``(num_anchors, 4)``; each row is
            ``cx, cy, w, h`` normalized by the image width/height. Rows are
            ordered by level, then row, then column, then anchor size.
        """
        img_h, img_w = self.image_size[0], self.image_size[1]
        flat = []
        for level, fmap in enumerate(self.feature_maps):
            sizes = self.min_sizes[level]
            for row in range(fmap[0]):
                for col in range(fmap[1]):
                    # Anchor centre sits in the middle of the feature-map cell.
                    cx = (col + 0.5) * self.steps[level] / img_w
                    cy = (row + 0.5) * self.steps[level] / img_h
                    for size in sizes:
                        flat.extend((cx, cy, size / img_w, size / img_h))

        # back to torch land
        priors = torch.Tensor(flat).view(-1, 4)
        if self.clip:
            priors.clamp_(max=1, min=0)
        return priors

人脸对齐

采用最常用的仿射变换,根据上一步人脸检测得到的关键点对人脸进行摆正对齐。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144

import numpy as np
import cv2

# Reference template: five (x, y) landmark positions that detected landmarks
# are aligned to, expressed in pixels of a DEFAULT_CROP_SIZE image.
# Presumably ordered left eye, right eye, nose, left/right mouth corner —
# TODO confirm against the landmark order produced by the detector.
# NOTE(review): the template is centred around x ~= 48, whereas a 112-wide
# crop is centred at x = 56 — confirm that (112, 112) is the intended crop
# size for these coordinates.
REFERENCE_FACIAL_POINTS = [
[30.29459953, 51.69630051],
[65.53179932, 51.50139999],
[48.02519989, 71.73660278],
[33.54930115, 92.3655014],
[62.72990036, 92.20410156]
]

# Crop size the reference template above is taken to be defined for.
DEFAULT_CROP_SIZE = (112, 112)

class FaceWarpException(Exception):
    """Raised when face-alignment arguments are invalid or inconsistent."""

    def __str__(self):
        """Return the error message prefixed with this module's file path."""
        # ``super`` has to be *called*: the bare ``super.__str__(self)`` resolved
        # to ``object.__str__`` and produced the exception's repr
        # (``FaceWarpException('msg')``) instead of the plain message.
        return 'In File {}:{}'.format(__file__, super().__str__())

def get_reference_facial_points(output_size=None,
                                inner_padding_factor=0.0,
                                outer_padding=(0, 0),
                                default_square=False):
    """Return the five reference landmark positions for a given output layout.

    Starts from ``REFERENCE_FACIAL_POINTS`` / ``DEFAULT_CROP_SIZE`` and
    optionally squares the crop, pads it, and rescales it to ``output_size``.

    Args:
        output_size: ``(width, height)`` of the aligned image, or ``None`` to
            derive it from the paddings (or to use the default crop size when
            no padding is requested).
        inner_padding_factor: Fraction in ``[0, 1]`` of the crop size that is
            added on every side before scaling.
        outer_padding: ``(x, y)`` pixels added around the scaled crop.
        default_square: When true, the default crop is first padded to a square
            and the template is shifted accordingly.

    Returns:
        ``numpy.ndarray`` of shape ``(5, 2)`` with the reference ``(x, y)``
        points.

    Raises:
        FaceWarpException: If ``output_size`` is given without any padding but
            differs from the default crop size, if ``inner_padding_factor`` is
            outside ``[0, 1]``, if ``outer_padding`` is not smaller than
            ``output_size``, or if the padded crop and ``output_size`` do not
            have the same aspect ratio.
    """
    tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
    tmp_crop_size = np.array(DEFAULT_CROP_SIZE)

    if default_square:
        size_diff = max(tmp_crop_size) - tmp_crop_size
        tmp_5pts += size_diff / 2
        tmp_crop_size += size_diff

    # ``is not None`` (rather than truthiness) so array-like sizes work too.
    if (output_size is not None and
            output_size[0] == tmp_crop_size[0] and
            output_size[1] == tmp_crop_size[1]):
        return tmp_5pts

    # tuple(...) so that lists / arrays of zeros are recognised as "no padding".
    if inner_padding_factor == 0 and tuple(outer_padding) == (0, 0):
        if output_size is None:
            return tmp_5pts
        raise FaceWarpException(
            'No paddings to do, output_size must be None or {}'.format(tmp_crop_size))

    # check output size
    if not (0 <= inner_padding_factor <= 1.0):
        raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')

    if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0)
            and output_size is None):
        # The product must be parenthesised before ``.astype``; otherwise
        # ``.astype`` is applied to the plain Python float and raises.
        output_size = (tmp_crop_size *
                       (1 + inner_padding_factor * 2)).astype(np.int32)
        output_size += np.array(outer_padding)

    if not (outer_padding[0] < output_size[0]
            and outer_padding[1] < output_size[1]):
        raise FaceWarpException('Not (outer_padding[0] < output_size[0]'
                                'and outer_padding[1] < output_size[1])')

    # Apply the inner padding to the template and the crop size.
    if inner_padding_factor > 0:
        size_diff = tmp_crop_size * inner_padding_factor * 2
        tmp_5pts += size_diff / 2
        tmp_crop_size += np.round(size_diff).astype(np.int32)

    # Size that remains for the (inner-padded) crop once outer padding is removed.
    size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2

    if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]:
        raise FaceWarpException('Must have (output_size - outer_padding)'
                                '= some_scale * (crop_size * (1.0 + inner_padding_factor)')

    # Rescale the template to the target size, then shift by the outer padding.
    scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
    tmp_5pts = tmp_5pts * scale_factor

    return tmp_5pts + np.array(outer_padding)

def get_affine_transform_matrix(src_pts, dst_pts):
    """Estimate the 2x3 affine transform mapping ``src_pts`` onto ``dst_pts``.

    The transform is the least-squares solution over all point pairs.

    Args:
        src_pts: ``(K, 2)`` array of source points.
        dst_pts: ``(K, 2)`` array of destination points.

    Returns:
        ``numpy.ndarray`` of shape ``(2, 3)`` and dtype ``float32``. When the
        system has rank 2 the translation column is zero; for any lower rank
        the identity transform is returned.
    """
    n_pts = src_pts.shape[0]
    ones = np.ones((n_pts, 1), src_pts.dtype)
    # Homogeneous coordinates: [x, y, 1].
    src_h = np.hstack([src_pts, ones])
    dst_h = np.hstack([dst_pts, ones])

    # rcond=None selects the machine-precision based cutoff explicitly, which
    # gives the same behaviour on every NumPy version and avoids the
    # FutureWarning older versions emit when rcond is omitted.
    A, _, rank, _ = np.linalg.lstsq(src_h, dst_h, rcond=None)

    if rank == 3:
        return np.float32([
            [A[0, 0], A[1, 0], A[2, 0]],
            [A[0, 1], A[1, 1], A[2, 1]]
        ])
    if rank == 2:
        return np.float32([
            [A[0, 0], A[1, 0], 0],
            [A[0, 1], A[1, 1], 0]
        ])
    # Degenerate input: fall back to the identity transform.
    return np.float32([[1, 0, 0], [0, 1, 0]])


def _as_point_rows(points, name):
    """Convert ``points`` to a float32 ``(K, 2)`` array, validating its shape."""
    pts = np.float32(points)
    shape = pts.shape
    if max(shape) < 3 or min(shape) != 2:
        raise FaceWarpException(
            '{}.shape must be (K,2) or (2,K) and K>2'.format(name))
    # Accept the (2, K) layout by transposing it to (K, 2).
    return pts.T if shape[0] == 2 else pts


def warp_and_crop_face(src_img,
                       facial_pts,
                       reference_pts=None,
                       crop_size=(96, 112),
                       align_type='smilarity'):
    """Warp ``src_img`` so that its facial landmarks land on the reference points.

    Args:
        src_img: Source image array passed to ``cv2.warpAffine``.
        facial_pts: Detected landmarks, shape ``(K, 2)`` or ``(2, K)`` with
            ``K > 2``.
        reference_pts: Target landmark positions in the output image, same
            layout as ``facial_pts``. The value is converted to an array
            directly, so actual points must be passed despite the ``None``
            default.
        crop_size: ``(width, height)`` of the output image.
        align_type: ``'cv2_affine'`` uses ``cv2.getAffineTransform`` on the
            first three point pairs; any other value uses the least-squares
            fit from ``get_affine_transform_matrix`` over all points.

    Returns:
        The warped image of size ``crop_size``.

    Raises:
        FaceWarpException: If either point set has an invalid shape or the two
            point sets differ in shape.
    """
    ref_pts = _as_point_rows(reference_pts, 'reference_pts')
    src_pts = _as_point_rows(facial_pts, 'facial_pts')

    if src_pts.shape != ref_pts.shape:
        raise FaceWarpException(
            'facial_pts and reference_pts must have the same shape')

    # Compare strings by value; ``is`` only tests object identity and is not a
    # reliable way to match a string argument.
    if align_type == 'cv2_affine':
        tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
    else:
        tfm = get_affine_transform_matrix(src_pts, ref_pts)

    return cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]))

人脸识别

采用insightface,固化模型如下:

编码函数调用如下:

1
2
3
4
5
6
7
8
def recognition(rec_net, face, device):
    """Compute the feature embedding of one aligned face image.

    Args:
        rec_net: Recognition model, called with a batch of one image.
        face: Aligned face image accepted by ``torch_transform``.
        device: ``torch.device`` the model lives on.

    Returns:
        The tensor returned by ``rec_net`` for the single-image batch.
    """
    batch = torch_transform(face).to(device).unsqueeze(0)
    # Inference only: no autograd graph is needed, which saves memory and time.
    with torch.no_grad():
        return rec_net(batch)

# Preprocessing applied to an aligned face before it is fed to the recognition
# model: convert to a tensor, then normalize each of the three channels with
# mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5.
# Presumably ToTensor yields values in [0, 1], making the result lie in
# [-1, 1] — confirm against the torchvision documentation.
torch_transform = trans2.Compose([
trans2.ToTensor(),
trans2.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

主函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
if __name__ == '__main__':
    # Paths to the serialized detection and recognition models.
    ret_model_path = '...pt'
    arc_model_path = '...pt'

    # Reference landmark template used for alignment.
    ref_pts = get_reference_facial_points(default_square=True)
    # Load both models.
    detect_net, reg_net, device = load_model(ret_model_path, arc_model_path)

    image_path = "./1.jpeg"
    img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
    # cv2.imread signals failure by returning None instead of raising, so fail
    # early with a clear message rather than deep inside detect().
    if img_raw is None:
        raise FileNotFoundError('Cannot read image: {}'.format(image_path))

    # Face detection.
    boxes = detect(detect_net, device, img_raw)
    for _top_left, _bottom_right, landmarks, _score in boxes:
        # Face alignment.
        face = warp_and_crop_face(img_raw, landmarks, ref_pts, crop_size=(112, 112))
        # Face feature encoding.
        embedding = recognition(reg_net, face, device)
        print(embedding)

本博客所有文章除特别声明外,均采用 CC BY-SA 4.0 协议 ,转载请注明出处!