根据之前人脸识别项目，自己做了个小的，准备放在家里的树莓派上做个智能安防。

正文

人脸识别技术，其实分为三个算法，人脸检测、人脸对齐、人脸特征编码。
人脸识别系统流程：

首先对已知人脸进行以上三个步骤最后得到对应的人脸特征编码，每个已知人脸保存1～3个特征编码
摄像头实时采集图像，对图像依次执行以上三个算法，最终得到图像中的人脸特征编码（可能存在多个人脸）
通过计算实时图像中人脸特征编码与已知人脸特征编码的L2距离，取距离最小者确定人脸身份（L2距离越小表示越相似）
通过人脸检测box分辨率过滤小人脸。通过L2阈值过滤未知人脸的特征编码。

所需头文件

1
2
3

import cv2
import numpy as np
import torch
from torchvision import transforms as trans2

模型加载

所有模型采用 TorchScript（torch.jit）固化，因此无需网络结构定义代码，代码部分只有模型加载和图像预处理。

def load_model(ret_pth, arc_path):
    """Load the TorchScript face detector and face recognizer onto the CPU.

    Args:
        ret_pth: Path to the serialized (TorchScript) face detection model.
        arc_path: Path to the serialized (TorchScript) face recognition model.

    Returns:
        A ``(detect, recog, device)`` tuple: the two loaded modules, both
        switched to eval mode, and the CPU ``torch.device`` they live on.
    """
    device = torch.device("cpu")
    # map_location remaps tensors that were saved from another device, so an
    # archive exported on a GPU machine still loads on a CPU-only board.
    detect = torch.jit.load(ret_pth, map_location=device).to(device)
    recog = torch.jit.load(arc_path, map_location=device).to(device)
    # The models are only used for inference; make sure layers such as
    # dropout / batch-norm do not run in training mode.
    detect.eval()
    recog.eval()
    return detect, recog, device

人脸检测

下表为在网络爬取的人脸图像数据集上多种人脸检测算法对比效果

因为爬取图像较多，所以并未统计实际包含人脸数目，对于检测算法实际检测出的人脸结果也仅仅大致检查了下，并未仔细统计，因此下表仅供参考。

算法	模型	检测出人脸数量	所用时间
MTCNN	/	10935张	6749.72s
Retinaface	ResNet50	19083张	9153.78s
Retinaface	MobileNet	16026张	1179.37s
Dlib	/	12329张	4318.97s
Facebox	mobileNet	12375张	567.14s

最后选择Retinaface进行封装，固化模型下载链接如下：
ResNet50
MobileNet

代码如下

def detect(detect_net, device, img_raw):
    """Run the face detector on one image and return the confident faces.

    Args:
        detect_net: Detection model; called as ``detect_net(img)`` and
            expected to return ``(loc, conf, landms)``.
        device: ``torch.device`` the model lives on.
        img_raw: Image as an ``H x W x 3`` array (e.g. from ``cv2.imread``).

    Returns:
        A list with one tuple per face whose score exceeds
        ``cfg_re50['vis_thres']``:
        ``((x1, y1), (x2, y2), landmarks, score)`` where the corners are
        integer pixel coordinates and ``landmarks`` is a list of five
        ``[x, y]`` pairs in pixel coordinates.

    Raises:
        ValueError: If ``img_raw`` is ``None`` (e.g. the image failed to load).
    """
    if img_raw is None:
        raise ValueError('img_raw is None; check that the image was loaded')

    # Image preprocessing. np.array(...) always copies, so the in-place mean
    # subtraction below can never modify the caller's image.
    img = np.array(img_raw, dtype=np.float32)
    im_height, im_width, _ = img.shape
    # Factors that map normalized (x, y) pairs back to pixel coordinates:
    # two pairs for a box, five pairs for the landmarks.
    box_scale = torch.Tensor([im_width, im_height] * 2).to(device)
    landm_scale = torch.Tensor([im_width, im_height] * 5).to(device)
    # Per-channel offsets; presumably the channel means used at training
    # time -- verify against the exported model.
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)  # HWC -> CHW
    img = torch.from_numpy(img).unsqueeze(0).to(device)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        # Encoded box offsets, class confidences, encoded landmark offsets.
        loc, conf, landms = detect_net(img)

        # Decode the predictions against the anchors for this image size.
        priors = PriorBox(cfg_re50, image_size=(im_height, im_width)).forward()
        priors = priors.to(device)
        boxes = decode(loc.squeeze(0), priors, cfg_re50['variance'])
        boxes = boxes * box_scale
        landms = decode_landm(landms.squeeze(0), priors, cfg_re50['variance'])
        landms = landms * landm_scale

    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).cpu().numpy()[:, 1]
    landms = landms.cpu().numpy()

    # Ignore low scores.
    inds = np.where(scores > cfg_re50['confidence_threshold'])[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # Keep top-K before NMS.
    order = scores.argsort()[::-1][:cfg_re50['top_k']]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # Non-maximum suppression.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, cfg_re50['nms_threshold'])
    dets = dets[keep, :]
    landms = landms[keep]

    # Keep top-K after NMS.
    dets = dets[:cfg_re50['keep_top_k'], :]
    landms = landms[:cfg_re50['keep_top_k'], :]
    # Columns: 0-3 box corners, 4 score, 5-14 five (x, y) landmarks.
    dets = np.concatenate((dets, landms), axis=1)

    faces = []
    for det in dets:
        score = det[4]
        if score > cfg_re50['vis_thres']:
            top_left = (int(det[0]), int(det[1]))
            bottom_right = (int(det[2]), int(det[3]))
            landmarks = [[det[k], det[k + 1]] for k in range(5, 15, 2)]
            faces.append((top_left, bottom_right, landmarks, score))
    return faces

解码工具类PriorBox.py

import torch
from itertools import product as product
from math import ceil

# Detector configuration: anchor layout plus the post-processing thresholds
# read by detect().
cfg_re50 = dict(
    # Anchor side lengths (pixels) for each feature-map level.
    min_sizes=[[16, 32], [64, 128], [256, 512]],
    # Stride of each feature-map level relative to the input image.
    steps=[8, 16, 32],
    # Scaling applied to the center / size offsets when decoding.
    variance=[0.1, 0.2],
    # Whether PriorBox clamps the generated anchors to [0, 1].
    clip=False,
    # Post-processing parameters.
    confidence_threshold=0.02,  # scores at or below this are dropped
    nms_threshold=0.4,          # overlap above which NMS suppresses a box
    top_k=5000,                 # candidates kept before NMS
    keep_top_k=750,             # detections kept after NMS
    vis_thres=0.6,              # minimum score for a face to be returned
)

def decode(loc, priors, variances):
    """Turn encoded box offsets into corner-form boxes.

    Args:
        loc: ``(N, 4)`` tensor of predicted offsets (center xy, size wh).
        priors: ``(N, 4)`` tensor of anchors as ``(cx, cy, w, h)``.
        variances: Two scaling factors, for the center and size offsets.

    Returns:
        ``(N, 4)`` tensor of boxes as ``(x_min, y_min, x_max, y_max)`` in the
        same units as ``priors``.
    """
    centers = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    sizes = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
    # Convert center/size form to corner form.
    top_left = centers - sizes / 2
    bottom_right = sizes + top_left
    return torch.cat((top_left, bottom_right), 1)

def decode_landm(pre, priors, variances):
    """Turn encoded landmark offsets into landmark coordinates.

    Args:
        pre: ``(N, 10)`` tensor holding five encoded ``(x, y)`` offsets.
        priors: ``(N, 4)`` tensor of anchors as ``(cx, cy, w, h)``.
        variances: Scaling factors; only ``variances[0]`` is used.

    Returns:
        ``(N, 10)`` tensor of five ``(x, y)`` points in the same units as
        ``priors``.
    """
    anchor_xy = priors[:, :2]
    anchor_wh = priors[:, 2:]
    points = [
        anchor_xy + pre[:, start:start + 2] * variances[0] * anchor_wh
        for start in range(0, 10, 2)
    ]
    return torch.cat(points, dim=1)


def py_cpu_nms(dets, thresh):
    """Greedy non-maximum suppression implemented with NumPy.

    Args:
        dets: ``(N, 5)`` array with rows ``(x1, y1, x2, y2, score)``.
        thresh: Boxes whose overlap ratio with an already kept box is above
            this value are discarded.

    Returns:
        List of row indices into ``dets`` that survive, highest score first.
    """
    left, top, right, bottom, confidence = (dets[:, col] for col in range(5))

    # Box areas use the inclusive-pixel (+1) convention.
    box_area = (right - left + 1) * (bottom - top + 1)
    remaining = confidence.argsort()[::-1]

    kept = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        kept.append(best)

        # Intersection of the best box with every remaining candidate.
        inter_w = np.maximum(
            0.0,
            np.minimum(right[best], right[rest])
            - np.maximum(left[best], left[rest]) + 1)
        inter_h = np.maximum(
            0.0,
            np.minimum(bottom[best], bottom[rest])
            - np.maximum(top[best], top[rest]) + 1)
        intersection = inter_w * inter_h
        overlap = intersection / (box_area[best] + box_area[rest] - intersection)

        # Only candidates that do not overlap too much stay in the race.
        remaining = rest[overlap <= thresh]

    return kept

class PriorBox(object):
    """Generates the anchor boxes for a given image size.

    Args:
        cfg: Mapping providing ``'min_sizes'``, ``'steps'`` and ``'clip'``.
        image_size: ``(height, width)`` of the input image.
        phase: Accepted for interface compatibility; not used.
    """

    def __init__(self, cfg, image_size=None, phase='train'):
        super().__init__()
        self.min_sizes = cfg['min_sizes']
        self.steps = cfg['steps']
        self.clip = cfg['clip']
        self.image_size = image_size
        # Grid size (rows, cols) of every feature-map level.
        height, width = self.image_size[0], self.image_size[1]
        self.feature_maps = [
            [ceil(height / stride), ceil(width / stride)]
            for stride in self.steps
        ]
        self.name = "s"

    def forward(self):
        """Return a ``(num_anchors, 4)`` tensor of ``(cx, cy, w, h)`` anchors.

        All values are divided by the image width/height; when ``clip`` is
        set they are additionally clamped to ``[0, 1]``.
        """
        img_h, img_w = self.image_size[0], self.image_size[1]
        flat = []
        for level, grid in enumerate(self.feature_maps):
            stride = self.steps[level]
            level_sizes = self.min_sizes[level]
            for row, col in product(range(grid[0]), range(grid[1])):
                # Anchor center sits in the middle of the grid cell.
                cx = (col + 0.5) * stride / img_w
                cy = (row + 0.5) * stride / img_h
                for side in level_sizes:
                    flat.extend((cx, cy, side / img_w, side / img_h))

        # Back to torch land.
        anchors = torch.Tensor(flat).view(-1, 4)
        if self.clip:
            anchors.clamp_(max=1, min=0)
        return anchors

人脸对齐

采用最常用的仿射变换，根据上一步人脸检测得到的五个关键点，将人脸摆正对齐到标准关键点位置。


import numpy as np
import cv2

# Reference positions (x, y) of the five facial landmarks inside a crop of
# DEFAULT_CROP_SIZE. Detected landmarks are warped onto these points.
REFERENCE_FACIAL_POINTS = [
    [30.29459953,  51.69630051],
    [65.53179932,  51.50139999],
    [48.02519989,  71.73660278],
    [33.54930115,  92.3655014],
    [62.72990036,  92.20410156]
]

# (width, height) of the crop the reference points above are defined for.
# The template is 96 wide: declaring it as (112, 112) makes the
# ``default_square`` adjustment in get_reference_facial_points() a no-op, so
# faces end up shifted 8 px to the left in a 112x112 crop. It also has to
# agree with the default ``crop_size=(96, 112)`` of warp_and_crop_face().
DEFAULT_CROP_SIZE = (96, 112)

class FaceWarpException(Exception):
    """Raised when face-alignment arguments are invalid or inconsistent."""

    def __str__(self):
        # ``super()`` (called) delegates to Exception.__str__ and yields the
        # message; the bare ``super`` type would format the exception's repr.
        return 'In File {}:{}'.format(
            __file__, super().__str__())

def get_reference_facial_points(output_size=None,
                                inner_padding_factor=0.0,
                                outer_padding=(0, 0),
                                default_square=False):
    """Return the five reference landmark positions for an aligned crop.

    Starts from REFERENCE_FACIAL_POINTS / DEFAULT_CROP_SIZE and adapts them
    to the requested padding and output size.

    Args:
        output_size: ``(w, h)`` of the aligned crop, or ``None`` to use the
            default crop size (or a size deduced from the paddings).
        inner_padding_factor: Value in ``[0, 1]``; the crop is enlarged by
            this fraction of its size on every side.
        outer_padding: ``(x, y)`` offset added to the reference points.
        default_square: If true, first pad the default crop to a square and
            shift the points by half of the added size.

    Returns:
        ``(5, 2)`` float array of reference ``(x, y)`` coordinates.

    Raises:
        FaceWarpException: If the arguments are inconsistent with each other.
    """
    tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
    tmp_crop_size = np.array(DEFAULT_CROP_SIZE)

    if default_square:
        size_diff = max(tmp_crop_size) - tmp_crop_size
        tmp_5pts += size_diff / 2
        tmp_crop_size += size_diff

    # Explicit None/emptiness test: plain truthiness raises for NumPy arrays.
    has_output_size = output_size is not None and len(output_size) > 0
    if (has_output_size and
            output_size[0] == tmp_crop_size[0] and
            output_size[1] == tmp_crop_size[1]):
        return tmp_5pts

    # tuple(...) so that a list or array of zeros is recognised as well.
    if (inner_padding_factor == 0 and
            tuple(outer_padding) == (0, 0)):
        if output_size is None:
            return tmp_5pts
        raise FaceWarpException(
            'No paddings to do, output_size must be None or {}'.format(tmp_crop_size))

    # check output size
    if not (0 <= inner_padding_factor <= 1.0):
        raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')

    if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0)
            and output_size is None):
        # Deduce the output size from the paddings. The whole product must be
        # cast: ``.astype`` does not exist on the plain Python float factor.
        output_size = (tmp_crop_size *
                       (1 + inner_padding_factor * 2)).astype(np.int32)
        # NOTE(review): outer_padding is added once here but subtracted twice
        # below (size_bf_outer_pad) -- confirm which one is intended.
        output_size += np.array(outer_padding)

    if not (outer_padding[0] < output_size[0]
            and outer_padding[1] < output_size[1]):
        raise FaceWarpException('Not (outer_padding[0] < output_size[0]'
                                'and outer_padding[1] < output_size[1])')

    if inner_padding_factor > 0:
        size_diff = tmp_crop_size * inner_padding_factor * 2
        tmp_5pts += size_diff / 2
        tmp_crop_size += np.round(size_diff).astype(np.int32)

    size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2

    # The area left after outer padding must keep the crop's aspect ratio.
    if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]:
        raise FaceWarpException('Must have (output_size - outer_padding)'
                                '= some_scale * (crop_size * (1.0 + inner_padding_factor)')

    scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
    tmp_5pts = tmp_5pts * scale_factor

    reference_5point = tmp_5pts + np.array(outer_padding)

    return reference_5point

def get_affine_transform_matrix(src_pts, dst_pts):
    """Estimate the affine transform mapping ``src_pts`` onto ``dst_pts``.

    The transform is the least-squares solution over all point pairs.

    Args:
        src_pts: ``(K, 2)`` array of source ``(x, y)`` points.
        dst_pts: ``(K, 2)`` array of destination ``(x, y)`` points.

    Returns:
        ``(2, 3)`` float32 matrix usable with ``cv2.warpAffine``. When the
        system has rank 2 the translation column is zero; when the rank is
        lower the identity transform is returned.
    """
    tfm = np.float32([[1, 0, 0], [0, 1, 0]])
    n_pts = src_pts.shape[0]
    ones = np.ones((n_pts, 1), src_pts.dtype)
    # Homogeneous coordinates so that the translation is solved for as well.
    src_pts_ = np.hstack([src_pts, ones])
    dst_pts_ = np.hstack([dst_pts, ones])

    # rcond=None selects the machine-precision cutoff explicitly; omitting it
    # triggers a FutureWarning and version-dependent behaviour in NumPy 1.x.
    A, _, rank, _ = np.linalg.lstsq(src_pts_, dst_pts_, rcond=None)

    if rank == 3:
        tfm = np.float32([
            [A[0, 0], A[1, 0], A[2, 0]],
            [A[0, 1], A[1, 1], A[2, 1]]
        ])
    elif rank == 2:
        tfm = np.float32([
            [A[0, 0], A[1, 0], 0],
            [A[0, 1], A[1, 1], 0]
        ])
    return tfm


def warp_and_crop_face(src_img,
                       facial_pts,
                       reference_pts=None,
                       crop_size=(96, 112),
                       align_type='smilarity'):
    """Warp a face so its landmarks land on the reference points, then crop.

    Args:
        src_img: Source image passed to ``cv2.warpAffine``.
        facial_pts: Detected landmarks, shape ``(K, 2)`` or ``(2, K)``, K > 2.
        reference_pts: Target landmark positions with the same shape rules.
            When ``None``, REFERENCE_FACIAL_POINTS is used, which is only
            accepted for the default ``crop_size`` of ``(96, 112)``.
        crop_size: ``(width, height)`` of the returned image.
        align_type: ``'cv2_affine'`` uses ``cv2.getAffineTransform`` on the
            first three point pairs; any other value (including the default)
            uses the least-squares affine fit over all points.

    Returns:
        The aligned face image of size ``crop_size``.

    Raises:
        FaceWarpException: If the point arrays have an invalid or mismatched
            shape, or ``reference_pts`` is missing for a non-default crop.
    """
    if reference_pts is None:
        # Previously None crashed with an obscure error inside max().
        if crop_size[0] == 96 and crop_size[1] == 112:
            reference_pts = REFERENCE_FACIAL_POINTS
        else:
            raise FaceWarpException(
                'reference_pts must be given when crop_size is not (96, 112)')

    ref_pts = np.float32(reference_pts)
    ref_pts_shp = ref_pts.shape
    if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2:
        raise FaceWarpException(
            'reference_pts.shape must be (K,2) or (2,K) and K>2')

    # Normalise to (K, 2).
    if ref_pts_shp[0] == 2:
        ref_pts = ref_pts.T

    src_pts = np.float32(facial_pts)
    src_pts_shp = src_pts.shape
    if max(src_pts_shp) < 3 or min(src_pts_shp) != 2:
        raise FaceWarpException(
            'facial_pts.shape must be (K,2) or (2,K) and K>2')

    if src_pts_shp[0] == 2:
        src_pts = src_pts.T

    if src_pts.shape != ref_pts.shape:
        raise FaceWarpException(
            'facial_pts and reference_pts must have the same shape')

    # Compare by value: ``is`` tests object identity, which is not guaranteed
    # for equal strings and raises a SyntaxWarning on Python 3.8+.
    if align_type == 'cv2_affine':
        tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
    else:
        tfm = get_affine_transform_matrix(src_pts, ref_pts)

    face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]))

    return face_img

人脸识别

采用insightface,固化模型如下:

编码函数调用如下:

def recognition(rec_net, face, device):
    """Compute the feature embedding of one aligned face image.

    Args:
        rec_net: Recognition model; called on a batch of one image.
        face: Aligned face image accepted by ``torch_transform``.
        device: ``torch.device`` the model lives on.

    Returns:
        The model output for the single-image batch.
    """
    batch = torch_transform(face).unsqueeze(0).to(device)
    # Inference only: avoid building an autograd graph for every face.
    with torch.no_grad():
        emb = rec_net(batch)
    return emb

# Preprocessing applied to every aligned face before it is fed to the
# recognition model: convert to a tensor, then normalise each of the three
# channels with mean 0.5 and std 0.5.
torch_transform = trans2.Compose([
    trans2.ToTensor(),
    trans2.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

主函数

if __name__ == '__main__':
    # Demo: detect every face in one image and print its embedding.
    ret_model_path = '...pt'  # face detection model
    arc_model_path = '...pt'  # face recognition model
    image_path = "./1.jpeg"

    # Reference landmark positions the detected faces are aligned to.
    ref_pts = get_reference_facial_points(default_square=True)
    detect_net, reg_net, device = load_model(ret_model_path, arc_model_path)

    img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
    # cv2.imread signals failure by returning None instead of raising.
    if img_raw is None:
        raise SystemExit('Failed to read image: {}'.format(image_path))

    # Each detection is (top_left, bottom_right, landmarks, score); only the
    # landmarks are needed for alignment.
    for _, _, landmarks, _ in detect(detect_net, device, img_raw):
        face = warp_and_crop_face(img_raw, landmarks, ref_pts, crop_size=(112, 112))
        embedding = recognition(reg_net, face, device)
        print(embedding)

机器学习 Python

本博客所有文章除特别声明外，均采用 CC BY-SA 4.0 协议，转载请注明出处！

图像分类上一篇

PyTorch拾遗下一篇

人脸识别(连载中)

正文

所需头文件

模型加载

人脸检测

人脸对齐

人脸识别

主函数