102 lines
2.8 KiB
Python
102 lines
2.8 KiB
Python
import cv2
|
|
import cv2 as cv
|
|
import dlib
|
|
import joblib
|
|
import numpy as np
|
|
from sklearn.metrics import accuracy_score
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.svm import SVC
|
|
from tqdm import tqdm
|
|
|
|
|
|
# Helper functions
def img_cvt(img):
    """Convert a BGR image into a thresholded grayscale image and an RGB image.

    Args:
        img: Image array in OpenCV's BGR channel order.

    Returns:
        A tuple ``(img_gray, img_rgb)``. ``img_gray`` is the grayscale
        version of ``img`` thresholded at 127 (``cv.THRESH_BINARY`` with a
        maximum value of 255) and then passed through a 3x3 box blur.
        ``img_rgb`` is ``img`` converted from BGR to RGB.
    """
    img_rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)

    grayscale = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    binary = cv.threshold(grayscale, 127, 255, cv.THRESH_BINARY)[1]
    # The blur runs after thresholding, so edges are softened and the
    # returned image may contain intermediate gray levels.
    img_gray = cv.blur(binary, (3, 3))

    return img_gray, img_rgb
|
|
|
|
|
|
def extract_face_feature(img):
    """Compute the face descriptor of the first face detected in ``img``.

    Relies on the module-level ``detector``, ``predictor`` and
    ``face_rec_model`` objects, which must be created before this
    function is called.

    Args:
        img: Image handed to the detector (the script passes RGB frames).

    Returns:
        A NumPy array built from the descriptor returned by
        ``face_rec_model.compute_face_descriptor``, or ``None`` when the
        detector finds no face.
    """
    detections = detector(img)
    if len(detections) == 0:
        return None

    # Only the first detection is used; any further faces are ignored.
    shape = predictor(img, detections[0])
    descriptor = face_rec_model.compute_face_descriptor(img, shape)
    return np.array(descriptor)
|
|
|
|
|
|
def gen_negative_samples(features_dim=128, rng=None):
    """Generate one pseudo (negative) sample drawn uniformly from [-1, 1).

    Args:
        features_dim: Number of components in the generated vector.
        rng: Optional random source exposing ``uniform(low, high, size)``,
            e.g. ``np.random.default_rng(seed)``. Pass a seeded generator to
            make the negative samples reproducible. When ``None`` the
            global ``np.random`` state is used, as before.

    Returns:
        A 1-D NumPy array of length ``features_dim``.
    """
    if rng is None:
        # Default path: unchanged behaviour, draws from NumPy's global RNG.
        return np.random.uniform(-1, 1, features_dim)
    return rng.uniform(-1, 1, features_dim)
|
|
|
|
|
|
# Module-level state
dataset_video = cv.VideoCapture('cache/dataset.mp4')
# Fail fast: without this check an unreadable video silently produces an
# empty dataset and the script only fails much later, during the split.
if not dataset_video.isOpened():
    raise OSError("cannot open dataset video 'cache/dataset.mp4'")

datasets = []          # raw frames as returned by VideoCapture.read()
datasets_rgb = []      # RGB conversions of the frames
features = []          # face descriptors (positive samples)
negative_samples = []  # randomly generated negative samples

# Collect the dataset: every frame of the video is used as one image.
try:
    while True:
        ret, frame = dataset_video.read()
        if not ret:
            # read() reports failure once no further frame is available.
            break
        datasets.append(frame)
finally:
    # Always give the capture handle back, even if reading raised.
    dataset_video.release()
|
|
|
|
# Load the dlib models: face detector, landmark shape predictor and
# face-recognition network. extract_face_feature() reads these globals.
_LANDMARK_MODEL_PATH = "models/shape_predictor_68_face_landmarks_GTX.dat"
_FACE_REC_MODEL_PATH = 'models/dlib_face_recognition_resnet_model_v1.dat'

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(_LANDMARK_MODEL_PATH)
face_rec_model = dlib.face_recognition_model_v1(_FACE_REC_MODEL_PATH)
|
|
|
|
# Preprocess the dataset: keep the RGB conversion of every frame (the
# thresholded grayscale image returned by img_cvt is not used here).
for dataset in tqdm(datasets, desc='正在预处理数据集'):
    _, dataset_rgb = img_cvt(dataset)
    datasets_rgb.append(dataset_rgb)

# Compute one face descriptor per frame; frames in which no face is
# detected are skipped.
for dataset in tqdm(datasets_rgb, desc='正在求人脸特征'):
    face_descriptor = extract_face_feature(dataset)
    if face_descriptor is not None:
        features.append(face_descriptor)

# Without any positive sample there is nothing to train on; stop here with
# a clear message instead of failing later inside the train/test split.
if not features:
    raise ValueError('no face was detected in any frame of the dataset video')
|
|
|
|
# Generate negative samples: 2048 random vectors per extracted face feature.
negative_sample_count = len(features) * 2048
negative_samples.extend(
    gen_negative_samples()
    for _ in tqdm(range(negative_sample_count), desc='正在生成负样本')
)

# Encode the dataset: label 1 for real face features, 0 for generated ones.
positive_labels = [1] * len(features)
negative_labels = [0] * len(negative_samples)

samples = [*features, *negative_samples]
labels = [*positive_labels, *negative_labels]

# These arrays hold the full dataset until the train/test split below
# rebinds the names.
X_train = np.array(samples)
Y_train = np.array(labels)
|
|
|
|
# Split the samples into a training set and a held-out test set (80/20),
# stratified on the labels.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_train,
    Y_train,
    test_size=0.2,
    random_state=42,
    stratify=Y_train,
)

# Train the classifier.
print('正在训练分类器')
classifier = SVC(kernel='linear', C=1.0, random_state=42)
classifier.fit(X_train, Y_train)

# Evaluate the classifier. A single batched predict() call replaces the
# per-sample loop: it returns a flat label array instead of a list of
# one-element arrays and avoids the per-call overhead.
# NOTE(review): negatives are generated at 2048 per positive, so plain
# accuracy is dominated by the negative class - consider also reporting
# precision/recall.
Y_pred = classifier.predict(X_test)
accuracy = accuracy_score(Y_test, Y_pred)
print(f'准确率:{accuracy * 100:.5f}%')

# Persist the trained model.
joblib.dump(classifier, 'models/classifier.pkl')
|