Files
2024-12-15 20:03:39 +08:00

102 lines
2.8 KiB
Python

import cv2
import cv2 as cv
import dlib
import joblib
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from tqdm import tqdm
# 方法区
## Convert a frame into a thresholded grayscale image and an RGB image
def img_cvt(img):
    """Return ``(gray, rgb)`` renditions of a BGR image.

    Args:
        img: Image in OpenCV's BGR channel order (the conversion codes used
            below assume BGR input).

    Returns:
        A 2-tuple ``(gray, rgb)``:
        ``gray`` is the grayscale image thresholded at 127 (values above the
        threshold become 255, the rest 0) and then smoothed with a 3x3 box
        blur; ``rgb`` is the input converted to RGB channel order.
    """
    rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)

    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    binary = cv.threshold(gray, 127, 255, cv.THRESH_BINARY)[1]
    # NOTE: the blur runs after thresholding, so the returned image may
    # contain intermediate grey levels along edges rather than only 0/255.
    smoothed = cv.blur(binary, (3, 3))

    return smoothed, rgb
## Extract the descriptor of the first detected face
def extract_face_feature(img):
    """Compute a face descriptor for the first face found in *img*.

    Uses the module-level ``detector``, ``predictor`` and ``face_rec_model``
    objects, which must be defined before this function is called.

    Args:
        img: Image handed unchanged to the detector, the landmark predictor
            and the recognition model.

    Returns:
        The descriptor as a NumPy array, or ``None`` when the detector
        reports no face.
    """
    detections = detector(img)
    if len(detections) == 0:
        return None

    # Only the first detection is used; any further faces are ignored.
    shape = predictor(img, detections[0])
    descriptor = face_rec_model.compute_face_descriptor(img, shape)
    return np.array(descriptor)
## Generate one synthetic negative sample
def gen_negative_samples(features_dim=128, *, low=-1.0, high=1.0):
    """Draw one random feature vector to use as a synthetic negative sample.

    Args:
        features_dim: Length of the generated vector. It has to equal the
            length of the real feature vectors whenever both are stacked
            into a single array (presumably the 128-D dlib face descriptor;
            confirm against the recognition model in use).
        low: Inclusive lower bound of the uniform distribution.
        high: Exclusive upper bound of the uniform distribution.

    Returns:
        A 1-D NumPy array of ``features_dim`` floats drawn uniformly from
        ``[low, high)``.

    Raises:
        ValueError: If ``low`` is greater than ``high``.
    """
    if low > high:
        # NumPy does not reliably reject an inverted range, so fail loudly
        # here instead of sampling from an unintended interval.
        raise ValueError(f"low ({low}) must not exceed high ({high})")
    return np.random.uniform(low, high, features_dim)
# Script section: build the dataset, train the classifier, evaluate, save.

# Number of synthetic negative samples generated per positive face sample.
NEGATIVES_PER_POSITIVE = 75

# Collect the dataset: every frame of the video is one candidate image.
dataset_video = cv.VideoCapture('cache/dataset.mp4')
if not dataset_video.isOpened():
    # Fail here with a clear message instead of much later inside
    # train_test_split with an empty dataset.
    raise RuntimeError("cannot open dataset video 'cache/dataset.mp4'")

datasets = []
try:
    while True:
        ret, frame = dataset_video.read()
        if not ret:
            break
        datasets.append(frame)
finally:
    # Always release the capture handle, even if reading fails part-way.
    dataset_video.release()

# Load the face detection, landmark and recognition models.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("models/shape_predictor_68_face_landmarks_GTX.dat")
face_rec_model = dlib.face_recognition_model_v1('models/dlib_face_recognition_resnet_model_v1.dat')

# Preprocess the dataset: only the RGB rendition of each frame is kept.
datasets_rgb = [
    img_cvt(frame)[1] for frame in tqdm(datasets, desc='正在预处理数据集')
]

# Compute face descriptors; frames without a detected face are skipped.
features = []
for rgb_frame in tqdm(datasets_rgb, desc='正在求人脸特征'):
    face_descriptor = extract_face_feature(rgb_frame)
    if face_descriptor is not None:
        features.append(face_descriptor)

# The stratified split below needs at least two samples of each class.
if len(features) < 2:
    raise RuntimeError(
        f"only {len(features)} frame(s) with a detectable face were found; "
        "at least 2 are required to train and evaluate the classifier"
    )

# Generate the synthetic negative samples.
negative_samples = [
    gen_negative_samples()
    for _ in tqdm(range(len(features) * NEGATIVES_PER_POSITIVE), desc='正在生成负样本')
]

# Encode the dataset: label 1 for real face descriptors, 0 for negatives.
samples = features + negative_samples
labels = [1] * len(features) + [0] * len(negative_samples)
X_all = np.array(samples)
Y_all = np.array(labels)

# Split into training and test sets, keeping the class ratio in both.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_all, Y_all, test_size=0.2, random_state=42, stratify=Y_all
)

# Train the classifier.
print('正在训练分类器')
classifier = SVC(kernel='linear', C=1.0, random_state=42)
classifier.fit(X_train, Y_train)

# Evaluate the classifier. The whole test set is predicted in one call,
# which gives the same labels as predicting sample by sample but returns a
# flat 1-D array.
Y_pred = classifier.predict(X_test)
accuracy = accuracy_score(Y_test, Y_pred)
print(f'准确率:{accuracy * 100:.5f}%')

# Save the trained model.
joblib.dump(classifier, 'models/classifier.pkl')