89 lines
2.9 KiB
Python
89 lines
2.9 KiB
Python
import numpy as np
|
|
import cv2 as cv
|
|
import os
|
|
import tqdm
|
|
from sklearn.metrics import accuracy_score
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.svm import SVC
|
|
from joblib import dump
|
|
|
|
|
|
# 加载图像
|
|
def load_dataset(directory):
    """Recursively collect the paths of every file below *directory*.

    Args:
        directory: Root folder to walk with ``os.walk``.

    Returns:
        A list of paths, each built by joining the folder being visited with
        a file name found in it. No filtering by extension is applied, and a
        root that yields nothing from ``os.walk`` produces an empty list.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
    ]
|
|
|
|
# 提取特征
|
|
## 提取轮廓特征
|
|
def extract_contour_features(img):
    """Return ``[area, perimeter]`` for the first external contour in *img*.

    Args:
        img: Image handed directly to ``cv.findContours``; presumably a
            single-channel binary image -- confirm against callers.

    Returns:
        A two-element list ``[area, perimeter]`` where the perimeter is
        measured on a closed curve. If no contour is found the result is
        ``[0.0, 0.0]`` instead of an ``IndexError``, in line with the
        all-zero vector ``extract_features`` returns for contour-less images.
    """
    contours, _ = cv.findContours(img, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    if not contours:
        # A blank image has nothing to measure; indexing would raise.
        return [0.0, 0.0]

    contour = contours[0]
    return [cv.contourArea(contour), cv.arcLength(contour, True)]
|
|
|
|
## 提取形状特征
|
|
def extract_shape_features(contour):
    """Return ``[aspect_ratio, shape_factor]`` for *contour*.

    Args:
        contour: A contour accepted by ``cv.boundingRect`` and
            ``cv.contourArea``.

    Returns:
        A two-element list: the width/height ratio of the upright bounding
        rectangle, followed by the contour area divided by the area of that
        rectangle.
    """
    _, _, width, height = cv.boundingRect(contour)
    width_over_height = float(width) / height
    bounding_area = width * height
    fill_ratio = cv.contourArea(contour) / bounding_area
    return [width_over_height, fill_ratio]
|
|
|
|
## 计算HU矩
|
|
def extract_hu_moments(contour):
    """Return the Hu moments of *contour* as a flat array.

    Args:
        contour: A contour accepted by ``cv.moments``.

    Returns:
        The output of ``cv.HuMoments`` applied to the contour's moments,
        flattened to one dimension.
    """
    return cv.HuMoments(cv.moments(contour)).flatten()
|
|
|
|
## 特征向量构建
|
|
def extract_features(img_path):
    """Build the fixed-length feature vector for one image file.

    The image is read as grayscale, thresholded at 128 into a binary image,
    and its external contours are extracted. Features are computed from the
    first contour returned by OpenCV: area and perimeter, bounding-box aspect
    ratio and fill ratio, then the Hu moments.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A list of 11 numbers. If the thresholded image contains no contour,
        a list of 11 zeros is returned.

    Raises:
        FileNotFoundError: If ``cv.imread`` cannot load *img_path*.
    """
    # 2 contour features + 2 shape features + the Hu moments.
    feature_count = 11

    img = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError(f"无法加载图像: {img_path}")

    _, img_bin = cv.threshold(img, 128, 255, cv.THRESH_BINARY)
    contours, _ = cv.findContours(img_bin, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    if not contours:
        return [0] * feature_count

    # NOTE(review): this is simply the first contour OpenCV reports, not
    # necessarily the largest one -- confirm that is the intended choice.
    contour = contours[0]

    # Measure area/perimeter on the contour already selected rather than
    # running a second findContours pass over the same binary image; this
    # guarantees every feature group describes the same contour.
    contour_features = [cv.contourArea(contour), cv.arcLength(contour, True)]
    shape_features = extract_shape_features(contour)
    hu_moments = extract_hu_moments(contour)

    feature_vector = contour_features + shape_features + hu_moments.tolist()
    # Zero-pad defensively so every row has the same width for stacking.
    return feature_vector + [0] * (feature_count - len(feature_vector))
|
|
|
|
# 加载数据集和标签
|
|
# --- Collect labelled samples -------------------------------------------
# Each sample is a (path, label) pair; True marks the "face" folder and
# False the "non_face" folder.
face_paths = load_dataset("cache/pretrained/face/")
non_face_paths = load_dataset("cache/pretrained/non_face/")
face_train_data = [(face_path, True) for face_path in face_paths]
non_face_train_data = [(other_path, False) for other_path in non_face_paths]

# --- Split: 70% for training, 30% held out for evaluation ----------------
# NOTE(review): no random_state is passed, so the split differs per run.
train_test_data = [*face_train_data, *non_face_train_data]
train_data, test_data = train_test_split(train_test_data, test_size=0.3)

# --- Feature matrices (one row per image) --------------------------------
X_train = np.vstack([
    extract_features(sample[0])
    for sample in tqdm.tqdm(train_data, desc="数据集特征提取中:")
])
X_test = np.vstack([
    extract_features(sample[0])
    for sample in tqdm.tqdm(test_data, desc="测试集特征提取中:")
])

# --- Label vectors, in the same order as the feature rows ----------------
Y_train = np.array([is_face for _path, is_face in train_data])
Y_test = np.array([is_face for _path, is_face in test_data])

# --- Train a linear SVM ---------------------------------------------------
# NOTE(review): features are passed to the SVM unscaled -- confirm whether
# standardisation was intended before training.
classifier = SVC(kernel='linear', max_iter=10000)
classifier.fit(X_train, Y_train)

# --- Evaluate on the held-out samples -------------------------------------
Y_pred = classifier.predict(X_test)
accuracy = accuracy_score(Y_test, Y_pred)
print(f"准确率: {accuracy * 100:.2f}%")

# --- Persist the fitted model ---------------------------------------------
os.makedirs("models", exist_ok=True)
dump(classifier, "models/classifier.pkl")
|