"""Train a face / non-face SVM classifier on contour-based shape features.

Each image is thresholded to a binary mask and described by an 11-element
vector: contour area and perimeter, bounding-box aspect ratio and fill
ratio, and the seven Hu moments. Running this module trains the classifier,
prints its accuracy on a held-out split, and saves it under ``models/``.
"""
import os

import cv2 as cv
import numpy as np
import tqdm
from joblib import dump
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Length of a feature vector: 2 contour + 2 shape + 7 Hu-moment values.
FEATURE_DIM = 11


# Load images
def load_dataset(directory):
    """Return the paths of every file found recursively under *directory*.

    Args:
        directory: Root directory to walk.

    Returns:
        A list of file paths (each joined with its containing directory).
        The list is empty when the directory has no files or does not exist,
        because ``os.walk`` yields nothing in that case.
    """
    files = []
    for root, _dirs, file_names in os.walk(directory):
        files.extend(os.path.join(root, name) for name in file_names)
    return files


# Feature extraction
def _find_external_contours(img_bin):
    """Return the external contours of a binary image."""
    # findContours returns (contours, hierarchy) on OpenCV 4.x but
    # (image, contours, hierarchy) on 3.x; the contours are second-to-last
    # in both layouts.
    return cv.findContours(img_bin, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)[-2]


def _contour_measurements(contour):
    """Return ``[area, perimeter]`` of a single closed contour."""
    return [cv.contourArea(contour), cv.arcLength(contour, True)]


## Contour features
def extract_contour_features(img):
    """Return ``[area, perimeter]`` of the first external contour in *img*.

    Args:
        img: Single-channel image passed to ``cv.findContours``.

    Returns:
        A two-element list ``[area, perimeter]``, or ``[0.0, 0.0]`` when the
        image contains no contours.
    """
    contours = _find_external_contours(img)
    if not contours:
        # Previously this indexed contours[0] unconditionally and raised
        # IndexError on blank images.
        return [0.0, 0.0]
    return _contour_measurements(contours[0])


## Shape features
def extract_shape_features(contour):
    """Return ``[aspect_ratio, shape_factor]`` for *contour*.

    ``aspect_ratio`` is bounding-box width divided by height, and
    ``shape_factor`` is the contour area divided by the bounding-box area.

    Args:
        contour: A contour as accepted by ``cv.boundingRect``.

    Returns:
        A two-element list, or ``[0.0, 0.0]`` if the bounding box has zero
        width or height.
    """
    _x, _y, w, h = cv.boundingRect(contour)
    if w == 0 or h == 0:
        # Guard both divisions below against a degenerate bounding box.
        return [0.0, 0.0]
    aspect_ratio = float(w) / h
    shape_factor = cv.contourArea(contour) / (w * h)
    return [aspect_ratio, shape_factor]


## Hu moments
def extract_hu_moments(contour):
    """Return the seven Hu moments of *contour* as a flat NumPy array."""
    return cv.HuMoments(cv.moments(contour)).flatten()


## Feature vector construction
def extract_features(img_path):
    """Build the ``FEATURE_DIM``-element feature vector for one image file.

    The image is read as grayscale and thresholded at 128; features are
    computed from the first external contour of the resulting binary image.

    Args:
        img_path: Path of the image to read.

    Returns:
        A list of ``FEATURE_DIM`` numbers; all zeros when the binary image
        has no contours.

    Raises:
        FileNotFoundError: If ``cv.imread`` cannot load *img_path*.
    """
    img = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError(f"无法加载图像: {img_path}")

    _, img_bin = cv.threshold(img, 128, 255, cv.THRESH_BINARY)
    contours = _find_external_contours(img_bin)
    if not contours:
        return [0] * FEATURE_DIM

    # NOTE(review): only the first contour returned by OpenCV is used, which
    # is not necessarily the largest one - confirm this is intended.
    contour = contours[0]
    # Measure the contour already found instead of running findContours a
    # second time on the same binary image.
    feature_vector = (
        _contour_measurements(contour)
        + extract_shape_features(contour)
        + extract_hu_moments(contour).tolist()
    )
    # Zero-pad defensively so every sample has exactly FEATURE_DIM entries.
    return feature_vector + [0] * (FEATURE_DIM - len(feature_vector))


# Load the dataset and labels (True = face, False = non-face)
face_paths = load_dataset("cache/pretrained/face/")
face_train_data = [(path, True) for path in face_paths]
non_face_paths = load_dataset("cache/pretrained/non_face/")
non_face_train_data = [(path, False) for path in non_face_paths]
train_test_data = face_train_data + non_face_train_data
# NOTE(review): no random_state is given, so the 70/30 split (and therefore
# the reported accuracy) differs between runs.
train_data, test_data = train_test_split(train_test_data, test_size=0.3)

# Extract features and labels
X_train = np.vstack(
    [
        extract_features(train_path)
        for train_path, _ in tqdm.tqdm(train_data, desc="数据集特征提取中:")
    ]
)
X_test = np.vstack(
    [
        extract_features(test_path)
        for test_path, _ in tqdm.tqdm(test_data, desc="测试集特征提取中:")
    ]
)
Y_train = np.array([label for _, label in train_data])
Y_test = np.array([label for _, label in test_data])

# Train the classifier
# NOTE(review): the features are fed to the linear SVM unscaled; consider
# standardizing them if the solver hits max_iter without converging.
classifier = SVC(kernel='linear', max_iter=10000)
classifier.fit(X_train, Y_train)

# Evaluate
Y_pred = classifier.predict(X_test)
accuracy = accuracy_score(Y_test, Y_pred)
print(f"准确率: {accuracy * 100:.2f}%")

# Save the model
os.makedirs("models", exist_ok=True)
dump(classifier, "models/classifier.pkl")