import os
import time

import cv2 as cv
import joblib
import numpy as np
import tqdm
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.utils import parallel_backend


def _contour_measurements(contour):
    """Return ``[area, perimeter]`` of *contour*, treating it as a closed curve."""
    return [cv.contourArea(contour), cv.arcLength(contour, True)]


# Contour features
def extract_contour_features(img):
    """Return ``[area, perimeter]`` of the first external contour found in *img*.

    Args:
        img: Image passed straight to ``cv.findContours`` (the caller in this
            file passes a thresholded binary image).

    Raises:
        ValueError: If no contour is found in *img*.
    """
    contours, _ = cv.findContours(img, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Fail with an explicit message instead of an opaque IndexError.
        raise ValueError("图像中未找到轮廓")
    return _contour_measurements(contours[0])


# Shape features
def extract_shape_features(contour):
    """Return ``[aspect_ratio, shape_factor]`` for *contour*.

    ``aspect_ratio`` is bounding-box width divided by height, and
    ``shape_factor`` is the contour area divided by the bounding-box area.
    """
    _x, _y, w, h = cv.boundingRect(contour)
    aspect_ratio = float(w) / h
    shape_factor = cv.contourArea(contour) / (w * h)
    return [aspect_ratio, shape_factor]


# Hu moments
def extract_hu_moments(contour):
    """Return the Hu moments of *contour* as a flat 1-D array."""
    return cv.HuMoments(cv.moments(contour)).flatten()


# Feature vector construction
def extract_features(img_path):
    """Build the feature vector for the image stored at *img_path*.

    The image is read as grayscale, thresholded at 128, and the first external
    contour is described by area, perimeter, aspect ratio, shape factor and
    its Hu moments, concatenated in that order into one list.

    Args:
        img_path: Path of the image file to load.

    Returns:
        A flat list of numeric features.

    Raises:
        FileNotFoundError: If ``cv.imread`` cannot load the image.
        ValueError: If the thresholded image contains no contour.
    """
    img = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError(f"无法加载图像: {img_path}")

    _, img_bin = cv.threshold(img, 128, 255, cv.THRESH_BINARY)
    contours, _ = cv.findContours(img_bin, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    if not contours:
        # e.g. an image that is entirely below the threshold; name the file so
        # the offending sample can be found.
        raise ValueError(f"图像中未找到轮廓: {img_path}")
    contour = contours[0]

    # Reuse the contour found above rather than scanning the image a second
    # time, so every feature group describes the same contour.
    return (
        _contour_measurements(contour)
        + extract_shape_features(contour)
        + extract_hu_moments(contour).tolist()
    )


# Load image paths and labels
def load_data(dataset_path):
    """Collect ``.png`` image paths and their integer labels from a directory.

    The label is parsed from the file name: the text after the last ``-`` and
    before the following ``.`` (for example ``sample-3.png`` yields ``3``).

    Args:
        dataset_path: Directory to list.

    Returns:
        A ``(image_paths, labels)`` tuple of equally long lists, ordered by
        file name.

    Raises:
        ValueError: If a ``.png`` file name does not carry an integer label.
    """
    image_paths = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore training) reproducible across machines.
    for file_name in sorted(os.listdir(dataset_path)):
        if not file_name.endswith(".png"):
            continue
        labels.append(int(file_name.split("-")[-1].split(".")[0]))
        image_paths.append(os.path.join(dataset_path, file_name))
    return image_paths, labels


# Create a directory
def ensure_dir_exists(directory):
    """Create *directory* (and any missing parents) if it does not exist yet."""
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)


# Load the training and test data
trains_paths, trains_labels = load_data("cache/pretrains/train")
test_paths, test_labels = load_data("cache/pretrains/test")

# Extract features and labels
X_train = np.array(
    [extract_features(path) for path in tqdm.tqdm(trains_paths, desc="训练集特征提取中:")]
)
Y_train = np.array(trains_labels)
X_test = np.array(
    [extract_features(path) for path in tqdm.tqdm(test_paths, desc="测试集特征提取中:")]
)
Y_test = np.array(test_labels)

# Train the classifier
classifier = SVC(kernel="linear")
# NOTE(review): SVC.fit does not appear to dispatch work through joblib, so
# this backend context likely has no effect on training speed - confirm
# before relying on it.
with parallel_backend("threading", n_jobs=-1):
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()
    classifier.fit(X_train, Y_train)
    elapsed_time = time.perf_counter() - start_time
print(f"模型训练耗时: {elapsed_time:.2f} 秒")

# Evaluate on the test set.
# Predict the whole test matrix in one call: this returns a flat array of
# labels (the per-sample loop collected 1-element arrays) and avoids one
# predict() call per row.
Y_pred = classifier.predict(X_test)
accuracy = accuracy_score(Y_test, Y_pred)
print(f"性能: {accuracy * 100:.2f}%")

# Save the model
ensure_dir_exists("models")
joblib.dump(classifier, "models/classifier.pkl")