open-cv-experiment/实验六/train.py

import time

import cv2 as cv
import joblib
import numpy as np
import tqdm
import os

from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.utils import parallel_backend


# Extract contour features
def extract_contour_features(img):
    """Return the area and perimeter of the first external contour of ``img``.

    Args:
        img: Single-channel image handed to ``cv.findContours``; the caller
            in this file passes a thresholded (0/255) image.

    Returns:
        A two-element list ``[area, perimeter]``. The perimeter is computed
        with the contour treated as closed.

    Raises:
        ValueError: If ``cv.findContours`` reports no contour in ``img``.
    """
    contours, _ = cv.findContours(img, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Without this guard an image with no foreground fails with an
        # opaque IndexError on contours[0].
        raise ValueError("未检测到任何轮廓，无法提取轮廓特征")
    # Only the first contour returned by OpenCV is described.
    contour = contours[0]
    area = cv.contourArea(contour)
    perimeter = cv.arcLength(contour, True)
    return [area, perimeter]

# Extract shape features
def extract_shape_features(contour):
    """Describe ``contour`` relative to its upright bounding rectangle.

    Args:
        contour: A contour as accepted by ``cv.boundingRect`` and
            ``cv.contourArea``.

    Returns:
        A two-element list ``[aspect_ratio, shape_factor]`` where
        ``aspect_ratio`` is bounding-box width divided by height and
        ``shape_factor`` is the contour area divided by the bounding-box
        area.
    """
    # The rectangle's origin is irrelevant here; only its size is used.
    _, _, width, height = cv.boundingRect(contour)
    width_over_height = width / height
    fill_ratio = cv.contourArea(contour) / (width * height)
    return [width_over_height, fill_ratio]

# Compute Hu moments
def extract_hu_moments(contour):
    """Return the Hu moments of ``contour`` as a flat array.

    Args:
        contour: A contour as accepted by ``cv.moments``.

    Returns:
        The output of ``cv.HuMoments`` applied to the contour's moments,
        flattened to one dimension.
    """
    return cv.HuMoments(cv.moments(contour)).flatten()

# Build the feature vector
def extract_features(img_path):
    """Load an image and build its contour-based feature vector.

    The image is read as grayscale and binarised with a fixed threshold of
    128. The first external contour of the binary image is then described
    by its contour features, shape features and Hu moments.

    Args:
        img_path: Path of the image file to load.

    Returns:
        A flat list made of the contour features, followed by the shape
        features, followed by the Hu moments.

    Raises:
        FileNotFoundError: If ``cv.imread`` cannot load ``img_path``.
        ValueError: If the binarised image contains no contour.
    """
    img = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError(f"无法加载图像: {img_path}")
    _, img_bin = cv.threshold(img, 128, 255, cv.THRESH_BINARY)
    contours, _ = cv.findContours(img_bin, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    if not contours:
        # An image with no pixel above the threshold has no contour; report
        # which file is at fault instead of failing with a bare IndexError.
        raise ValueError(f"图像中未检测到轮廓: {img_path}")
    contour = contours[0]
    # extract_contour_features runs its own contour detection on img_bin.
    contour_features = extract_contour_features(img_bin)
    shape_features = extract_shape_features(contour)
    hu_moments = extract_hu_moments(contour)
    feature_vector = contour_features + shape_features + hu_moments.tolist()
    return feature_vector

# Load image paths and labels
def load_data(dataset_path):
    """Collect the PNG files in ``dataset_path`` together with their labels.

    The label is the integer between the last ``-`` of the file name and the
    first ``.`` that follows it (for example ``sample-7.png`` has label 7).
    Files whose name does not end in ``.png`` are ignored.

    Args:
        dataset_path: Directory to scan (not recursive).

    Returns:
        A tuple ``(image_paths, labels)`` of two parallel lists, ordered by
        file name.

    Raises:
        ValueError: If the label part of a ``.png`` file name is not an
            integer.
        OSError: If ``dataset_path`` cannot be listed.
    """
    image_paths = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # sample order identical from run to run.
    for file_name in sorted(os.listdir(dataset_path)):
        if not file_name.endswith(".png"):
            continue
        label = int(file_name.split("-")[-1].split(".")[0])
        image_paths.append(os.path.join(dataset_path, file_name))
        labels.append(label)
    return image_paths, labels

# Create the directory
def ensure_dir_exists(directory):
    """Create ``directory`` (and any missing parents) if it does not exist.

    Args:
        directory: Path of the directory to create.

    Raises:
        FileExistsError: If ``directory`` exists but is not a directory.
    """
    # exist_ok=True avoids the race between checking for the directory and
    # creating it.
    os.makedirs(directory, exist_ok=True)

# Load the training and test data
trains_paths, trains_labels = load_data("cache/pretrains/train")
test_paths, test_labels = load_data("cache/pretrains/test")

# Extract features and labels
X_train = np.array([extract_features(train_path) for train_path in tqdm.tqdm(trains_paths, desc="训练集特征提取中：")])
Y_train = np.array(trains_labels)
X_test = np.array([extract_features(test_path) for test_path in tqdm.tqdm(test_paths, desc="测试集特征提取中：")])
Y_test = np.array(test_labels)

# Train the classifier
classifier = SVC(kernel="linear")
# NOTE(review): SVC takes no n_jobs argument, so this backend context
# presumably has no effect on fit() — confirm before relying on it.
with parallel_backend('threading', n_jobs=-1):
    start_time = time.time()
    classifier.fit(X_train, Y_train)
    elapsed_time = time.time() - start_time

print(f"模型训练耗时: {elapsed_time:.2f} 秒")

# Evaluate on the test set
Y_pred = []
for test_sample in tqdm.tqdm(X_test, desc="测试集中预测进度"):
    # predict() returns a length-1 array for a single sample; keep only the
    # label so Y_pred is a flat sequence with the same shape as Y_test.
    Y_pred.append(classifier.predict(test_sample.reshape(1, -1))[0])
accuracy = accuracy_score(Y_test, Y_pred)

print(f"性能: {accuracy * 100:.2f}%")

# Save the model
ensure_dir_exists("models")
joblib.dump(classifier, "models/classifier.pkl")