trained!
This commit is contained in:
@@ -1,111 +0,0 @@
|
|||||||
import bz2
|
|
||||||
import os
|
|
||||||
import tempfile
|
|
||||||
|
|
||||||
import requests
|
|
||||||
import tqdm
|
|
||||||
import tarfile
|
|
||||||
|
|
||||||
# Format a byte count for humans.
def get_human_readable_size(size_in_bytes):
    """Format a byte count as a human-readable string, e.g. 1536 -> '1.50 KB'."""
    value = size_in_bytes
    # Step up one unit per 1024x until the value fits the current unit.
    for unit in ('B', 'KB', 'MB', 'GB', 'TB', 'PB'):
        if value < 1024:
            return f"{value:.2f} {unit}"
        value /= 1024
    # Anything still >= 1024 PB is reported in the largest unit.
    return f"{value:.2f} EB"
|
|
||||||
|
|
||||||
# Download a file with a progress bar.
def download(url, output_path):
    """Download *url* into *output_path*, showing a tqdm progress bar.

    The local filename is the last path segment of the URL. If the file
    already exists, the download is skipped without issuing any HTTP
    request. Raises requests.HTTPError on a non-2xx response.
    """
    filename = url.split('/')[-1]
    download_path = os.path.join(output_path, filename)
    os.makedirs(output_path, exist_ok=True)

    # Check for an existing file BEFORE making the request: the original
    # issued the GET first, wasting a request (and leaking the response)
    # in the common re-run case.
    if os.path.exists(download_path):
        print(filename, ' 已存在,跳过')
        return

    # Stream so large archives are never held fully in memory; the
    # context manager guarantees the connection is released.
    with requests.get(url, stream=True) as response:
        # Fail loudly instead of silently saving an HTML error page.
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        print("下载 ", filename, ' 到 ', output_path, ' |大小= ', get_human_readable_size(total_size))
        with open(download_path, 'wb') as file, tqdm.tqdm(
            desc='下载进度',
            total=total_size,
            unit='B',
            unit_scale=True,
            unit_divisor=1024
        ) as bar:
            for chunk in response.iter_content(chunk_size=1024):
                file.write(chunk)
                bar.update(len(chunk))
|
|
||||||
|
|
||||||
|
|
||||||
def decompress(file_path, output_dir):
    """Extract a tar archive into *output_dir* with a progress bar.

    Members whose target path already exists are skipped (the FDDB
    archive ships duplicate entries), and members that would resolve
    outside *output_dir* are rejected.
    """
    os.makedirs(output_dir, exist_ok=True)
    print('解压 ' + file_path.split('/')[-1], ' 到 ', output_dir)
    out_root = os.path.realpath(output_dir)
    with tarfile.open(file_path) as tar:
        members = tar.getmembers()
        with tqdm.tqdm(total=len(members), desc='解压进度', unit='file') as bar:
            for member in members:
                full_path = os.path.join(output_dir, member.name)
                # SECURITY: the archive was downloaded from the network;
                # refuse entries (e.g. '../x') that escape the output
                # directory instead of extracting them blindly.
                if not os.path.realpath(full_path).startswith(out_root + os.sep):
                    bar.update(1)
                    continue
                # FDDB contains duplicate paths; skip already-extracted files.
                if os.path.exists(full_path):
                    bar.update(1)
                    continue
                tar.extract(member, path=output_dir)
                bar.update(1)
|
|
||||||
|
|
||||||
|
|
||||||
def decompress_bz2(file_path, output_dir):
    """Decompress a .bz2 file into *output_dir*, skipping if present.

    The output filename is the input basename without its '.bz2'
    suffix. Data is copied in fixed-size chunks, so arbitrarily large
    files never have to fit in memory (the original read the whole
    decompressed stream at once — the dlib models are ~100 MB).
    """
    os.makedirs(output_dir, exist_ok=True)
    print('解压 ' + file_path.split('/')[-1], ' 到 ', output_dir)

    # Strip only the trailing '.bz2' to derive the target name.
    output_filename = os.path.basename(file_path).rsplit('.bz2', 1)[0]
    output_file_path = os.path.join(output_dir, output_filename)

    # Already decompressed on a previous run: do nothing.
    if os.path.exists(output_file_path):
        print(output_filename, ' 已存在,跳过解压')
        return

    with bz2.BZ2File(file_path, 'rb') as bz2_file, open(output_file_path, 'wb') as output_file:
        # Chunked copy keeps peak memory bounded regardless of file size.
        while True:
            chunk = bz2_file.read(1024 * 1024)
            if not chunk:
                break
            output_file.write(chunk)
|
|
||||||
|
|
||||||
# --- Fetch the FDDB face dataset, its fold labels, and the dlib models. ---

FDDB_IMAGES_URL = 'http://vis-www.cs.umass.edu/fddb/originalPics.tar.gz'
FDDB_LABELS_URL = 'http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz'
DATASET_DIR = 'cache/dataset/face/'
LABEL_DIR = 'cache/dataset/face/label/'

# Download the image archive, then the fold labels.
download(FDDB_IMAGES_URL, DATASET_DIR)
download(FDDB_LABELS_URL, LABEL_DIR)

# Unpack each archive next to where it was saved.
for archive_url, target_dir in ((FDDB_IMAGES_URL, DATASET_DIR), (FDDB_LABELS_URL, LABEL_DIR)):
    decompress(os.path.join(target_dir, archive_url.split('/')[-1]), target_dir)

# Download, then decompress, the dlib landmark and ResNet recognition models.
MODEL_URLS = (
    'https://github.com/davisking/dlib-models/raw/refs/heads/master/shape_predictor_68_face_landmarks.dat.bz2',
    'https://github.com/davisking/dlib-models/raw/refs/heads/master/dlib_face_recognition_resnet_model_v1.dat.bz2',
)
MODEL_DIR = 'models/'
for model_url in MODEL_URLS:
    download(model_url, MODEL_DIR)
for model_url in MODEL_URLS:
    decompress_bz2(os.path.join(MODEL_DIR, model_url.split('/')[-1]), MODEL_DIR)
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
import cv2 as cv
|
|
||||||
import os
|
|
||||||
import tqdm
|
|
||||||
|
|
||||||
# Preprocess a single image.
def pretrain(img_path, output_path):
    """Binarise and smooth one image, writing the result to *output_path*.

    Pipeline: load as greyscale -> threshold at 127 to pure black/white
    -> 3x3 box blur. Parent directories of the output are created on
    demand.
    """
    grey = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
    _, binary = cv.threshold(grey, 127, 255, cv.THRESH_BINARY)
    smoothed = cv.blur(binary, (3, 3))
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    cv.imwrite(output_path, smoothed)
|
|
||||||
|
|
||||||
# Collect image file paths.
def get_img_name(directory, extensions=None):
    """Recursively collect image paths under *directory*.

    A file counts as an image when its lower-cased name ends with one
    of *extensions* (default: .png/.jpg/.jpeg). Returns full paths in
    os.walk order.
    """
    suffixes = tuple(extensions) if extensions is not None else ('.png', '.jpg', '.jpeg')
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        for name in names
        if name.lower().endswith(suffixes)
    ]
|
|
||||||
|
|
||||||
# Preprocess every FDDB image into cache/pretrained/, mirroring the
# directory layout of the source dataset.
SOURCE_ROOT = 'cache/dataset/face/'
TARGET_ROOT = 'cache/pretrained/face'

os.makedirs('cache/pretrained/face/', exist_ok=True)
print('预处理人脸数据中:')
for source_path in tqdm.tqdm(get_img_name(SOURCE_ROOT)):
    # Keep the path relative to the dataset root so the output tree
    # mirrors the input tree.
    mirrored = os.path.relpath(source_path, SOURCE_ROOT)
    pretrain(source_path, os.path.join(TARGET_ROOT, mirrored))
|
|
||||||
@@ -1,74 +0,0 @@
|
|||||||
import dlib
|
|
||||||
import cv2 as cv
|
|
||||||
import joblib
|
|
||||||
import numpy as np
|
|
||||||
import os
|
|
||||||
|
|
||||||
from sklearn.ensemble import RandomForestClassifier
|
|
||||||
from sklearn.model_selection import train_test_split
|
|
||||||
from sklearn.multioutput import MultiOutputClassifier
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
# Extract face descriptors and boxes from one image file.
def extract_face_features(image_path):
    """Detect faces in the image at *image_path*.

    Returns (descriptors, bboxes): one dlib face descriptor and one
    (left, top, right, bottom) tuple per detected face. Relies on the
    module-level detector / predictor / face_rec_model.
    """
    img = cv.imread(image_path)
    descriptors = []
    boxes = []
    # The second argument asks dlib to upsample the image once before
    # detecting, which helps with smaller faces.
    for rect in detector(img, 1):
        landmarks = predictor(img, rect)
        descriptors.append(face_rec_model.compute_face_descriptor(img, landmarks))
        boxes.append((rect.left(), rect.top(), rect.right(), rect.bottom()))
    return descriptors, boxes
|
|
||||||
|
|
||||||
# Collect image file paths.
def get_img_path(directory, extension=None):
    """Walk *directory* and return full paths of files with image extensions.

    *extension* is an iterable of lower-case suffixes; defaults to
    ('.jpg', '.jpeg', '.png'). Matching is case-insensitive.
    """
    wanted = tuple(extension) if extension is not None else ('.jpg', '.jpeg', '.png')
    matches = []
    for folder, _subdirs, filenames in os.walk(directory):
        matches.extend(
            os.path.join(folder, filename)
            for filename in filenames
            if filename.lower().endswith(wanted)
        )
    return matches
|
|
||||||
|
|
||||||
|
|
||||||
# Load the dlib models (downloaded and unpacked by the setup script).
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('models/shape_predictor_68_face_landmarks.dat')
face_rec_model = dlib.face_recognition_model_v1('models/dlib_face_recognition_resnet_model_v1.dat')

# Collect the preprocessed images.
img_directory = 'cache/pretrained/'
images = get_img_path(img_directory)

# Extract one descriptor and one bounding box per detected face.
features = []
labels = []

for image in tqdm(images, desc='提取图片特征中:'):
    extracted_features, face_bboxes = extract_face_features(image)
    for feature,bbox in zip(extracted_features, face_bboxes):
        features.append(feature)
        labels.append(bbox)

X_train = np.array(features)
Y_train = np.array(labels)

# Hold out 20% of the samples as a test split.
X_train, X_test, Y_train, Y_test = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)

# Train the model.
# NOTE(review): the original comment here said "SVM", but this trains a
# multi-output RandomForest. Also, predicting pixel bbox coordinates as
# *classification* targets is dubious — every distinct coordinate value
# becomes its own class — confirm this is intended rather than regression.
print('训练模型中')
clf=MultiOutputClassifier(RandomForestClassifier(n_estimators=100,random_state=42))
clf.fit(X_train, Y_train)

# Evaluate on the held-out split. The mean of element-wise equality is
# per-coordinate exact-match accuracy, not per-face accuracy.
predictions = clf.predict(X_test)
accuracy=(predictions==Y_test).mean()
print(f'分类器准确度:{accuracy * 100:.2f}%')

# Persist the trained classifier.
os.makedirs('models/', exist_ok=True)
joblib.dump(clf, 'models/my_classifier.pkl')
|
|
||||||
-16
@@ -1,16 +0,0 @@
|
|||||||
# 这是一个示例 Python 脚本。
|
|
||||||
|
|
||||||
# 按 Shift+F10 执行或将其替换为您的代码。
|
|
||||||
# 按 双击 Shift 在所有地方搜索类、文件、工具窗口、操作和设置。
|
|
||||||
|
|
||||||
|
|
||||||
def print_hi(name):
    """Print a greeting for *name* (PyCharm starter-template demo)."""
    greeting = f'Hi, {name}'
    print(greeting)
|
|
||||||
|
|
||||||
|
|
||||||
# Run the demo only when executed as a script, not on import.
if __name__ == '__main__':
    print_hi('PyCharm')
|
|
||||||
|
|
||||||
# 访问 https://www.jetbrains.com/help/pycharm/ 获取 PyCharm 帮助
|
|
||||||
@@ -0,0 +1,95 @@
|
|||||||
|
import cv2
|
||||||
|
import cv2 as cv
|
||||||
|
import dlib
|
||||||
|
import joblib
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.svm import SVC
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
# 方法区
|
||||||
|
## Convert a BGR frame to (binary greyscale, RGB).
def img_cvt(img):
    """Return (binary, rgb) views of a BGR image.

    binary: greyscale -> threshold at 127 -> 3x3 box blur, matching the
    preprocessing applied to the training dataset.
    rgb: channel-swapped copy consumed by the dlib pipeline below.
    """
    # Use the 'cv' alias consistently — the original mixed cv2 and cv
    # (the same module imported twice) within one call.
    img_gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    _, img_gray = cv.threshold(img_gray, 127, 255, cv.THRESH_BINARY)
    img_gray = cv.blur(img_gray, (3, 3))
    img_rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    return img_gray, img_rgb
|
||||||
|
## Extract the descriptor of the first detected face.
def extract_face_feature(img):
    """Return the descriptor of the first face found in *img*, or None.

    Uses the module-level detector / predictor / face_rec_model; only
    the first detection is used even when several faces are present.
    """
    detections = detector(img)
    if len(detections) == 0:
        return None
    landmarks = predictor(img, detections[0])
    descriptor = face_rec_model.compute_face_descriptor(img, landmarks)
    return np.array(descriptor)
|
||||||
|
## Generate one synthetic negative sample.
def gen_negative_samples(features_dim=128):
    """Return *features_dim* values drawn uniformly from [-1, 1).

    These random vectors stand in for "not a face" descriptors when
    training the classifier.
    """
    return np.random.uniform(-1, 1, features_dim)
|
||||||
|
|
||||||
|
# --- State ---
# NOTE(review): the VideoCapture is never released; fine for a one-shot
# script, but call dataset_video.release() if this grows.
dataset_video=cv.VideoCapture('cache/dataset.mp4')
datasets=[]          # raw BGR frames read from the video
datasets_rgb=[]      # RGB conversions of those frames
features=[]          # descriptors of detected faces (positive class)
negative_samples=[]  # random vectors acting as the negative class

# --- Collect the dataset: every frame of the video becomes one sample ---
while(True):
    ret,frame=dataset_video.read()
    if not ret:
        break  # ret is False once the video is exhausted (or failed to open)
    datasets.append(frame)

# --- Load the dlib face models ---
detector=dlib.get_frontal_face_detector()
# NOTE(review): this loads a "_GTX" landmark file, but the setup script
# downloads 'shape_predictor_68_face_landmarks.dat' — confirm the filename.
predictor=dlib.shape_predictor("models/shape_predictor_68_face_landmarks_GTX.dat")
face_rec_model=dlib.face_recognition_model_v1('models/dlib_face_recognition_resnet_model_v1.dat')

# --- Preprocess: keep only the RGB variant for the dlib pipeline ---
for dataset in tqdm(datasets,desc='正在预处理数据集'):
    _,dataset_rgb = img_cvt(dataset)
    datasets_rgb.append(dataset_rgb)

# --- Positive features: first face descriptor per frame, if any ---
for dataset in tqdm(datasets_rgb,desc='正在求人脸特征'):
    face_descriptor=extract_face_feature(dataset)
    if face_descriptor is not None:
        features.append(face_descriptor)

# --- Negatives: 15 random vectors per positive sample.
# NOTE(review): uniform noise is an easy negative class — a linear SVM
# separates it trivially, so the accuracy below likely overstates real
# face/non-face performance.
for i in tqdm(range(len(features)*15),desc='正在生成负样本'):
    negative_sample=gen_negative_samples()
    negative_samples.append(negative_sample)

# --- Label the samples: 1 = face descriptor, 0 = random negative ---
positive_labels=[1]*len(features)
negative_labels=[0]*len(negative_samples)

samples=features+negative_samples
labels=positive_labels+negative_labels

X_train=np.array(samples)
Y_train=np.array(labels)

# --- 80/20 stratified train/test split ---
X_train,X_test,Y_train,Y_test=train_test_split(X_train,Y_train,test_size=0.2,random_state=42,stratify=Y_train)

# --- Train a linear SVM on the descriptors ---
print('正在训练分类器')
classifier=SVC(kernel='linear',C=1.0,random_state=42)
classifier.fit(X_train,Y_train)

# --- Evaluate on the held-out split (one predict call per sample) ---
Y_pred=[]
for test_sample in tqdm(X_test,desc='评估分类器'):
    Y_pred.append(classifier.predict(test_sample.reshape(1,-1)))
accuracy=accuracy_score(Y_test,Y_pred)
print(f'准确率:{accuracy*100:.2f}%')

# --- Persist the trained model ---
joblib.dump(classifier,'models/classifier.pkl')
|
||||||
Reference in New Issue
Block a user