获取人脸数据集

This commit is contained in:
2024-11-28 14:47:32 +08:00
parent 58baaf68e1
commit 1ef65ff1a2
7 changed files with 113 additions and 7 deletions
+10 -1
View File
@@ -143,4 +143,13 @@ cython_debug/
# jetbrains # jetbrains
.idea/ .idea/
*/.idea/ */.idea/
# mnist dataset
实验六/DataImages*
实验六/data/
实验六/cache/
实验六/models/
# dataset
实验七/cache/
+5 -1
View File
@@ -6,4 +6,8 @@ matplotlib
pillow pillow
scikit-learn==1.3 scikit-learn==1.3
jupyterlab jupyterlab
jupyterlab-language-pack-zh-CN jupyterlab-language-pack-zh-CN
icecream
torch
torchvision
rich
+77
View File
@@ -0,0 +1,77 @@
import os
import requests
import tqdm
import tarfile
# Human-readable size formatting
def get_human_readable_size(size_in_bytes):
    """Format a byte count as a string with two decimals and a unit suffix.

    The value is divided by 1024 for each step up the unit ladder
    (B, KB, MB, GB, TB, PB, EB) until it drops below 1024 or the largest
    unit is reached; values beyond that stay expressed in EB.
    """
    suffixes = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB')
    value = size_in_bytes
    # Only the units below the largest one can be "outgrown".
    for suffix in suffixes[:-1]:
        if value < 1024:
            break
        value /= 1024
    else:
        # Every smaller unit was exceeded: report in the largest unit.
        suffix = suffixes[-1]
    return f"{value:.2f} {suffix}"
# Download with a progress bar
def download(url, output_path, timeout=30):
    """Download *url* into the directory *output_path* with a progress bar.

    The file name is the last ``/``-separated component of *url*. If a file
    with that name already exists in *output_path* the download is skipped
    without contacting the server.

    Args:
        url: Address of the file to fetch.
        output_path: Directory that receives the file; created if missing.
        timeout: Seconds passed to ``requests.get`` as the connect/read
            timeout, so a stalled server cannot hang the script forever.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    filename = url.split('/')[-1]
    download_path = os.path.join(output_path, filename)
    os.makedirs(output_path, exist_ok=True)
    # Skip before opening a connection: nothing is needed from the server.
    if os.path.exists(download_path):
        print(filename, ' 已存在,跳过')
        return
    # Stream into a temporary name first. An interrupted run must not leave
    # a truncated archive at the final path, because the existence check
    # above would treat it as a finished download next time.
    partial_path = download_path + '.part'
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Do not save an HTTP error page as if it were the archive.
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        print("下载 ",filename,'',output_path,' |大小= ',get_human_readable_size(total_size))
        with open(partial_path, 'wb') as file, tqdm.tqdm(
            desc='下载进度',
            # Unknown length: let tqdm show a plain counter instead of 0/0.
            total=total_size or None,
            unit='B',
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                file.write(chunk)
                bar.update(len(chunk))
    # The file is closed at this point, so the rename is safe everywhere.
    os.replace(partial_path, download_path)
def decompress(file_path, output_dir):
    """Extract the tar archive *file_path* into *output_dir* with a progress bar.

    Members whose target path already exists are skipped, so an archive that
    lists the same path more than once (as the FDDB archive does) does not
    fail, and re-running the script does not extract everything again.

    Args:
        file_path: Path of the tar archive (any compression ``tarfile.open``
            detects).
        output_dir: Directory that receives the contents; created if missing.

    Raises:
        tarfile.TarError: If a member would be written outside *output_dir*.
    """
    os.makedirs(output_dir, exist_ok=True)
    print('解压 '+os.path.basename(file_path),'',output_dir)
    # Trailing separator so that "/data/out-evil" is not accepted as being
    # inside "/data/out".
    root = os.path.realpath(output_dir)
    root_prefix = os.path.join(root, '')
    # Use the stdlib "data" extraction filter where this Python provides it.
    extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}
    with tarfile.open(file_path) as tar:
        members = tar.getmembers()
        with tqdm.tqdm(total=len(members), desc='解压进度', unit='file') as bar:
            for member in members:
                # Build the full target path
                full_path = os.path.join(output_dir, member.name)
                # The archive comes from the network: refuse names such as
                # "../x", absolute paths, or paths routed through a symlink
                # that resolve outside the output directory.
                resolved = os.path.realpath(full_path)
                if resolved != root and not resolved.startswith(root_prefix):
                    raise tarfile.TarError(
                        f'archive member escapes output directory: {member.name!r}'
                    )
                # Extract only what is not there yet (see docstring).
                if not os.path.exists(full_path):
                    tar.extract(member, path=output_dir, **extract_kwargs)
                bar.update(1)
# Face dataset archive and the local directory it is cached in
face_dataset_url='http://vis-www.cs.umass.edu/fddb/originalPics.tar.gz'
face_dataset_path='cache/dataset/face/'
# Face label archive and the local directory it is cached in
face_label_url='http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz'
face_label_path='cache/dataset/face/label/'
_archives = (
    (face_dataset_url, face_dataset_path),
    (face_label_url, face_label_path),
)
# Download both archives first (dataset, then labels) ...
for _url, _directory in _archives:
    download(_url, _directory)
# ... then unpack each one into the directory it was downloaded to
for _url, _directory in _archives:
    _archive_name = _url.rsplit('/', 1)[-1]
    decompress(os.path.join(_directory, _archive_name), _directory)
+16
View File
@@ -0,0 +1,16 @@
# 这是一个示例 Python 脚本。
# 按 Shift+F10 执行或将其替换为您的代码。
# 按 双击 Shift 在所有地方搜索类、文件、工具窗口、操作和设置。
def print_hi(name):
    """Print the greeting ``Hi, <name>`` to standard output."""
    # Put a breakpoint on the line below to debug the script.
    greeting = f'Hi, {name}'
    print(greeting)  # Press Ctrl+F8 to toggle the breakpoint.
# Press the green button in the gutter to run the script.
# Entry point: runs only when the file is executed directly, not on import.
if __name__ == '__main__':
    print_hi('PyCharm')
# See PyCharm help at https://www.jetbrains.com/help/pycharm/
+2 -2
View File
@@ -16,5 +16,5 @@ def save_img_subset(data, save_path, num_samples):
img.save(os.path.join(save_path, f"{i}-label-{label}.png")) img.save(os.path.join(save_path, f"{i}-label-{label}.png"))
# 保存前 600 张训练集图片和前 100 张测试集图片 # 保存前 60000 张训练集图片和前 10000 张测试集图片
save_img_subset(train_data, './DataImages-Train', 6000) save_img_subset(train_data, './DataImages-Train', 60000)
save_img_subset(test_data, './DataImages-Test', 1000) save_img_subset(test_data, './DataImages-Test', 10000)
Binary file not shown.

Before

Width:  |  Height:  |  Size: 289 B

After

Width:  |  Height:  |  Size: 8.1 KiB

+3 -3
View File
@@ -64,8 +64,8 @@ def ensure_dir_exists(directory):
os.makedirs(directory) os.makedirs(directory)
# 加载训练数据 # 加载训练数据
trains_paths, trains_labels = load_data("cache/pretrains/train") trains_paths, trains_labels = load_data("DataImages-Train")
test_paths, test_labels = load_data("cache/pretrains/test") test_paths, test_labels = load_data("DataImages-Test")
# 提取特征和标签 # 提取特征和标签
X_train = np.array([extract_features(train_path) for train_path in tqdm.tqdm(trains_paths, desc="训练集特征提取中:")]) X_train = np.array([extract_features(train_path) for train_path in tqdm.tqdm(trains_paths, desc="训练集特征提取中:")])
@@ -87,7 +87,7 @@ for test_sample in tqdm.tqdm(X_test, desc="测试集中预测进度"):
Y_pred.append(classifier.predict(test_sample.reshape(1, -1))) Y_pred.append(classifier.predict(test_sample.reshape(1, -1)))
accuracy = accuracy_score(Y_test, Y_pred) accuracy = accuracy_score(Y_test, Y_pred)
print(f"性能: {accuracy * 100:.2f}%") print(f"准确率: {accuracy * 100:.2f}%")
# 保存模型 # 保存模型
ensure_dir_exists("models") ensure_dir_exists("models")