获取人脸数据集
This commit is contained in:
@@ -144,3 +144,12 @@ cython_debug/
|
|||||||
# jetbrains
|
# jetbrains
|
||||||
.idea/
|
.idea/
|
||||||
*/.idea/
|
*/.idea/
|
||||||
|
|
||||||
|
# mnist dataset
|
||||||
|
实验六/DataImages*
|
||||||
|
实验六/data/
|
||||||
|
实验六/cache/
|
||||||
|
实验六/models/
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
实验七/cache/
|
||||||
@@ -7,3 +7,7 @@ pillow
|
|||||||
scikit-learn==1.3
|
scikit-learn==1.3
|
||||||
jupyterlab
|
jupyterlab
|
||||||
jupyterlab-language-pack-zh-CN
|
jupyterlab-language-pack-zh-CN
|
||||||
|
icecream
|
||||||
|
torch
|
||||||
|
torchvision
|
||||||
|
rich
|
||||||
@@ -0,0 +1,77 @@
|
|||||||
|
import os
|
||||||
|
import requests
|
||||||
|
import tqdm
|
||||||
|
import tarfile
|
||||||
|
|
||||||
|
# 计算大小
|
||||||
|
def get_human_readable_size(size_in_bytes):
|
||||||
|
# 定义单位
|
||||||
|
units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB']
|
||||||
|
# 计算单位和大小
|
||||||
|
index = 0
|
||||||
|
while size_in_bytes >= 1024 and index < len(units) - 1:
|
||||||
|
size_in_bytes /= 1024
|
||||||
|
index += 1
|
||||||
|
return f"{size_in_bytes:.2f} {units[index]}"
|
||||||
|
|
||||||
|
# 下载与进度条
|
||||||
|
def download(url, output_path):
    """Download *url* into directory *output_path*, showing a tqdm progress bar.

    The local filename is taken from the last URL path segment. If the target
    file already exists the download is skipped entirely.
    """
    filename = url.split('/')[-1]
    download_path = os.path.join(output_path, filename)
    os.makedirs(output_path, exist_ok=True)

    # Check for an existing file BEFORE issuing the HTTP request — the
    # original checked afterwards, wasting a full GET on every skip.
    if os.path.exists(download_path):
        print(filename, ' 已存在,跳过')
        return

    # Stream the response so large archives are not held in memory; the
    # `with` block guarantees the connection is released.
    with requests.get(url, stream=True) as response:
        # Fail loudly on HTTP errors instead of silently saving an error page.
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        print("下载 ", filename, ' 到 ', output_path, ' |大小= ', get_human_readable_size(total_size))

        with open(download_path, 'wb') as file, tqdm.tqdm(
            desc='下载进度',
            total=total_size,
            unit='B',
            unit_scale=True,
            unit_divisor=1024
        ) as bar:
            for chunk in response.iter_content(chunk_size=1024):
                # Filter out zero-length keep-alive chunks.
                if chunk:
                    file.write(chunk)
                    bar.update(len(chunk))
|
||||||
|
|
||||||
|
|
||||||
|
def decompress(file_path, output_dir):
    """Extract the tar archive at *file_path* into *output_dir* with a progress bar.

    Members whose target path already exists are skipped — the FDDB archive
    contains duplicate entries for the same path, which would otherwise be
    re-extracted. Members that would escape *output_dir* are refused.
    """
    os.makedirs(output_dir, exist_ok=True)
    print('解压 ' + file_path.split('/')[-1], ' 到 ', output_dir)

    with tarfile.open(file_path) as tar:
        members = tar.getmembers()
        # Resolved root used to detect path-traversal attempts below.
        out_root = os.path.realpath(output_dir)

        with tqdm.tqdm(total=len(members), desc='解压进度', unit='file') as bar:
            for member in members:
                # Full destination path for this member.
                full_path = os.path.join(output_dir, member.name)

                # Security: refuse members (e.g. "../evil" or absolute paths)
                # that would resolve outside output_dir — the classic tar
                # path-traversal issue when extracting downloaded archives.
                if not os.path.realpath(full_path).startswith(out_root + os.sep):
                    bar.update(1)
                    continue

                # FDDB repeats some paths inside one archive; skip files that
                # were already extracted.
                if os.path.exists(full_path):
                    bar.update(1)
                    continue

                tar.extract(member, path=output_dir)
                bar.update(1)
|
||||||
|
|
||||||
|
# --- Fetch the FDDB face dataset and its fold labels, then unpack both. ---

# Download the face image archive.
face_dataset_url = 'http://vis-www.cs.umass.edu/fddb/originalPics.tar.gz'
face_dataset_path = 'cache/dataset/face/'
download(face_dataset_url, face_dataset_path)

# Download the face label archive.
face_label_path = 'cache/dataset/face/label/'
face_label_url = 'http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz'
download(face_label_url, face_label_path)

# Unpack each archive next to where it was downloaded (local filename is the
# last URL path segment, matching what download() saved).
for archive_url, archive_dir in ((face_dataset_url, face_dataset_path),
                                 (face_label_url, face_label_path)):
    decompress(os.path.join(archive_dir, archive_url.split('/')[-1]), archive_dir)
|
||||||
+16
@@ -0,0 +1,16 @@
|
|||||||
|
# 这是一个示例 Python 脚本。
|
||||||
|
|
||||||
|
# 按 Shift+F10 执行或将其替换为您的代码。
|
||||||
|
# 按 双击 Shift 在所有地方搜索类、文件、工具窗口、操作和设置。
|
||||||
|
|
||||||
|
|
||||||
|
import io
import contextlib


def print_hi(name):
    """Greet *name* on stdout (PyCharm starter-template demo function)."""
    # Place a breakpoint on the next line to try the debugger (Ctrl+F8 toggles it).
    print(f'Hi, {name}')
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point — runs only when executed directly, not on import.
if __name__ == '__main__':
    print_hi('PyCharm')

# See https://www.jetbrains.com/help/pycharm/ for PyCharm help.
|
||||||
+2
-2
@@ -16,5 +16,5 @@ def save_img_subset(data, save_path, num_samples):
|
|||||||
img.save(os.path.join(save_path, f"{i}-label-{label}.png"))
|
img.save(os.path.join(save_path, f"{i}-label-{label}.png"))
|
||||||
|
|
||||||
# 保存前 600 张训练集图片和前 100 张测试集图片
|
# 保存前 600 张训练集图片和前 100 张测试集图片
|
||||||
save_img_subset(train_data, './DataImages-Train', 6000)
|
save_img_subset(train_data, './DataImages-Train', 60000)
|
||||||
save_img_subset(test_data, './DataImages-Test', 1000)
|
save_img_subset(test_data, './DataImages-Test', 10000)
|
||||||
|
|||||||
Binary file not shown.
|
Before Width: | Height: | Size: 289 B After Width: | Height: | Size: 8.1 KiB |
+3
-3
@@ -64,8 +64,8 @@ def ensure_dir_exists(directory):
|
|||||||
os.makedirs(directory)
|
os.makedirs(directory)
|
||||||
|
|
||||||
# 加载训练数据
|
# 加载训练数据
|
||||||
trains_paths, trains_labels = load_data("cache/pretrains/train")
|
trains_paths, trains_labels = load_data("DataImages-Train")
|
||||||
test_paths, test_labels = load_data("cache/pretrains/test")
|
test_paths, test_labels = load_data("DataImages-Test")
|
||||||
|
|
||||||
# 提取特征和标签
|
# 提取特征和标签
|
||||||
X_train = np.array([extract_features(train_path) for train_path in tqdm.tqdm(trains_paths, desc="训练集特征提取中:")])
|
X_train = np.array([extract_features(train_path) for train_path in tqdm.tqdm(trains_paths, desc="训练集特征提取中:")])
|
||||||
@@ -87,7 +87,7 @@ for test_sample in tqdm.tqdm(X_test, desc="测试集中预测进度"):
|
|||||||
Y_pred.append(classifier.predict(test_sample.reshape(1, -1)))
|
Y_pred.append(classifier.predict(test_sample.reshape(1, -1)))
|
||||||
accuracy = accuracy_score(Y_test, Y_pred)
|
accuracy = accuracy_score(Y_test, Y_pred)
|
||||||
|
|
||||||
print(f"性能: {accuracy * 100:.2f}%")
|
print(f"准确率: {accuracy * 100:.2f}%")
|
||||||
|
|
||||||
# 保存模型
|
# 保存模型
|
||||||
ensure_dir_exists("models")
|
ensure_dir_exists("models")
|
||||||
|
|||||||
Reference in New Issue
Block a user