获取非人脸数据集

This commit is contained in:
2024-11-28 15:36:34 +08:00
parent 1ef65ff1a2
commit 9b7ceadf8e
+37 -1
View File
@@ -2,6 +2,9 @@ import os
import requests import requests
import tqdm import tqdm
import tarfile import tarfile
import pickle
import numpy as np
from PIL import Image
# 计算大小 # 计算大小
def get_human_readable_size(size_in_bytes): def get_human_readable_size(size_in_bytes):
@@ -72,6 +75,39 @@ face_label_path='cache/dataset/face/label/'
face_label_url='http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz'
download(face_label_url,face_label_path)

# Download the non-face dataset archive.
non_face_path='cache/dataset/non_face/'
non_face_url='https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
download(non_face_url,non_face_path)

# Decompress the datasets.
# NOTE(review): each archive path is rebuilt from the last component of its
# URL, which assumes download() stores the file under that name -- confirm.
decompress(os.path.join(face_dataset_path,face_dataset_url.split('/')[-1]),face_dataset_path)
decompress(os.path.join(face_label_path,face_label_url.split('/')[-1]),face_label_path)
decompress(os.path.join(non_face_path,non_face_url.split('/')[-1]),non_face_path)
# 解码非人脸数据集
def unpickle(file_path):
    """Load and return the pickled object stored in ``file_path``.

    The file is opened in binary mode and decoded with ``encoding='bytes'``,
    so 8-bit strings pickled by Python 2 are returned as ``bytes`` objects
    (which is why callers index the result with keys such as ``b'data'``).

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        The first object deserialized from the file.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only call this on archives obtained from a trusted source; the data
    # passed in by this script comes from a network download.
    with open(file_path, 'rb') as file:
        return pickle.load(file, encoding='bytes')
def save_images(data, labels, directory):
    """Write every row of ``data`` to ``directory`` as a PNG file.

    Each row is reshaped to ``(3, 32, 32)``, its axes are reordered to
    ``(32, 32, 3)``, and it is cast to ``uint8`` before being handed to
    PIL. Files are named ``<label>_<index>.png``, where ``<label>`` is
    ``str(labels[index])``. The directory is created if it does not exist.

    Args:
        data: Indexable collection of arrays holding 3 * 32 * 32 values each.
            Presumably flattened channel-first CIFAR-10 images -- confirm
            against the caller.
        labels: Indexable collection with one label per row of ``data``.
        directory: Destination folder for the PNG files.
    """
    os.makedirs(directory, exist_ok=True)
    for index in tqdm.tqdm(range(len(data))):
        # Reshape to (3, 32, 32), then move the leading axis last for PIL.
        planes = data[index].reshape((3, 32, 32))
        pixels = planes.transpose((1, 2, 0)).astype(np.uint8)
        target = os.path.join(directory, f"{labels[index]!s}_{index}.png")
        Image.fromarray(pixels).save(target)
print('解码非人脸数据集:')

# Only this one batch file is decoded here.
batch_file = 'data_batch_1'
# The PNG files are written directly into the non-face dataset folder.
output_dir = non_face_path

# Folder expected to hold the extracted batch files.
cifar10_dir = os.path.join(non_face_path, 'cifar-10-batches-py')
file_path = os.path.join(cifar10_dir, batch_file)

# The batch dictionary is indexed with bytes keys.
batch_data = unpickle(file_path)
images = batch_data[b'data']
labels = batch_data[b'labels']

save_images(images, labels, output_dir)