diff --git a/实验七/1.get_dataset.py b/实验七/1.get_dataset.py
index d9def9f..2de812c 100644
--- a/实验七/1.get_dataset.py
+++ b/实验七/1.get_dataset.py
@@ -2,6 +2,9 @@ import os
 import requests
 import tqdm
 import tarfile
+import pickle
+import numpy as np
+from PIL import Image
 
 # 计算大小
 def get_human_readable_size(size_in_bytes):
@@ -72,6 +75,54 @@ face_label_path='cache/dataset/face/label/'
 face_label_url='http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz'
 download(face_label_url,face_label_path)
 
+# Download the non-face dataset (CIFAR-10 python archive)
+non_face_path='cache/dataset/non_face/'
+non_face_url='https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
+download(non_face_url,non_face_path)
+
 # 解压数据集
 decompress(os.path.join(face_dataset_path,face_dataset_url.split('/')[-1]),face_dataset_path)
-decompress(os.path.join(face_label_path,face_label_url.split('/')[-1]),face_label_path)
\ No newline at end of file
+decompress(os.path.join(face_label_path,face_label_url.split('/')[-1]),face_label_path)
+decompress(os.path.join(non_face_path,non_face_url.split('/')[-1]),non_face_path)
+
+
+# Decode the non-face dataset
+def unpickle(file_path):
+    """Load and return the pickled object stored at ``file_path``.
+
+    The file is opened in binary mode and unpickled with ``encoding='bytes'``,
+    so 8-bit string instances come back as ``bytes``; the batch dict below is
+    therefore indexed with ``b'data'`` and ``b'labels'``.
+    """
+    # NOTE(review): pickle.load can execute arbitrary code; only feed it files
+    # obtained from a trusted source.
+    with open(file_path, 'rb') as file:
+        return pickle.load(file, encoding='bytes')
+
+
+def save_images(data, labels, directory):
+    """Save every row of ``data`` into ``directory`` as ``<label>_<index>.png``.
+
+    Each row is reshaped to (3, 32, 32), transposed to (32, 32, 3) and cast to
+    uint8 before PIL writes it. ``labels[i]`` supplies the label part of the
+    file name for ``data[i]``. ``directory`` is created if it is missing.
+    """
+    os.makedirs(directory, exist_ok=True)
+    for i, row in enumerate(tqdm.tqdm(data)):
+        img = row.reshape((3, 32, 32)).transpose((1, 2, 0)).astype(np.uint8)
+        img_filename = os.path.join(directory, f"{labels[i]}_{i}.png")
+        Image.fromarray(img).save(img_filename)
+
+
+print('解码非人脸数据集:')
+cifar10_dir = os.path.join(non_face_path, 'cifar-10-batches-py')
+# Only data_batch_1 is decoded; the images are written next to the archive.
+output_dir = non_face_path
+batch_file = 'data_batch_1'
+file_path = os.path.join(cifar10_dir, batch_file)
+batch_data = unpickle(file_path)
+
+images = batch_data[b'data']
+labels = batch_data[b'labels']
+
+save_images(images, labels, output_dir)