import os
import tarfile

import requests
import tqdm

# Seconds passed as the `timeout` of the HTTP request so a stalled server
# cannot hang the script forever.
_REQUEST_TIMEOUT = 30
# Number of bytes requested per iteration while streaming a response to disk.
_CHUNK_SIZE = 64 * 1024


def get_human_readable_size(size_in_bytes):
    """Format a byte count as a string using 1024-based units.

    Args:
        size_in_bytes: Size in bytes.

    Returns:
        The size with two decimals followed by a unit from ``B`` to ``EB``,
        for example ``"1.50 KB"``.
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB']
    index = 0
    # Stop at the last unit so very large values are still expressed in EB.
    while size_in_bytes >= 1024 and index < len(units) - 1:
        size_in_bytes /= 1024
        index += 1
    return f"{size_in_bytes:.2f} {units[index]}"


def download(url, output_path):
    """Download ``url`` into the directory ``output_path`` with a progress bar.

    The local file name is the last ``/``-separated segment of ``url``. If a
    file with that name already exists in ``output_path`` the download is
    skipped without contacting the server.

    Args:
        url: URL of the file to download.
        output_path: Directory to store the file in; created if missing.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On other request failures (e.g. timeout).
    """
    filename = url.split('/')[-1]
    download_path = os.path.join(output_path, filename)
    os.makedirs(output_path, exist_ok=True)

    # Skip files that were already downloaded, before doing any network I/O.
    if os.path.exists(download_path):
        print(filename, ' 已存在,跳过')
        return

    # Stream into a temporary ".part" file and rename it only once the
    # download finished, so an interrupted run never leaves a truncated file
    # at the final path that a later run would treat as already downloaded.
    partial_path = download_path + '.part'
    with requests.get(url, stream=True, timeout=_REQUEST_TIMEOUT) as response:
        # Fail instead of saving an HTTP error page as the archive.
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        print("下载 ", filename, ' 到 ', output_path, ' |大小= ',
              get_human_readable_size(total_size))
        try:
            with open(partial_path, 'wb') as file, tqdm.tqdm(
                desc='下载进度',
                # A missing content-length header yields 0; pass None so the
                # bar is shown without a total instead of with a total of 0.
                total=total_size or None,
                unit='B',
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                for chunk in response.iter_content(chunk_size=_CHUNK_SIZE):
                    file.write(chunk)
                    bar.update(len(chunk))
            os.replace(partial_path, download_path)
        finally:
            # Only still present when the download did not complete.
            if os.path.exists(partial_path):
                os.remove(partial_path)


def decompress(file_path, output_dir):
    """Extract the tar archive ``file_path`` into ``output_dir``.

    Members whose target path already exists are skipped, so running the
    function again does not overwrite previously extracted files.

    Args:
        file_path: Path of the tar archive (compression is detected by
            ``tarfile.open``).
        output_dir: Directory to extract into; created if missing.

    Raises:
        ValueError: If a member's path resolves outside ``output_dir``.
    """
    os.makedirs(output_dir, exist_ok=True)
    print('解压 ' + os.path.basename(file_path), ' 到 ', output_dir)

    extract_root = os.path.realpath(output_dir)
    with tarfile.open(file_path) as tar:
        members = tar.getmembers()
        with tqdm.tqdm(total=len(members), desc='解压进度', unit='file') as bar:
            for member in members:
                # Build the full target path of this member.
                full_path = os.path.join(output_dir, member.name)

                # The archive comes from the network: refuse member names
                # (absolute paths, "..") that would land outside output_dir.
                resolved = os.path.realpath(full_path)
                if os.path.commonpath([extract_root, resolved]) != extract_root:
                    raise ValueError(
                        f"unsafe path in archive {file_path}: {member.name}"
                    )

                # FDDB contains duplicate entries for the same path, so
                # anything that already exists on disk is skipped.
                if os.path.exists(full_path):
                    bar.update(1)
                    continue

                tar.extract(member, path=output_dir)
                bar.update(1)


# Download the face dataset archive.
face_dataset_url = 'http://vis-www.cs.umass.edu/fddb/originalPics.tar.gz'
face_dataset_path = 'cache/dataset/face/'
download(face_dataset_url, face_dataset_path)

# Download the face label archive.
face_label_path = 'cache/dataset/face/label/'
face_label_url = 'http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz'
download(face_label_url, face_label_path)

# Extract both archives into the directories they were downloaded to.
decompress(os.path.join(face_dataset_path, face_dataset_url.split('/')[-1]), face_dataset_path)
decompress(os.path.join(face_label_path, face_label_url.split('/')[-1]), face_label_path)