Files
open-cv-experiment/实验八/1.get_dataset.py
T
2024-12-12 21:58:18 +08:00

111 lines
3.8 KiB
Python

import bz2
import os
import tempfile
import requests
import tqdm
import tarfile
# Size formatting helper
def get_human_readable_size(size_in_bytes):
    """Return a byte count formatted with two decimals and a binary unit.

    Walks up the unit ladder (B, KB, MB, ...) dividing by 1024 until the
    value drops below 1024 or the largest unit is reached.
    """
    unit_ladder = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB')
    value = size_in_bytes
    step = 0
    # Stop at the last unit even if the value is still >= 1024.
    while value >= 1024 and step < len(unit_ladder) - 1:
        value /= 1024
        step += 1
    return f"{value:.2f} {unit_ladder[step]}"
# Download with a progress bar
def download(url, output_path):
    """Download *url* into *output_path* with a tqdm progress bar.

    The target filename is the last path segment of the URL.  If that
    file already exists the function returns immediately — before any
    HTTP request is made (the original issued the GET first, wasting a
    request and leaking the never-closed Response).  The destination
    directory is created if needed.

    Raises requests.HTTPError on a non-2xx status so an error page is
    never silently written to disk.
    """
    filename = url.split('/')[-1]
    download_path = os.path.join(output_path, filename)
    os.makedirs(output_path, exist_ok=True)
    # Skip before touching the network.
    if os.path.exists(download_path):
        print(filename, ' 已存在,跳过')
        return
    # stream=True defers the body until iterated; the context manager
    # guarantees the connection is released; timeout avoids hanging forever.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        print("下载 ", filename, '', output_path, ' |大小= ', get_human_readable_size(total_size))
        with open(download_path, 'wb') as file, tqdm.tqdm(
            desc='下载进度',
            total=total_size,
            unit='B',
            unit_scale=True,
            unit_divisor=1024
        ) as bar:
            for chunk in response.iter_content(chunk_size=1024):
                # Skip empty keep-alive chunks so they are neither written
                # nor counted toward the progress bar.
                if chunk:
                    file.write(chunk)
                    bar.update(len(chunk))
def decompress(file_path, output_dir):
    """Extract a tar archive into *output_dir*, member by member.

    Members whose target path already exists are skipped — the FDDB
    archive contains duplicate entries for the same path — and a tqdm
    bar tracks per-file progress.
    """
    os.makedirs(output_dir, exist_ok=True)
    print('解压 ' + file_path.split('/')[-1], '', output_dir)
    # NOTE(review): tar.extract trusts member names, so a crafted archive
    # could escape output_dir (path traversal).  Fine for the known
    # FDDB archives -- confirm before reusing on untrusted input.
    with tarfile.open(file_path) as archive:
        entries = archive.getmembers()
        with tqdm.tqdm(total=len(entries), desc='解压进度', unit='file') as progress:
            for entry in entries:
                destination = os.path.join(output_dir, entry.name)
                # Extract only when the target is absent (duplicate-path
                # workaround); the bar advances either way.
                if not os.path.exists(destination):
                    archive.extract(entry, path=output_dir)
                progress.update(1)
def decompress_bz2(file_path, output_dir):
    """Decompress a ``.bz2`` file into *output_dir*, keeping the base name.

    Skips the work when the decompressed file already exists.  The data
    is streamed in fixed-size chunks so arbitrarily large archives (the
    dlib models are hundreds of MB decompressed) never have to fit in
    memory — the original read the whole payload with a single .read().
    """
    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)
    print('解压 ' + file_path.split('/')[-1], '', output_dir)
    # Target name = source basename minus the trailing '.bz2'.
    output_filename = os.path.basename(file_path).rsplit('.bz2', 1)[0]
    output_file_path = os.path.join(output_dir, output_filename)
    # Skip if the decompressed file is already present
    if os.path.exists(output_file_path):
        print(output_filename, ' 已存在,跳过解压')
        return
    with bz2.BZ2File(file_path, 'rb') as bz2_file, open(output_file_path, 'wb') as output_file:
        # Copy in 1 MiB chunks instead of loading everything at once.
        while True:
            chunk = bz2_file.read(1 << 20)
            if not chunk:
                break
            output_file.write(chunk)
# --- FDDB face dataset and annotations --------------------------------------
face_dataset_url = 'http://vis-www.cs.umass.edu/fddb/originalPics.tar.gz'
face_dataset_path = 'cache/dataset/face/'
face_label_url = 'http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz'
face_label_path = 'cache/dataset/face/label/'

# Fetch both archives first, then unpack each next to where it landed.
download(face_dataset_url, face_dataset_path)
download(face_label_url, face_label_path)
for _url, _dir in ((face_dataset_url, face_dataset_path),
                   (face_label_url, face_label_path)):
    decompress(os.path.join(_dir, _url.split('/')[-1]), _dir)

# --- dlib models (68-landmark predictor + face-recognition ResNet) ----------
model_url1 = 'https://github.com/davisking/dlib-models/raw/refs/heads/master/shape_predictor_68_face_landmarks.dat.bz2'
model_url2 = 'https://github.com/davisking/dlib-models/raw/refs/heads/master/dlib_face_recognition_resnet_model_v1.dat.bz2'
model_path = 'models/'
download(model_url1, model_path)
download(model_url2, model_path)
for _url in (model_url1, model_url2):
    decompress_bz2(os.path.join(model_path, _url.split('/')[-1]), model_path)