open-cv-experiment/实验八/1.get_dataset.py

import os
import requests
import tqdm
import tarfile

# Size formatting
def get_human_readable_size(size_in_bytes):
    """Format a byte count as a string with two decimals and a binary unit.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit ('EB') is reached, e.g. ``1536`` -> ``'1.50 KB'``.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB')
    value = size_in_bytes
    for label in units[:-1]:
        if value >= 1024:
            value /= 1024
        else:
            # Small enough to be expressed in the current unit.
            break
    else:
        # Every smaller unit overflowed: cap at the largest unit.
        label = units[-1]
    return f"{value:.2f} {label}"

# Download with a progress bar
def download(url, output_path, timeout=30):
    """Download ``url`` into the directory ``output_path`` with a progress bar.

    The file name is the last ``/``-separated component of ``url``. If a file
    with that name already exists in ``output_path`` the download is skipped
    without contacting the server.

    Args:
        url: Address of the file to fetch.
        output_path: Directory the file is stored in (created if missing).
        timeout: Seconds to wait for the server to connect / send data before
            ``requests`` gives up.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status.
    """
    filename = url.split('/')[-1]
    download_path = os.path.join(output_path, filename)

    # Skip if already downloaded. Checked before any network access so that a
    # cached run works offline and no streamed connection is left open.
    if os.path.exists(download_path):
        print(filename, ' 已存在，跳过')
        return

    os.makedirs(output_path, exist_ok=True)

    # Stream into a temporary name and rename only on success. An interrupted
    # download therefore never leaves a truncated file at `download_path`
    # that the existence check above would mistake for a complete one; a
    # stale '.part' file is simply overwritten by the next attempt.
    partial_path = download_path + '.part'

    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail loudly instead of saving an HTTP error page as the archive.
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))

        print("下载 ",filename,' 到 ',output_path,' |大小= ',get_human_readable_size(total_size))

        with open(partial_path, 'wb') as file, tqdm.tqdm(
            desc='下载进度',
            total=total_size,
            unit='B',
            unit_scale=True,
            unit_divisor=1024
        ) as bar:
            # 64 KiB chunks keep the Python-level loop overhead low.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                file.write(chunk)
                bar.update(len(chunk))

    os.replace(partial_path, download_path)


def _is_within_directory(root, target):
    """Return True if ``target`` is ``root`` itself or lies beneath it."""
    try:
        return os.path.commonpath([root, target]) == root
    except ValueError:
        # Raised e.g. for paths on different drives: certainly not inside.
        return False


def decompress(file_path, output_dir):
    """Extract the tar archive ``file_path`` into ``output_dir`` with a progress bar.

    Members whose destination path already exists are skipped; this also
    handles archives (such as FDDB) that contain the same path more than once.

    Args:
        file_path: Path of the tar archive (any compression ``tarfile`` detects).
        output_dir: Directory to extract into (created if missing).

    Raises:
        tarfile.TarError: If the archive cannot be read, or a member would be
            written outside ``output_dir``.
    """
    os.makedirs(output_dir, exist_ok=True)
    print('解压 '+os.path.basename(file_path),' 到 ',output_dir)

    root = os.path.realpath(output_dir)
    # Use the safe 'data' extraction filter where the running Python has it;
    # older interpreters do not accept the keyword at all.
    extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}

    with tarfile.open(file_path) as tar:
        members = tar.getmembers()
        with tqdm.tqdm(total=len(members), desc='解压进度', unit='file') as bar:
            for member in members:
                # Build the full destination path
                full_path = os.path.join(output_dir, member.name)

                # The archive comes from the network: refuse absolute or
                # '..' member names that would escape the output directory.
                if not _is_within_directory(root, os.path.realpath(full_path)):
                    raise tarfile.TarError(
                        f"unsafe path in archive, refusing to extract: {member.name!r}"
                    )

                # FDDB contains duplicate entries for the same path, so
                # anything that already exists on disk is skipped.
                if not os.path.exists(full_path):
                    tar.extract(member, path=output_dir, **extract_kwargs)
                bar.update(1)

# Download the face image archive
face_dataset_url = 'http://vis-www.cs.umass.edu/fddb/originalPics.tar.gz'
face_dataset_path = 'cache/dataset/face/'
download(face_dataset_url, face_dataset_path)

# Download the face label archive
face_label_path = 'cache/dataset/face/label/'
face_label_url = 'http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz'
download(face_label_url, face_label_path)

# Extract both archives in place, images first and labels second
for _archive_url, _target_dir in (
    (face_dataset_url, face_dataset_path),
    (face_label_url, face_label_path),
):
    _archive_name = _archive_url.rsplit('/', 1)[-1]
    decompress(os.path.join(_target_dir, _archive_name), _target_dir)