"""Download and unpack the FDDB face dataset (images and fold labels).

Run as a script. The helpers are import-safe: nothing is downloaded at
import time, and the third-party packages (``requests``, ``tqdm``) are only
imported inside the functions that actually need them.
"""
import os
import tarfile

# Binary (1024-based) size units, smallest first.
_SIZE_UNITS = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB')


def get_human_readable_size(size_in_bytes):
    """Format a byte count as a string with two decimals and a unit.

    Args:
        size_in_bytes: Size in bytes (int or float).

    Returns:
        A string such as ``"1.50 KB"``. Values beyond the largest known
        unit stay expressed in that unit (``"1024.00 EB"``).
    """
    size = size_in_bytes
    index = 0
    # Stop at the last unit so the index can never run off the table.
    while size >= 1024 and index < len(_SIZE_UNITS) - 1:
        size /= 1024
        index += 1
    return f"{size:.2f} {_SIZE_UNITS[index]}"


def download(url, output_path, chunk_size=64 * 1024, timeout=30):
    """Download ``url`` into ``output_path`` with a progress bar.

    The file name is the last path component of ``url``. If that file already
    exists in ``output_path`` the function returns without touching the
    network.

    Args:
        url: Address of the file to fetch.
        output_path: Directory to store the file in (created if missing).
        chunk_size: Number of bytes requested per streamed chunk.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    filename = url.split('/')[-1]
    download_path = os.path.join(output_path, filename)
    os.makedirs(output_path, exist_ok=True)

    # Check the cache before opening a connection: no request is made (and
    # no response is left unclosed) when the file is already present.
    if os.path.exists(download_path):
        print(filename, ' 已存在,跳过')
        return

    import requests
    import tqdm

    # Stream into a temporary name and rename on success, so an interrupted
    # run never leaves a truncated file that a later run would treat as
    # "already downloaded". A stale .part file is simply overwritten.
    partial_path = download_path + '.part'
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail loudly instead of saving an HTTP error page as the archive.
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))

        print("下载 ", filename, ' 到 ', output_path, ' |大小= ', get_human_readable_size(total_size))

        with open(partial_path, 'wb') as file, tqdm.tqdm(
            desc='下载进度',
            # Without a Content-Length header the total is unknown; None lets
            # tqdm show a running counter instead of a bogus 0-byte total.
            total=total_size or None,
            unit='B',
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    file.write(chunk)
                    bar.update(len(chunk))

    os.replace(partial_path, download_path)


def decompress(file_path, output_dir):
    """Extract the tar archive ``file_path`` into ``output_dir``.

    Members whose target path already exists are skipped, because the FDDB
    archive contains the same path more than once.

    Args:
        file_path: Path of the (optionally compressed) tar archive.
        output_dir: Directory to extract into (created if missing).

    Raises:
        ValueError: If a member would be written outside ``output_dir``.
        tarfile.TarError: If the archive cannot be read.
    """
    import tqdm

    os.makedirs(output_dir, exist_ok=True)
    # basename() handles both '/' and the platform separator.
    print('解压 ' + os.path.basename(file_path), ' 到 ', output_dir)

    root = os.path.realpath(output_dir)
    # Use the standard "data" extraction filter where this Python provides
    # it; older interpreters rely on the explicit path check below.
    extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}

    with tarfile.open(file_path) as tar:
        members = tar.getmembers()
        with tqdm.tqdm(total=len(members), desc='解压进度', unit='file') as bar:
            for member in members:
                # Build the full destination path of this member.
                full_path = os.path.join(output_dir, member.name)

                # The archive is fetched over plain HTTP, so do not trust
                # member names: refuse anything escaping the output folder.
                target = os.path.realpath(full_path)
                if os.path.commonpath([root, target]) != root:
                    raise ValueError(f"unsafe path in archive: {member.name!r}")

                # Skip anything that is already on disk.
                if not os.path.exists(full_path):
                    tar.extract(member, path=output_dir, **extract_kwargs)
                bar.update(1)


# Face image archive.
face_dataset_url = 'http://vis-www.cs.umass.edu/fddb/originalPics.tar.gz'
face_dataset_path = 'cache/dataset/face/'

# Face annotation (fold) archive.
face_label_path = 'cache/dataset/face/label/'
face_label_url = 'http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz'


def main():
    """Download both FDDB archives, then unpack them next to each other."""
    download(face_dataset_url, face_dataset_path)
    download(face_label_url, face_label_path)

    decompress(os.path.join(face_dataset_path, face_dataset_url.split('/')[-1]), face_dataset_path)
    decompress(os.path.join(face_label_path, face_label_url.split('/')[-1]), face_label_path)


if __name__ == '__main__':
    main()
1b1f72c..44775fc 100644 Binary files a/实验六/test.png and b/实验六/test.png differ diff --git a/实验六/train.py b/实验六/train.py index ed943d0..d6a7327 100644 --- a/实验六/train.py +++ b/实验六/train.py @@ -64,8 +64,8 @@ def ensure_dir_exists(directory): os.makedirs(directory) # 加载训练数据 -trains_paths, trains_labels = load_data("cache/pretrains/train") -test_paths, test_labels = load_data("cache/pretrains/test") +trains_paths, trains_labels = load_data("DataImages-Train") +test_paths, test_labels = load_data("DataImages-Test") # 提取特征和标签 X_train = np.array([extract_features(train_path) for train_path in tqdm.tqdm(trains_paths, desc="训练集特征提取中:")]) @@ -87,7 +87,7 @@ for test_sample in tqdm.tqdm(X_test, desc="测试集中预测进度"): Y_pred.append(classifier.predict(test_sample.reshape(1, -1))) accuracy = accuracy_score(Y_test, Y_pred) -print(f"性能: {accuracy * 100:.2f}%") +print(f"准确率: {accuracy * 100:.2f}%") # 保存模型 ensure_dir_exists("models")