From 1ef65ff1a2184de162507de0140588264e64357e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BE=A1=E5=9D=82=E6=98=B4?= Date: Thu, 28 Nov 2024 14:47:32 +0800 Subject: [PATCH] =?UTF-8?q?=E8=8E=B7=E5=8F=96=E4=BA=BA=E8=84=B8=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E9=9B=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 11 +++++- requirements.txt | 6 +++- 实验七/1.get_dataset.py | 77 ++++++++++++++++++++++++++++++++++++++++ 实验七/main.py | 16 +++++++++ 实验六/get_dataset.py | 4 +-- 实验六/test.png | Bin 289 -> 8250 bytes 实验六/train.py | 6 ++-- 7 files changed, 113 insertions(+), 7 deletions(-) create mode 100644 实验七/1.get_dataset.py create mode 100644 实验七/main.py diff --git a/.gitignore b/.gitignore index 2d020de..baa8e22 100644 --- a/.gitignore +++ b/.gitignore @@ -143,4 +143,13 @@ cython_debug/ # jetbrains .idea/ -*/.idea/ \ No newline at end of file +*/.idea/ + +# mnist dataset +实验六/DataImages* +实验六/data/ +实验六/cache/ +实验六/models/ + +# dataset +实验七/cache/ \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 15e8805..508e8d7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,8 @@ matplotlib pillow scikit-learn==1.3 jupyterlab -jupyterlab-language-pack-zh-CN \ No newline at end of file +jupyterlab-language-pack-zh-CN +icecream +torch +torchvision +rich \ No newline at end of file diff --git a/实验七/1.get_dataset.py b/实验七/1.get_dataset.py new file mode 100644 index 0000000..d9def9f --- /dev/null +++ b/实验七/1.get_dataset.py @@ -0,0 +1,77 @@ +import os +import requests +import tqdm +import tarfile + +# 计算大小 +def get_human_readable_size(size_in_bytes): + # 定义单位 + units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB'] + # 计算单位和大小 + index = 0 + while size_in_bytes >= 1024 and index < len(units) - 1: + size_in_bytes /= 1024 + index += 1 + return f"{size_in_bytes:.2f} {units[index]}" + +# 下载与进度条 +def download(url,output_path): + filename = url.split('/')[-1] + download_path = os.path.join(output_path,filename) + + response=requests.get(url,stream=True) + total_size = int(response.headers.get('content-length', 0)) + os.makedirs(output_path,exist_ok=True) + + print("下载 ",filename,' 到 ',output_path,' |大小= ',get_human_readable_size(total_size)) + + # 如已下载,跳过 + if os.path.exists(download_path): + print(filename, ' 已存在,跳过') + return + + with open(download_path,'wb') as file,tqdm.tqdm( + desc='下载进度', + total=total_size, + unit='B', + unit_scale=True, + unit_divisor=1024 + )as bar: + for chunk in response.iter_content(chunk_size=1024): + file.write(chunk) + bar.update(len(chunk)) + + +def decompress(file_path, output_dir): + os.makedirs(output_dir, exist_ok=True) + print('解压 '+file_path.split('/')[-1],' 到 ',output_dir) + with tarfile.open(file_path) as tar: + members = tar.getmembers() + with tqdm.tqdm(total=len(members), desc='解压进度', unit='file') as bar: + for member in members: + # 构建完整路径 + full_path = os.path.join(output_dir, member.name) + + # 因FDDB存在同一路径下存在相同文件的问题,特此跳过 + # 如果文件已存在,跳过解压 + if os.path.exists(full_path): + bar.update(1) + continue + + # 解压文件 + tar.extract(member, path=output_dir) + bar.update(1) + +# 下载人脸数据集 +face_dataset_url='http://vis-www.cs.umass.edu/fddb/originalPics.tar.gz' +face_dataset_path='cache/dataset/face/' +download(face_dataset_url,face_dataset_path) + +# 下载人脸数据标签 +face_label_path='cache/dataset/face/label/' +face_label_url='http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz' +download(face_label_url,face_label_path) + +# 解压数据集 +decompress(os.path.join(face_dataset_path,face_dataset_url.split('/')[-1]),face_dataset_path) +decompress(os.path.join(face_label_path,face_label_url.split('/')[-1]),face_label_path) \ No newline at end of file diff --git a/实验七/main.py b/实验七/main.py new file mode 100644 index 0000000..eb389a0 --- /dev/null +++ b/实验七/main.py @@ -0,0 +1,16 @@ +# 这是一个示例 Python 脚本。 + +# 按 Shift+F10 执行或将其替换为您的代码。 +# 按 双击 Shift 在所有地方搜索类、文件、工具窗口、操作和设置。 + + +def print_hi(name): + # 在下面的代码行中使用断点来调试脚本。 + print(f'Hi, {name}') # 按 Ctrl+F8 切换断点。 + + +# 按装订区域中的绿色按钮以运行脚本。 +if __name__ == '__main__': + print_hi('PyCharm') + +# 访问 https://www.jetbrains.com/help/pycharm/ 获取 PyCharm 帮助 diff --git a/实验六/get_dataset.py b/实验六/get_dataset.py index e1f611f..ed02e70 100644 --- a/实验六/get_dataset.py +++ b/实验六/get_dataset.py @@ -16,5 +16,5 @@ def save_img_subset(data, save_path, num_samples): img.save(os.path.join(save_path, f"{i}-label-{label}.png")) # 保存前 600 张训练集图片和前 100 张测试集图片 -save_img_subset(train_data, './DataImages-Train', 6000) -save_img_subset(test_data, './DataImages-Test', 1000) +save_img_subset(train_data, './DataImages-Train', 60000) +save_img_subset(test_data, './DataImages-Test', 10000) diff --git a/实验六/test.png b/实验六/test.png index 1b1f72c722b53f8f63da0123014c517946400fb6..44775fcb0e335e18730237d3319851a058e01760 100644 GIT binary patch literal 8250 zcmc(F`9GBX_y0Ba?8%J~l90s2B#{{;sccD-gzQPSgd{TvNyuBaBwI+5B;gKYEVqOt z*+OINvaf@gF`rZa!S|={^vj%#WLC@ziXReN#3bpLfT{!P0I3Doh z!kNr5Ccc6;NB?o5C5cvNaAKTSZZ6j5p87P~e-~tIvP4hrmlMpcOa8s^+vAbGAe-L* z^Oy65?5X>x&uzNhn9~>Xx*if4<6nt5F@laGCp#KnP^R&$fHKqnm0Y{aq}!qUo!-Zy zLeEUecJc{V@_2Wv1b_6Y_%Y4+JrzRcNNIzCcGRsH12yqDIic3E+b#TgH*A!W;|&iK zS#q0Q&yd{Z&9Z35PjITQiix=>uyG{$T#MZD<}=E{R@Nm8B3YV0df2OSa9)j9U5vNs zFspF=?V#^e)#ChIxs$ zD$%#9ZB>%brKY5e94hnfm+9p#Ab$8@(EUL_$J)jwJTS^>#9>or)7)>b!G)0B4WaM( z>>p^}S*g{GA7g$VFMqhi5usPuUDV?-;He>%pdb|EuXU}&YH(g-i2KE=bf>^Tjp~T# zCQfH7>oe}mvhx3O6_)~jMD)o~*wW-%e0`|Vn*Z2M+|%rj;uDG%Je6uBr`#q)5(rUl(wj}d%ogqGTZ=e%%$Onj$2=vC?Mh?mk>$C33zgKv$?#SL%Q=*$eX>Dx|8W!xxge(ELXGu9y4pwvMZ1B0kGA|E-Mfh@ z??h{{VRw$0tVnAO-O;#vHm^%rRW-v{zQpQqqLEnC%zAJHiQsZYxLz8;t6Qs!@g3bg zYC@{14}N&{TClEc;^zs!QPo3_Sgt0vIuEfnt+kpUQhK~gMhNN&!ts`m*D}YvzGZiJ zbnJsOI~9ozQB4|687tS0t-oqF0cN$2ho+oQv7DlE)q@SAU%A-94<#u4e_U9bx!OY(dBClH~W-J>_Kf%57DY(fSDm zl|OKdqi%Q=x~6>6Au(Qgp%`biD@B$+=QY~i%YLsPVnfOvSQS@TE32fojV%YAl!|V= zu9m(QTx(T)>7?$$u9pWK+ec$K4_-%@DxCtnuQh=`C&;H(rKO46XE@fx zdvW&Tep@Rm?|VFsiSeGx|DV5GOym|95<)c0^VO6jIV_E}BxGb{V6!@1%fn_ON|FMk z)~7{`8ULk8edHuDre658PIuQ1AI3!W$dNFF9*RecVbYD= z8M66(v&n4}Az1qot53skr~lQPl0m+2mDkjGmJg()rvAgtZo@3U8}I|=#)zv`*3p|LzmjPsIbZCm|PFOq|}39 z6<>1M8wJob{GB>B3)cV8Ym=V|gxUB6<>5co4ogCA`izQaKt8H`4t?lf3 zXZ$4esidMv9N7Ds0%M z#U4lLvYb^GfujqAQysA{{j@#yJXT!%V#d|v#fU477t@qkXsB(`w?ms85p{JkA_wx4E&Wty5V?kDTBW>@8y{MCu@*=Tt!trA^B`MuA zfDumyIL-)Zk9|5Db^Wu3B$D8B@~C)>L4(fAk2q7Q0;%r`2Wt#&){V*aHU`#?#I!t? zdM`uS?P^3a9o?j8k)~Vku*j+8O8KQ2sdC$Q{wOWE`aWs$;GKlwU1zPhP9Sy`io!M} zz0PWpjg*p-BUzn=-81rrOv#pG86DQj0YO2V3>q~yHWm=NxM!PzRrp!$P#+>rl59E3 z+wG1~OXs!5gPpHkgYh;cA}dcwUjcdjwK7c$`(|Otd_Zf+NwEObfdg;(Yj)EadC_Mh zkeVa?+~Zet`*piNSSlSH7Ig^EmX^*%3SppH@HAw^K`5l|L?TU z?@URZN6U(KK^g%B1Y^=R;a6fc>#ueE_;D(giVN9vz=$i@;M(4IxT=fCg)(;-WabWa zYwK>A^_#xg+yMFmfU5RagYylB#ey-&M4(L~wO4YVl@i-a60+r^#7@?82Nh3z> ze0+6fVuaEB^4pS}nfIF}lEh+a*cP(5VurY(IY?F&x=? zA@}a_Y;`WBK9>4dQwBT9*+Ty~oR)Y&nEizWcGOpukGL0+P;UEQW`cnHqA~7^+`d@j z7uQqEF*45zfV}j0@I^P?yLV4pTbn=H$#EJd9{2CCms&b~he0FmtgAXQ9OoA3`&OR| zss~TgSPw4E?lTuywFKY(Iw~=gcARnT!P59fxqyf$`tIq{K3Yg!W@ct>Z7n!#oS}Nx z_Pts=jSL+ub_bp}G2w#EA`*$BPWFY!YL?JhmR-N85vhB&t~DvwFy*sqv&d!$~%-Smh&;COM5 zmSp_;`Z|NbSYN+WKA=1_D-by8Q4t&(%FkLm;Q6u3z2+!cpnd;d{`9?1W*jPP`g(ge zsMP&y(ZA3r>y(nd*~^zNM@L5&78X|9z8&xcl9+Mx#Zg7Vah?Y|uH`3$x$keU-3SZ} zba7#SFU9tCRC8fx2MjMICAG7;d?4NU*RNk2!L{kvHXiGtYD{Fv0e9~XOs(GX_J-sg zt-n8lBqOD#yQ@$qjs($YF{ zqSaXqg24ON_95YV3Dz&SfCO~UiEl<8m_CW+nH!fg#}j*mRKq&XoOrXu~JIb z!eX&HIwpnP6*luF@tZsR!i4EC13X|pe>5><3(i}#w2#My#{M`aEFeD_z$44-> z`E}3hpu!ETgfO8h@m#)N^^iMeo3Ue-g(W>)Y`SarZ3!rT*f*R|jBxDfv|;V?!}fya zh%ARbKCf$fbt@n+ka(Z=v8)VuQbGDe5IByn?{X)Oshf*?vve$@%}&uAab#cF_rYc! z&SYa_1J0zfvhs*XTa)+%Ag6d7Ag7NHl7p|c!zc8XK~?y*>**`sK?b zi=yX2jzn_jgddm_u0*2;mLD=n$5iEmI0dDh?Oa$VQsj9&hST z3pAH_Y791cWp%mx=v=mwXURYRQwENKy zpxJEPKg$JXbyfmpt*gTx-Q3Sd&iiv6nBIU=#hqam9!Q9cGG1kDE|2y0inOl^VS&Lt zZJPjhX^{>fH4V{ydt}=p9F*Fp`KU@R^rBEo%&8M+hEnd>s z%@H2%m$amKW=DsK;4aWYUTz1yuzz>HEfdSiD9b_eA^}l$_)y>C z%Ch+3;&2vg1NUc9AH8J+H#e7^6osEv74V!5)Yj37YZ*(M#vN27M6$ZxOLV!Wp`lT5 zr6;$0##t>LAmi0d+S^B08GL{4TVOj}SAYKe34np&`MFNJ|LRtduWw?9>;APfC}UP` zhcd#)Qp3FE;94+Oj`?R#4W2?;quRo95PfY83Zc5_+#OXeZile!8{$L6MhaJ?{>1=# z<1-aVA&EvW%@zH%_DQZjlM=3H(d+(TQS`vqetI*j>Q$GI<{L|;zP>*2R&?i0AizuN zI*J5#7VaVa!NB|XK}6uGynkKqle5~^>Qtw-GDq>i^`+G_*@y`Hs|A6ZS3NyFH$&=T zc&_RuM;8|tyJKvWMar5Q)pb$IY~MDZ^f`R^@Xf!MNbKo;#;JzeC#8s0RfLevP+|h9 zGYd|SOJ@6cMn@=cgGo?&llRDpA-GNW2m#QuK#dDsW( zTDwJmtwzc_@}lU%wq%@P1S#91$oATwYhv|o+KhJF&Cv7N$Ruz3mRd$fj=B4-Teos) z3fHoLF1I?bFy6RrpM%>m2C`uv+zPv^KYpB~(jb}nEP=-IsCg7-R2Bh>ooUKBSceCj zz4(TKkUt`F9xcWjssHZn*diNRJWew{;%z5b5lpzQ;}+84%`04gGVCkFeK|aFJvcJ< zWH)AbcAYy?Urs3OG5+}+jSACpyjgIs96Y;*>-c+>D9qmlh9CH{UKP!H{Z;BjoCu}mpQYlh@3hY4f>G!DM7QF{*Q=H)esf07K ziQo+f>{Z!+p(c^>T8ON)bv2W+%1~lf=af^KwNmoGwYAM}J5yQM(Kj7>WB(?q9CJlU z{;SF&l%{Bk4*))>CbEe}`4%J#+#fIOg^gPkU%xX8;GLZOPx85gC)WmN*CDY0S5iuT z-8XAJwZsnV76<6CC>rvjLJb>E8fmi0Qf{*XDY))S#0TAF%>D zw*Uijrl-ceJ9m;HGG_28qV!34im=$0pQa>i?qY!sux@t@_vshD$Jo!9V^5W5BepHh358t40K>n3hWH3&a7>TpFGa7RmDcMb79Y89^{_14+PA`>}y+ zj6tqV$;ZK4bALya6AErr+wz|xqGv8tt#ZEJ=c<;QGa^2hAQE9#~_T;bxX$@u_WkYq&O^(979~-M&|WH!BCh zsrzO#`?u63`GM&xi=3AediRw*V5de(k| zYf^K1JSO~3eBX*VMf|e>r6&`1R+lfchSfLSu$OEBJ>y`~*|gR8gJ!NA+#{C=(`2to z&IrAPmIJmce7VdJteKToLwK}0EBn&QllAzMQf4xOKoo#Rjs&H+KpIj?J|~}qUW@Hw zw(Fv5PMWWAt7X;NmD-hxN^M6zg1r2}ToIFD0^4U}Uuvii7e&wOi17jokEzXC57tRO zNBX9kqMZc{T9hLgx1UN{q)NmYhLKd`#~=Y0Sw3nRGhY+{>+3pFO^dFKgIYS<5Y8D(9vYGp+GVk{ zcPwiF9y_f*lX_rPmy4pZ$7b055z&&DI@SD{FRsHLu5Dff6F$Vb->BjPDTP|hR z4`#7XNTvQ-aV~ffZV>gKv`v7D@F+B#COX{#Ic?ZWR0}V&y7wklm0yv7om~fN3aRK(uEWw{5Jt-f zXtaBC4BNkhOuE7A1-TyR@6YIvulaSXiWzj$)Pnqtp(+eBosv%?JqipZO24jy{(&Hq zKuc(L1pC^RIt^r*V=h&N(N|3LhQ-x>_3eI|I4=k`J;cU+&vp9V7ChC za>tlcNwe#}{k16VSwKIIW#!BXxMEdhU>4*z78F?`*}Hr|9uz9^IDajZU>w*LrH3*G zzj(6Ek>1&`Dn(<2R@=t6j6sKDnArdj&;1Y&r`tJo8|XnM?%l{C;-d60)A7$8wdKCC zWdaRSJj=2`BiejTJ=645hnwe)io7DBi14~nB(`~MSu@j=LQ;MI>yDfdDMpotpW8tV zV{&REqmlAQ%SktGNblrrgpjmWPU&#HSs}i`P*qbTv=Y_<2tXq8tA20~cOo7n7`b)e z1e9-!vJot4yl*ydmJbYhRYE^tf$@M;3bJm8>*B(KPwgx_^o&rSL%V1EO$+yoW`R$9 znE-MFosz0w>!T4Yf-G;OC+pt4c>^m0itjrK9@yrrR$;@ZP^XMlB<#}N$bkAJ=;WR; z>9j3SZe_Mx-s55gxOKVSj~~O}b7;&!&ZJF+)Hbl27(*4Si0sc^5#Xw)__$-lDPvGj zmRWT<{{4AVX%&sEcm67@@>Yyp@R%sCZJPf z1Jq^z;T%!yGHjNiU~dr5^DTB+Vx6yuC^3a&n<3Pyv*7m9Rp7!P<)<6-M>hhDz1iZ= zmj>|`JpFyN81o^!aDBHfD>5n#k~0WxB6Ywkz^i~<44i_=H;=rV0~mltmraS4CIj+B z^0|CyBb4@)G0zxeBQH2a>Y_H?94AhtxPT#wEP>0+2Yl+M|Fc_>pg`EN2mJEX0BHdv ziy_p{MJlvu9yQC#;hg9PWX%yXT@Pl`vMOa0EC{wem(S8>myH;(agXf!%=Q_WO>A?n zMG+f+)Mnuyz3OG9->9@AQ(lt!!KNexS{GA)d=;^N37_Sx!dvgSySOwvtC^Iq+|bB4BFqgmKr~eN zkRpM}%D_Pwc~pSv9Ja|hiT{T?(pVl;@0@Lgc;w-3;d-WwIsw`{Kz{n^w=m++63pn( zUv~Qt`e$Nkj7gHB(cg|2WvvsMDXq?4US0rvExu4~hqGkA;+!p%>b)oY(KW>dG$>Gm z9^)Y~KSvJPnIlA`y5q@@t(UxpdAl3YAb|qe06Lgq@|>ySVU7s@>D7YE>$yZjqDW_$ zZOq=5yjrGW%|OX%b_B%@u0$g?xB40sEbDJA06ZM(knS01 zFxiy?S#oHf1&IhmFi;h?xBsWf&)YKR4wTz3ww1yPg7~Ujk=?(95`>N+`FIjzB7=`xgp-3Ii|_A v-L*#M1-Os`xjizc_9V{28N4&u}U&9FfcHbe*b%n3x}M~_rK5Wk@!p~u3}?Yc@T%j z4d4FG=fNQ-`ReaWWn8W~_V;fuE;+v+fA``tRQLoQjctGbuEnL1;m6;Pn; nK|7p*Vcr9Vb=PsZY=8j(pK((@7DvN500000NkvXXu0mjfz4L%z diff --git a/实验六/train.py b/实验六/train.py index ed943d0..d6a7327 100644 --- a/实验六/train.py +++ b/实验六/train.py @@ -64,8 +64,8 @@ def ensure_dir_exists(directory): os.makedirs(directory) # 加载训练数据 -trains_paths, trains_labels = load_data("cache/pretrains/train") -test_paths, test_labels = load_data("cache/pretrains/test") +trains_paths, trains_labels = load_data("DataImages-Train") +test_paths, test_labels = load_data("DataImages-Test") # 提取特征和标签 X_train = np.array([extract_features(train_path) for train_path in tqdm.tqdm(trains_paths, desc="训练集特征提取中:")]) @@ -87,7 +87,7 @@ for test_sample in tqdm.tqdm(X_test, desc="测试集中预测进度"): Y_pred.append(classifier.predict(test_sample.reshape(1, -1))) accuracy = accuracy_score(Y_test, Y_pred) -print(f"性能: {accuracy * 100:.2f}%") +print(f"准确率: {accuracy * 100:.2f}%") # 保存模型 ensure_dir_exists("models")